diff --git a/.azure-pipelines/get_dut_version.py b/.azure-pipelines/get_dut_version.py new file mode 100755 index 00000000000..205cec3e9e1 --- /dev/null +++ b/.azure-pipelines/get_dut_version.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python2 + +import argparse +import logging +import os +import sys +import json +import yaml + +_self_dir = os.path.dirname(os.path.abspath(__file__)) +base_path = os.path.realpath(os.path.join(_self_dir, "..")) +if base_path not in sys.path: + sys.path.append(base_path) +ansible_path = os.path.realpath(os.path.join(_self_dir, "../ansible")) +if ansible_path not in sys.path: + sys.path.append(ansible_path) + +from devutil.devices.factory import init_localhost, init_testbed_sonichosts # noqa E402 + +logger = logging.getLogger(__name__) + +RC_INIT_FAILED = 1 +RC_GET_DUT_VERSION_FAILED = 2 + +ASIC_NAME_PATH = '../ansible/group_vars/sonic/variables' + + +def read_asic_name(hwsku): + asic_name_file = os.path.join(os.path.dirname(__file__), ASIC_NAME_PATH) + try: + with open(asic_name_file) as f: + asic_name = yaml.safe_load(f) + + asic_name_dict = {} + for key, value in asic_name.items(): + if "hwskus" in key: + asic_name_dict[key] = value + + for name, hw in asic_name_dict.items(): + if hwsku in hw: + return name.split('_')[1] + + return "unknown" + + except IOError: + return None + + +def get_duts_version(sonichosts, output=None): + try: + ret = {} + duts_version = sonichosts.command("show version") + for dut, version in duts_version.items(): + ret[dut] = {} + dut_version = version["stdout_lines"] + + for line in dut_version: + if ":" in line: + line_splitted = line.split(":", 1) + key = line_splitted[0].strip() + value = line_splitted[1].strip() + if key == "Docker images": + ret[dut]["Docker images"] = [] + continue + elif key == "ASIC": + ret[dut]["ASIC TYPE"] = value + continue + elif key == "HwSKU": + ret[dut]["ASIC"] = read_asic_name(value) + ret[dut][key] = value + elif "docker" in line: + line_splitted = line.split() + ret[dut]["Docker 
images"].append({"REPOSITORY": line_splitted[0], + "TAG": line_splitted[1], + "IMAGE ID": line_splitted[2], + "SIZE": line_splitted[3]}) + + if output: + with open(output, "w") as f: + f.write(json.dumps(ret)) + f.close() + else: + print(ret) + except Exception as e: + logger.error("Failed to get DUT version: {}".format(e)) + sys.exit(RC_GET_DUT_VERSION_FAILED) + + +def validate_args(args): + _log_level_map = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL + } + logging.basicConfig( + stream=sys.stdout, + level=_log_level_map[args.log_level], + format="%(asctime)s %(filename)s#%(lineno)d %(levelname)s - %(message)s" + ) + + +def main(args): + logger.info("Validating arguments") + validate_args(args) + + logger.info("Initializing hosts") + localhost = init_localhost(args.inventory, options={"verbosity": args.verbosity}) + sonichosts = init_testbed_sonichosts( + args.inventory, args.testbed_name, testbed_file=args.tbfile, options={"verbosity": args.verbosity} + ) + + if not localhost or not sonichosts: + sys.exit(RC_INIT_FAILED) + + get_duts_version(sonichosts, args.output) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Tool for getting sonic device version.") + + parser.add_argument( + "-i", "--inventory", + dest="inventory", + nargs="+", + help="Ansible inventory file") + + parser.add_argument( + "-t", "--testbed-name", + type=str, + required=True, + dest="testbed_name", + help="Testbed name." + ) + + parser.add_argument( + "--tbfile", + type=str, + dest="tbfile", + default="testbed.yaml", + help="Testbed definition file." + ) + + parser.add_argument( + "-v", "--verbosity", + type=int, + dest="verbosity", + default=2, + help="Log verbosity (0-3)." 
+ ) + + parser.add_argument( + "--log-level", + type=str, + dest="log_level", + choices=["debug", "info", "warning", "error", "critical"], + default="debug", + help="Loglevel" + ) + + parser.add_argument( + "-o", "--output", + type=str, + dest="output", + required=False, + help="Output duts version to the specified file." + ) + + args = parser.parse_args() + main(args) diff --git a/.azure-pipelines/pr_test_scripts.yaml b/.azure-pipelines/pr_test_scripts.yaml new file mode 100644 index 00000000000..3f9f33225f4 --- /dev/null +++ b/.azure-pipelines/pr_test_scripts.yaml @@ -0,0 +1,106 @@ +t0: + - arp/test_arp_extended.py + - arp/test_neighbor_mac.py + - arp/test_neighbor_mac_noptf.py + - bgp/test_bgp_fact.py + - bgp/test_bgp_gr_helper.py::test_bgp_gr_helper_routes_perserved + - bgp/test_bgp_speaker.py + - bgp/test_bgp_update_timer.py + - bgp/test_bgpmon.py + - cacl/test_cacl_application.py + - cacl/test_cacl_function.py + - cacl/test_ebtables_application.py + - container_checker/test_container_checker.py + - dhcp_relay/test_dhcp_relay.py + - dhcp_relay/test_dhcpv6_relay.py + - generic_config_updater/test_aaa.py + - generic_config_updater/test_bgpl.py + - generic_config_updater/test_cacl.py + - generic_config_updater/test_dhcp_relay.py + - generic_config_updater/test_ipv6.py + - generic_config_updater/test_lo_interface.py + - generic_config_updater/test_portchannel_interface.py + - generic_config_updater/test_syslog.py + - generic_config_updater/test_vlan_interface.py + - iface_namingmode/test_iface_namingmode.py + - lldp/test_lldp.py + - monit/test_monit_status.py + - ntp/test_ntp.py + - pc/test_po_cleanup.py + - pc/test_po_update.py + - platform_tests/test_advanced_reboot.py::test_warm_reboot + - platform_tests/test_cpu_memory_usage.py + - process_monitoring/test_critical_process_monitoring.py + - route/test_default_route.py + - route/test_static_route.py + - show_techsupport/test_techsupport_no_secret.py + - snmp/test_snmp_cpu.py + - snmp/test_snmp_default_route.py + 
- snmp/test_snmp_interfaces.py + - snmp/test_snmp_lldp.py + - snmp/test_snmp_loopback.py + - snmp/test_snmp_pfc_counters.py + - snmp/test_snmp_queue.py + - ssh/test_ssh_ciphers.py + - syslog/test_syslog.py + - system_health/test_system_status.py + - tacacs/test_accounting.py + - tacacs/test_authorization.py + - tacacs/test_jit_user.py + - tacacs/test_ro_disk.py + - tacacs/test_ro_user.py + - tacacs/test_rw_user.py + - telemetry/test_telemetry.py + - test_features.py + - test_interfaces.py + - test_procdockerstatsd.py + + +t0-2vlans: + - dhcp_relay/test_dhcp_relay.py + - dhcp_relay/test_dhcpv6_relay.py + +t0-sonic: + - bgp/test_bgp_fact.py + +t1-lag: + - bgp/test_bgp_allow_list.py + - bgp/test_bgp_bbr.py + - bgp/test_bgp_bounce.py + - bgp/test_bgp_fact.py + - bgp/test_bgp_multipath_relax.py + - bgp/test_bgp_update_timer.py + - bgp/test_bgpmon.py + - bgp/test_traffic_shift.py + - configlet/test_add_rack.py + - container_checker/test_container_checker.py + - http/test_http_copy.py + - ipfwd/test_mtu.py + - lldp/test_lldp.py + - monit/test_monit_status.py + - pc/test_lag_2.py + - platform_tests/test_cpu_memory_usage.py + - process_monitoring/test_critical_process_monitoring.py + - route/test_default_route.py + - scp/test_scp_copy.py + - test_interfaces.py + +multi-asic-t1-lag: + - bgp/test_bgp_fact.py + - snmp/test_snmp_default_route.py + - snmp/test_snmp_loopback.py + - snmp/test_snmp_pfc_counters.py + - snmp/test_snmp_queue.py + - tacacs/test_accounting.py + - tacacs/test_authorization.py + - tacacs/test_jit_user.py + - tacacs/test_ro_disk.py + - tacacs/test_ro_user.py + - tacacs/test_rw_user.py + +t2: + - test_vs_chassis_setup.py + - voq/test_voq_init.py + +dualtor: + - arp/test_arp_extended.py diff --git a/.azure-pipelines/pytest-collect-only.yml b/.azure-pipelines/pytest-collect-only.yml new file mode 100644 index 00000000000..b438f940085 --- /dev/null +++ b/.azure-pipelines/pytest-collect-only.yml @@ -0,0 +1,41 @@ +steps: + +- script: | + sudo apt-get update + 
sudo apt-get install \ + ca-certificates \ + curl \ + gnupg \ + lsb-release -y + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor \ + -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] \ + https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + sudo apt-get install docker-ce docker-ce-cli containerd.io -y + displayName: 'Install Docker' + +- checkout: self + clean: true + displayName: 'Checkout sonic-mgmt repo' + +- script: | + set -x + sudo docker pull sonicdev-microsoft.azurecr.io:443/docker-sonic-mgmt:latest + sudo docker run -dt --name sonic-mgmt-collect \ + -v $(System.DefaultWorkingDirectory):/var/src/sonic-mgmt \ + sonicdev-microsoft.azurecr.io:443/docker-sonic-mgmt:latest \ + /bin/bash + displayName: 'Prepare sonic-mgmt docker container' + +- script: | + set -x + sudo docker exec -t -w /var/src/sonic-mgmt/tests sonic-mgmt-collect \ + python2 -m pytest --inventory ../ansible/veos_vtb --host-pattern all \ + --testbed_file vtestbed.yaml --testbed vms-kvm-t0 \ + --ignore saitests --ignore ptftests --ignore acstests \ + --ignore scripts --ignore k8s --ignore sai_qualify --ignore common \ + --ignore-conditional-mark \ + --color=no --collect-only --continue-on-collection-errors + displayName: 'Run pytest --collect-only' diff --git a/.azure-pipelines/run-test-scheduler-template.yml b/.azure-pipelines/run-test-scheduler-template.yml new file mode 100644 index 00000000000..26c4e7e49f9 --- /dev/null +++ b/.azure-pipelines/run-test-scheduler-template.yml @@ -0,0 +1,207 @@ +parameters: +- name: TOPOLOGY + type: string + +- name: POLL_INTERVAL + type: number + default: 10 + +- name: POLL_TIMEOUT + type: number + default: 36000 + +- name: MIN_WORKER + type: string + default: 1 + +- name: MAX_WORKER + type: string + default: 1 + +- name: NUM_ASIC 
+ type: number + default: 1 + +- name: TEST_SET + type: string + default: "" + +- name: DEPLOY_MG_EXTRA_PARAMS + type: string + default: "" + +- name: COMMON_EXTRA_PARAMS + type: string + default: "" + +- name: VM_TYPE + type: string + default: "ceos" + +- name: TESTBED_NAME + type: string + default: "" + +- name: IMAGE_URL + type: string + default: "" + +- name: HWSKU + type: string + default: "" + +- name: TEST_PLAN_TYPE + type: string + default: "" + +- name: PLATFORM + type: string + default: "" + +- name: SCRIPTS + type: string + default: "" + +- name: FEATURES + type: string + default: "" + +- name: SCRIPTS_EXCLUDE + type: string + default: "" + +- name: FEATURES_EXCLUDE + type: string + default: "" + +- name: REPO_NAME + type: string + default: "" + +- name: MGMT_BRANCH + type: string + default: "" + +- name: STOP_ON_FAILURE + type: string + default: "" + +- name: RETRY_TIMES + type: string + default: "" + +- name: DUMP_KVM_IF_FAIL + type: string + default: "True" + values: + - "True" + - "False" + +- name: REQUESTER + type: string + default: "" + +- name: MAX_RUN_TEST_MINUTES + type: number + default: 480 + +- name: KVM_IMAGE_BRANCH + type: string + default: "" + + +steps: + - script: | + set -e + # always use the test plan script from master branch. 
+ wget -O ./.azure-pipelines/test_plan.py https://raw.githubusercontent.com/sonic-net/sonic-mgmt/master/.azure-pipelines/test_plan.py + displayName: Download test plan scripts + + - script: | + set -e + pip install PyYAML + rm -f new_test_plan_id.txt + + python ./.azure-pipelines/test_plan.py create \ + -t ${{ parameters.TOPOLOGY }} \ + -o new_test_plan_id.txt \ + --min-worker ${{ parameters.MIN_WORKER }} \ + --max-worker ${{ parameters.MAX_WORKER }} \ + --test-set ${{ parameters.TEST_SET }} \ + --kvm-image-branch "${{ parameters.KVM_IMAGE_BRANCH }}" \ + --kvm-build-id $(KVM_BUILD_ID) \ + --deploy-mg-extra-params "${{ parameters.DEPLOY_MG_EXTRA_PARAMS }}" \ + --mgmt-branch ${{ parameters.MGMT_BRANCH }} \ + --common-extra-params "${{ parameters.COMMON_EXTRA_PARAMS }}" + + TEST_PLAN_ID=`cat new_test_plan_id.txt` + + echo "Created test plan $TEST_PLAN_ID" + echo "Check $(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID for test plan status" + echo "##vso[task.setvariable variable=TEST_PLAN_ID]$TEST_PLAN_ID" + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + TENANT_ID: $(TESTBED_TOOLS_MSAL_TENANT_ID) + CLIENT_ID: $(TESTBED_TOOLS_MSAL_CLIENT_ID) + CLIENT_SECRET: $(TESTBED_TOOLS_MSAL_CLIENT_SECRET) + displayName: Trigger test + + - script: | + set -e + echo "Lock testbed" + echo "SONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com" + echo "Runtime detailed progress at $(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + # When "LOCK_TESTBED" finish, it changes into "PREPARE_TESTBED" + python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state LOCK_TESTBED + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + displayName: Lock testbed + + - script: | + set -e + echo "Prepare testbed" + echo "Preparing the testbed(add-topo, deploy-mg) may take 15-30 minutes. 
Before the testbed is ready, the progress of the test plan keeps displayed as 0, please be patient" + echo "SONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com" + echo "Runtime detailed progress at $(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + # When "PREPARE_TESTBED" finish, it changes into "EXECUTING" + python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state PREPARE_TESTBED + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + displayName: Prepare testbed + + - script: | + set -e + echo "Run test" + echo "SONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com" + echo "Runtime detailed progress at $(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + # When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED" + python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state EXECUTING + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + displayName: Run test + timeoutInMinutes: ${{ parameters.MAX_RUN_TEST_MINUTES }} + + - ${{ if eq(parameters.DUMP_KVM_IF_FAIL, 'True') }}: + - script: | + set -e + echo "KVM dump" + echo "SONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com" + echo "Runtime detailed progress at $(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + # When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED" + python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state KVMDUMP + condition: succeededOrFailed() + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + displayName: KVM dump + + - script: | + set -e + echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect." 
+ python ./.azure-pipelines/test_plan.py cancel -i "$(TEST_PLAN_ID)" + condition: always() + env: + TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL) + TENANT_ID: $(TESTBED_TOOLS_MSAL_TENANT_ID) + CLIENT_ID: $(TESTBED_TOOLS_MSAL_CLIENT_ID) + CLIENT_SECRET: $(TESTBED_TOOLS_MSAL_CLIENT_SECRET) + displayName: Finalize running test plan diff --git a/.azure-pipelines/run-test-template.yml b/.azure-pipelines/run-test-template.yml index ae0de8405dd..bcf11bbb413 100644 --- a/.azure-pipelines/run-test-template.yml +++ b/.azure-pipelines/run-test-template.yml @@ -55,8 +55,12 @@ steps: pwd rm -rf $(Build.ArtifactStagingDirectory)/* + parent_dir=$(basename $PWD) - docker exec sonic-mgmt-2 bash -c "/var/src/$parent_dir/tests/kvmtest.sh -en -T ${{ parameters.tbtype }} -d /var/src/$parent_dir ${{ parameters.tbname }} ${{ parameters.dut }} ${{ parameters.section }}" + docker exec -e GIT_USER_NAME=$GIT_USER_NAME -e GIT_API_TOKEN=$GIT_API_TOKEN sonic-mgmt-2 bash -c "/var/src/$parent_dir/tests/kvmtest.sh -en -T ${{ parameters.tbtype }} -d /var/src/$parent_dir ${{ parameters.tbname }} ${{ parameters.dut }} ${{ parameters.section }}" + env: + GIT_USER_NAME: $(GIT_USER_NAME) + GIT_API_TOKEN: $(GIT_API_TOKEN) displayName: "Run tests" - script: | diff --git a/.azure-pipelines/upgrade_image.py b/.azure-pipelines/upgrade_image.py new file mode 100755 index 00000000000..819b7eaef64 --- /dev/null +++ b/.azure-pipelines/upgrade_image.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python2 + +"""Script for upgrading SONiC image for nightly tests. + +Main purpose of this script is to upgrade SONiC image for nightly tests. Based on the arguments passed in, the script +may power cycle the devices before upgrade. Or only power cycle the devices only when they are unreachable. + +Before upgrade to the target image, this script may upgrade to a previous image firstly. This is to avoid that the +devices are already running the target image. Then image upgrading could be skipped. 
The problem is that the current +image may has been updated by people for debugging purpose. Upgrade to a previous image firstly can ensure that the +target image is clean. +""" +import argparse +import logging +import os +import requests +import sys + +from setuptools import distutils + +_self_dir = os.path.dirname(os.path.abspath(__file__)) +base_path = os.path.realpath(os.path.join(_self_dir, "..")) +if base_path not in sys.path: + sys.path.append(base_path) +ansible_path = os.path.realpath(os.path.join(_self_dir, "../ansible")) +if ansible_path not in sys.path: + sys.path.append(ansible_path) + + +from devutil.devices.factory import init_localhost, init_testbed_sonichosts # noqa E402 +from devutil.devices.sonic import upgrade_image # noqa E402 + +from tests.common.plugins.pdu_controller.pdu_manager import pdu_manager_factory # noqa E402 + +logger = logging.getLogger(__name__) + + +RC_INIT_FAILED = 1 +RC_UPGRADE_PREV_FAILED = 2 +RC_UPGRADE_FAILED = 3 +RC_ENABLE_FIPS_FAILED = 4 +RC_SET_DOCKER_FOLDER_SIZE_FAILED = 5 +RC_SHUTDOWN_FAILED = 6 + + +def validate_args(args): + _log_level_map = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL + } + logging.basicConfig( + stream=sys.stdout, + level=_log_level_map[args.log_level], + format="%(asctime)s %(filename)s#%(lineno)d %(levelname)s - %(message)s" + ) + + args.skip_prev_image = False + if not args.prev_image_url: + args.prev_image_url = "{}.PREV.1".format(args.image_url) + logger.info("PREV_IMAGE_URL={}".format(args.prev_image_url)) + + try: + res_prev_image = requests.head(args.prev_image_url, timeout=20) + if res_prev_image.status_code != 200: + logger.info("Not able to get prev_image at {}, skip upgrading to prev_image.".format(args.prev_image_url)) + args.skip_prev_image = True + except Exception as e: + logger.info( + "Downloading prev image {} failed with {}, skip upgrading to prev image".format( + args.prev_image_url, 
repr(e) + ) + ) + args.skip_prev_image = True + + +def get_pdu_managers(sonichosts, conn_graph_facts): + """Get PDU managers for all the devices to be upgraded. + + Args: + sonichosts (SonicHosts): Instance of class SonicHosts + conn_graph_facts (dict): Connection graph dict. + + Returns: + dict: A dict of PDU managers. Key is device hostname. Value is the PDU manager object for the device. + """ + pdu_managers = {} + for hostname in sonichosts.hostnames: + pdu_links = conn_graph_facts["device_pdu_links"][hostname] + pdu_hostnames = [peer_info["peerdevice"] for peer_info in pdu_links.values()] + pdu_vars = {} + for pdu_hostname in pdu_hostnames: + pdu_vars[pdu_hostname] = sonichosts.get_host_visible_vars(pdu_hostname) + + pdu_managers[hostname] = pdu_manager_factory(hostname, None, conn_graph_facts, pdu_vars) + return pdu_managers + + +def main(args): + logger.info("Validating arguments") + validate_args(args) + + logger.info("Initializing hosts") + localhost = init_localhost(args.inventory, options={"verbosity": args.verbosity}) + sonichosts = init_testbed_sonichosts( + args.inventory, args.testbed_name, testbed_file=args.tbfile, options={"verbosity": args.verbosity} + ) + + if not localhost or not sonichosts: + sys.exit(RC_INIT_FAILED) + + conn_graph_facts = localhost.conn_graph_facts( + hosts=sonichosts.hostnames, + filepath=os.path.join(ansible_path, "files") + )["ansible_facts"] + + if args.always_power_cycle or args.power_cycle_unreachable: + pdu_managers = get_pdu_managers(sonichosts, conn_graph_facts) + + # Power cycle before upgrade + if args.always_power_cycle: + logger.info("Power cycle before upgrade") + for hostname, pdu_manager in pdu_managers.items(): + logger.info("Turn off power outlets to {}".format(hostname)) + pdu_manager.turn_off_outlet() + localhost.pause(seconds=30, prompt="Pause between power off/on") + for hostname, pdu_manager in pdu_managers.items(): + logger.info("Turn on power outlets to {}".format(hostname)) + 
pdu_manager.turn_on_outlet() + localhost.pause(seconds=180, prompt="Add some sleep to allow power cycled DUTs to come back") + + # Power cycle when unreachable + elif args.power_cycle_unreachable: + logger.info("Power cycle unreachable") + ping_results = {} + needs_sleep = False + for hostname, ip in zip(sonichosts.hostnames, sonichosts.ips): + logger.info("Ping {} @{} from localhost".format(hostname, ip)) + ping_failed = localhost.command( + "timeout 2 ping {} -c 1".format(ip), module_ignore_errors=True + ).get("localhost", {}).get("failed") + if ping_failed: + logger.info("Ping {} @{} from localhost failed. Going to power off it".format(hostname, ip)) + ping_results[hostname] = ping_failed + pdu_managers[hostname].turn_off_outlet() + needs_sleep = True + + if needs_sleep: + localhost.pause(seconds=30, prompt="Pause between power off/on") + + for hostname, ping_failed in ping_results.items(): + if ping_failed: + logger.info("Power on {}".format(hostname)) + pdu_managers[hostname].turn_on_outlet() + + if needs_sleep: + localhost.pause(seconds=180, prompt="Add some sleep to allow power cycled DUTs to come back") + + # Upgrade to prev image + if not args.skip_prev_image: + logger.info("upgrade to prev image at {}".format(args.prev_image_url)) + upgrade_success = upgrade_image( + sonichosts, + localhost, + args.prev_image_url, + upgrade_type=args.upgrade_type, + onie_pause_time=args.onie_pause_time + ) + + if not upgrade_success: + logger.error("Upgrade prev_image {} failed".format(args.prev_image_url)) + sys.exit(RC_UPGRADE_PREV_FAILED) + else: + logger.info("Upgraded to prev_image {}.".format(args.prev_image_url)) + + for hostname, version in sonichosts.sonic_version.items(): + logger.info("SONiC host {} current version {}".format(hostname, version.get("build_version"))) + + # Upgrade to target image + logger.info("upgrade to target image at {}".format(args.image_url)) + upgrade_success = upgrade_image( + sonichosts, + localhost, + args.image_url, + 
+ upgrade_type=args.upgrade_type, + onie_pause_time=args.onie_pause_time + ) + if not upgrade_success: + logger.error("Upgrade image {} failed".format(args.image_url)) + sys.exit(RC_UPGRADE_FAILED) + else: + logger.info("Upgraded to image {}".format(args.image_url)) + + current_build_version = None + for hostname, version in sonichosts.sonic_version.items(): + logger.info("SONiC host {} current version {}".format(hostname, version.get("build_version"))) + if not current_build_version: + current_build_version = version.get("build_version") + + # Enable FIPS + need_shutdown = False + if args.enable_fips: + logger.info("Need to enable FIPS") + try: + sonichosts.command("sonic-installer set-fips", module_attrs={"become": True}) + need_shutdown = True + except Exception as e: + logger.error("Failed to enable FIPS mode: {}".format(e)) + sys.exit(RC_ENABLE_FIPS_FAILED) + + # Set docker folder size, required for platforms with small disk + if args.docker_folder_size: + logger.info("Need to set docker folder size to '{}'".format(args.docker_folder_size)) + try: + sonichosts.lineinfile( + line="docker_inram_size={}".format(args.docker_folder_size), + path="/host/image-{}/kernel-cmdline-append".format(current_build_version), + state="present", + create=True, + module_attrs={"become": True} + ) + need_shutdown = True + except Exception as e: + logger.error("Failed to set docker folder size: {}".format(e)) + sys.exit(RC_SET_DOCKER_FOLDER_SIZE_FAILED) + else: + logger.info("Use default docker folder size") + + # Force reboot the device to apply changes + if need_shutdown: + logger.info("Need to shutdown") + try: + sonichosts.command("shutdown -r now", module_attrs={"become": True, "async": 300, "poll": 0}) + except Exception as e: + logger.error("Failed to shutdown: {}".format(e)) + sys.exit(RC_SHUTDOWN_FAILED) + + localhost.pause(seconds=180, prompt="Pause after reboot") + logger.info("===== UPGRADE IMAGE DONE =====") + + +if __name__ == "__main__": + + parser = 
argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Tool for SONiC image upgrade during nightly tests.") + + parser.add_argument( + "-i", "--inventory", + nargs="+", + dest="inventory", + help="Ansible inventory file") + + parser.add_argument( + "-t", "--testbed-name", + type=str, + required=True, + dest="testbed_name", + help="Testbed name. DUTs of the specified testbed will be upgraded." + ) + + parser.add_argument( + "-u", "--url", + type=str, + dest="image_url", + required=True, + help="SONiC image url." + ) + + parser.add_argument( + "--prev-url", + type=str, + dest="prev_image_url", + default=None, + help="SONiC image url." + ) + + parser.add_argument( + "--tbfile", + type=str, + dest="tbfile", + default="testbed.yaml", + help="Testbed definition file." + ) + + parser.add_argument( + "--always-power-cycle", + type=distutils.util.strtobool, + dest="always_power_cycle", + default=0, + help="Always power cycle DUTs before upgrade." + ) + + parser.add_argument( + "--power-cycle-unreachable", + type=distutils.util.strtobool, + dest="power_cycle_unreachable", + default=1, + help="Only power cycle unreachable DUTs." + ) + + parser.add_argument( + "--onie-pause-time", + type=int, + dest="onie_pause_time", + default=30, + help="Seconds to pause after booted into onie." + ) + + parser.add_argument( + "-y", "--type", + type=str, + choices=["sonic", "onie"], + dest="upgrade_type", + required=False, + default="sonic", + help="Upgrade type." + ) + + parser.add_argument( + "--enable-fips", + type=distutils.util.strtobool, + dest="enable_fips", + required=False, + default=0, + help="Enable FIPS." + ) + + parser.add_argument( + "--docker-folder-size", + type=str, + dest="docker_folder_size", + required=False, + default="", + help="Docker folder size. Required for devices with small SSD." + "If set to 0, docker folder size will not be updated. 
Example: '3000M'" + ) + + parser.add_argument( + "-v", "--verbosity", + type=int, + dest="verbosity", + default=2, + help="Log verbosity (0-3)." + ) + + parser.add_argument( + "--log-level", + type=str, + dest="log_level", + choices=["debug", "info", "warning", "error", "critical"], + default="debug", + help="Loglevel" + ) + + args = parser.parse_args() + main(args) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 59872e64456..0b9afe492c8 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,7 +7,7 @@ # These owners will be the default owners for everything in the repo. # Unless a later match takes precedence, # @Azure/sonic-fundamentals # will be requested for review when someone opens a pull request. -* @Azure/sonic-fundamentals +# @Azure/sonic-fundamentals # Ansible @@ -29,10 +29,18 @@ tests/platform_tests/test_auto_negotiation.py @sujinmkang @Junchao-Mellanox tests/platform_tests/thermal_control_test_helper.py @sujinmkang @Junchao-Mellanox tests/system_health @sujinmkang @Junchao-Mellanox tests/show_techsupport @yxieca @noaOrMlnx +tests/qos @XuChen-MSFT @wsycqyz tests/qos/test_buffer.py @neethajohn @stephenxs tests/qos/test_buffer_traditional.py @neethajohn @stephenxs tests/qos/files/dynamic_buffer_param.json @neethajohn @stephenxs tests/qos/files/mellanox @stephenxs @keboliu tests/qos/args @neethajohn @stephenxs tests/common/mellanox_data.py @keboliu @Junchao-Mellanox - +tests/dhcp_relay @kellyyeh +tests/bgp @StormLiangMS +tests/pfcwd @lipxu @neethajohn +tests/lldp @ZhaohuiS +tests/cacl @ZhaohuiS +tests/dualtor @lolyu @wsycqyz +tests/dualtor_io @lolyu @wsycqyz +tests/dualtor_mgmt @lolyu @wsycqyz diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml new file mode 100644 index 00000000000..bb10d01dde8 --- /dev/null +++ b/.github/codeql/codeql-config.yml @@ -0,0 +1,7 @@ +name: "CodeQL config" +queries: + - uses: security-and-quality + - uses: security-extended +query-filters: + - exclude: + id: py/polynomial-redos 
diff --git a/.github/workflows/automerge_scan.yml b/.github/workflows/automerge_scan.yml new file mode 100644 index 00000000000..d02297e50d9 --- /dev/null +++ b/.github/workflows/automerge_scan.yml @@ -0,0 +1,73 @@ +name: AutoMergeScan +on: + schedule: + - cron: '31 */2 * * *' + workflow_dispatch: + +jobs: + automerge_scan: + if: github.repository_owner == 'sonic-net' + runs-on: ubuntu-latest + steps: + - name: Debug + env: + TOKEN: ${{ secrets.TOKEN }} + run: | + set -e + + echo ${TOKEN} | gh auth login --with-token + gh pr list -R sonic-net/sonic-mgmt -A mssonicbld --json additions,assignees,author,baseRefName,body,changedFiles,closed,closedAt,comments,commits,createdAt,deletions,files,headRefName,headRepository,headRepositoryOwner,id,isCrossRepository,isDraft,labels,latestReviews,maintainerCanModify,mergeCommit,mergeStateStatus,mergeable,mergedAt,mergedBy,milestone,number,potentialMergeCommit,projectCards,reactionGroups,reviewDecision,reviewRequests,reviews,state,statusCheckRollup,title,updatedAt,url > prs.log + cat prs.log | jq + - name: Main + run: | + set -e + + count=$(cat prs.log | jq 'length') + for ((i=0;i<$count;i++)) + do + url=$(cat prs.log | jq -r ".[$i].url") + created_at=$(cat prs.log | jq -r ".[$i].createdAt") + echo PR: $(($i+1))/$count, URL: $url, createdAt: $created_at, now: $(date -u +"%FT%TZ") + [[ "$url" == "" ]] && continue + [[ $created_at > $(date --date "1 hour ago" -u +"%FT%TZ") ]] && continue + # only check automerge PR. 
+ cat prs.log | jq -r ".[$i].labels[].name" | grep automerge || continue + + checks=$(cat prs.log | jq ".[$i].statusCheckRollup") + checks_count=$(echo $checks | jq 'length') + echo Checks count: $checks_count + for ((j=0;j<$checks_count;j++)) + do + check=$(echo $checks | jq ".[$j]") + state=$(echo $check | jq -r '.state') + conclusion=$(echo $check | jq -r '.conclusion') + name=$(echo $check | jq -r '.name') + + # EasyCLA success flag: state=SUCCESS + # Others success flag: conclusion in SUCCESS,NEUTRAL + # Ignore Azure.sonic-mgmt stage check result. It may be set continueOnError + echo "$name" | grep "Azure.sonic-mgmt (" && continue + # rerun Azure.sonic-mgmt per day + if [[ "$name" == "Azure.sonic-mgmt" ]] && [[ "$conclusion" == "FAILURE" ]];then + completedAt=$(echo $check | jq -r '.completedAt') + [[ "$completedAt" < $(date --date "2 hour ago" -u +"%FT%TZ") ]] && [[ $(date -u +"%T") < "02:00:00" ]] && gh pr comment $url --body "/azp run Azure.sonic-mgmt" + fi + # Ignore Semgrep, it has issues. + [[ "$name" == "Semgrep" ]] && continue + if [[ "$state" == "SUCCESS" ]];then + # check pass + continue + elif [[ "$conclusion" == "SUCCESS" ]] || [[ "$conclusion" == "NEUTRAL" ]];then + # check pass + continue + else + echo "$url Check failed!!!" 
+ echo $check | jq + continue 2 + fi + done + # merge the PR + echo ========Merging PR======== + gh pr merge --rebase --admin -R sonic-net/sonic-mgmt $url || true + echo ========Finished PR======== + done diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000000..86f17931383 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,44 @@ +# For more infomation, please visit: https://github.com/github/codeql-action + +name: "CodeQL" + +on: + push: + branches: + - 'master' + - '202[0-9][0-9][0-9]' + pull_request_target: + branches: + - 'master' + - '202[0-9][0-9][0-9]' + +jobs: + analyze: + if: github.repository_owner == 'sonic-net' + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + config-file: ./.github/codeql/codeql-config.yml + languages: ${{ matrix.language }} + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/pr_cherrypick_poststep.yml b/.github/workflows/pr_cherrypick_poststep.yml new file mode 100644 index 00000000000..d51776e8fcc --- /dev/null +++ b/.github/workflows/pr_cherrypick_poststep.yml @@ -0,0 +1,49 @@ +name: PostCherryPick +on: + pull_request_target: + types: + - closed + branches: + - '20*' + +jobs: + post_cherry_pick: + if: github.repository_owner == 'sonic-net' && github.event.pull_request.merged == true && contains(github.event.pull_request.labels.*.name, 'automerge') && github.event.pull_request.head.user.login == 'mssonicbld' && startsWith(github.event.pull_request.title, '[action]') + runs-on: ubuntu-latest + steps: + - name: Debug + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: echo $GITHUB_CONTEXT | jq + - name: Checkout + uses: actions/checkout@v3 + with: + persist-credentials: false + - name: Main + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + TOKEN: ${{ secrets.TOKEN }} + run: | + set -e + pr_url=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request._links.html.href") + pr_id=$(echo $GITHUB_CONTEXT | jq -r ".event.number") + base_ref=$(echo $GITHUB_CONTEXT | jq -r ".base_ref") + echo ${TOKEN} | gh auth login --with-token + title=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.title") + origin_pr_id=$(echo $title | grep -Eo "\[action\] \[PR:[0-9]*\]" | grep -Eo "[0-9]*") + origin_pr_url=$(echo $pr_url | sed "s/$pr_id/$origin_pr_id/") + echo ============================= + echo pr_url: $pr_url + echo pr_id: $pr_id + echo base_ref: $base_ref + echo title: $title + echo origin_pr_id: $origin_pr_id + echo origin_pr_url: $origin_pr_url + echo ============================= + # Add label + if [[ "$origin_pr_id" == "" ]];then + echo 
"original PR didn't found." + exit 1 + fi + gh pr edit $origin_pr_url --add-label "Included in ${base_ref} branch" + gh pr edit $origin_pr_url --remove-label "Created PR to ${base_ref} branch" diff --git a/.github/workflows/pr_cherrypick_prestep.yml b/.github/workflows/pr_cherrypick_prestep.yml new file mode 100644 index 00000000000..2f5be6035c4 --- /dev/null +++ b/.github/workflows/pr_cherrypick_prestep.yml @@ -0,0 +1,138 @@ +name: PreCherryPick +on: + pull_request_target: + types: + - labeled + - closed + branches: + - master + +jobs: + pre_cherry_pick: + if: github.repository_owner == 'sonic-net' && github.event.pull_request.merged == true && ( (github.event.action == 'closed' && contains(join(github.event.pull_request.labels.*.name, ','), 'Approved for 20')) || (github.event.action == 'labeled' && startsWith(github.event.label.name, 'Approved for 20')) ) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + persist-credentials: false + - name: Debug + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: echo $GITHUB_CONTEXT | jq + - name: Main + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + TOKEN: ${{ secrets.TOKEN }} + run: | + set -e + + sha=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.merge_commit_sha") + pr_id=$(echo $GITHUB_CONTEXT | jq -r ".event.number") + pr_url=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request._links.html.href") + repository=$(echo $GITHUB_CONTEXT | jq -r ".repository") + labels=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.labels[].name") + author=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.user.login") + branches=$(git branch -a --list 'origin/20????' 
| awk -F/ '{print$3}' | grep -E "202[0-9]{3}") + if [[ $(echo $GITHUB_CONTEXT | jq -r ".event.action") == "labeled" ]];then + labels=$(echo $GITHUB_CONTEXT | jq -r ".event.label.name") + fi + title=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.title") + body=$(echo $GITHUB_CONTEXT | jq -r ".event.pull_request.body") + echo ============================= + echo SHA: $sha + echo PRID: $pr_id + echo pr_url: $pr_url + echo repository: $repository + echo branches: $branches + echo labels: + echo "$labels" + echo ${TOKEN} | gh auth login --with-token + echo author: $author + echo title: $title + echo body: "$body" + echo ============================= + + git config user.name mssonicbld + git config user.email sonicbld@microsoft.com + git config credential.https://github.zerozr99.workers.dev.username mssonicbld + git remote add mssonicbld https://mssonicbld:${TOKEN}@github.com/mssonicbld/sonic-mgmt + git fetch mssonicbld + git remote -vv + + cherry_pick(){ + set -e + local create_pr='' + while read label + do + echo label: $label + if [[ "$label" == "Approved for $branch branch" ]];then + create_pr=1 + fi + if [[ "$label" == "Created PR to $branch branch" ]];then + echo "already has tag: Created PR to $branch branch, return" + return 0 + fi + if [[ "$label" == "Included in $branch branch" ]];then + echo "already has tag: Included in $branch branch, return" + return 0 + fi + if [[ "$label" == "Cherry Pick Conflict_$branch" ]];then + echo "already has tag: Cherry Pick Conflict_$branch, return" + return 0 + fi + done <<< "$labels" + + if [[ "$create_pr" != "1" ]];then + echo "Didn't find 'Approved for $branch branch' tag." + return 0 + fi + # Begin to cherry-pick PR + git cherry-pick --abort 2>/dev/null || true + git clean -xdff 2>/dev/null || true + git reset HEAD --hard || true + git checkout -b $branch --track origin/$branch + git status | grep "working tree clean" + + if ! git cherry-pick $sha;then + echo 'cherry-pick failed.' 
+ git cherry-pick --abort + git status | grep "working tree clean" + # Add label + gh pr edit $pr_url --add-label "Cherry Pick Conflict_$branch" + echo 'Add label "Cherry Pick Conflict_$branch" success' + gh pr comment $pr_url --body "@${author} PR conflicts with $branch branch" + echo 'Add commnet "@${author} PR conflicts with $branch branch"' + else + # Create PR to release branch + git push mssonicbld HEAD:cherry/$branch/${pr_id} -f + result=$(gh pr create -R ${repository} -H mssonicbld:cherry/$branch/${pr_id} -B $branch -t "[action] [PR:$pr_id] $title" -b "$body" 2>&1) + echo $result | grep "already exists" && { echo $result; return 0; } + echo $result | grep github.com || { echo $result; return 1; } + new_pr_rul=$(echo $result | grep github.com) + echo new_pr_rul: $new_pr_rul + + # Add label to old PR + gh pr edit $pr_url --add-label "Created PR to $branch branch" + echo Add label Created PR to $branch branch + # Add comment to old PR + gh pr comment $pr_url --body "Cherry-pick PR to $branch: ${new_pr_rul}" + echo Add comment to old PR + + # Add label to new PR + gh pr edit $new_pr_rul --add-label "automerge" + echo Add label automerge to new PR + # Add comment to new PR + gh pr comment $new_pr_rul --body "Original PR: ${pr_url}" + echo Add comment to new PR + fi + } + + for branch in $branches + do + echo ------------------------------------------- + echo Begin to parse Branch: $branch + cherry_pick + done + diff --git a/ansible/TestbedProcessing.py b/ansible/TestbedProcessing.py old mode 100644 new mode 100755 index 887f4010c8e..9ad12b16e2a --- a/ansible/TestbedProcessing.py +++ b/ansible/TestbedProcessing.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python2 + from shutil import copyfile import yaml import datetime diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index bc48c9ba73c..35a7eb47fc4 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -14,7 +14,7 @@ inventory = /etc/ansible/hosts library = 
library:library/ixia module_utils = module_utils -remote_tmp = $HOME/.ansible/tmp +remote_tmp = /tmp/.ansible-$USER pattern = * forks = 5 poll_interval = 15 diff --git a/ansible/collect_show_tech.yml b/ansible/collect_show_tech.yml new file mode 100644 index 00000000000..3c620bf45ea --- /dev/null +++ b/ansible/collect_show_tech.yml @@ -0,0 +1,63 @@ +# This Playbook run `show techsupport` on DUTs of a specific testbed and fetch the result. +# +# Parameters: +# -e testbed_name=vms1-1 - the testbed name specified in testbed.yaml file +# -e output_path - output path for dumped files, default is ./output if not defined +# +# Example Usage: +# ansible-playbook collect_show_tech.yml -i lab -e testbed_name=vms1-1 + +- hosts: sonic + gather_facts: no + tasks: + - name: Check variable testbed_name is defained + fail: msg="testbed_name is not defined" + when: testbed_name is not defined + + - name: Collect DUTs defined in testbed + block: + - name: Set default testbed file + set_fact: + testbed_file: testbed.yaml + when: testbed_file is not defined + + - name: Gather testbed information + test_facts: + testbed_name: "{{ testbed_name }}" + testbed_file: "{{ testbed_file }}" + + - name: Create group for target DUTs + add_host: + name: "{{ item }}" + groups: target_duts + loop: "{{ testbed_facts['duts'] }}" + delegate_to: localhost + run_once: True + +- hosts: target_duts + gather_facts: no + tasks: + - name: Run `show techsupport` on DUT + shell: show techsupport --silent | tail -1 + register: show_tech_result + + - name: Parse the location of dump file + set_fact: + dump_file: "{{ show_tech_result.stdout_lines[0] }}" + + - name: Set default output path + set_fact: + output_path: ./output + when: output_path is not defined + + - name: Fetch show techsupport dump file from DUTs to localhost + fetch: + src: "{{ dump_file }}" + dest: "{{ output_path }}/" + flat: true + + - name: Delete show techsupport dump file on DUTs + file: + path: "{{ dump_file }}" + state: absent + become: 
true diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index 3c71e0e95d4..23a3ddfac18 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -65,6 +65,10 @@ set_fact: ptf_image: "{{ testbed_facts['ptf_image_name'] }}" + - name: check if testbed is an ixia testbed + set_fact: + is_ixia_testbed: "{{ true if ptf_image == 'docker-keysight-api-server' else false }}" + - name: set vm set_fact: vm_base: "{% if testbed_facts['vm_base'] != '' %}{{ testbed_facts['vm_base'] }}{% else %}''{% endif %}" @@ -172,6 +176,15 @@ restart_pmon: no when: "'dualtor' in topo" + - name: gather hwsku for LeafRouter that supports dualtor deployment + set_fact: + hwsku_list_dualtor_t1: "['ACS-MSN4600C', 'Arista-7260CX3-C64']" + + - name: enable tunnel_qos_remap for T1 in dualtor deployment + set_fact: + enable_tunnel_qos_remap: true + when: "('leafrouter' == (vm_topo_config['dut_type'] | lower)) and (hwsku in hwsku_list_dualtor_t1) and not (is_ixia_testbed)" + - name: set default vm file path set_fact: vm_file: veos @@ -370,7 +383,7 @@ pause: seconds: 60 become: true - when: proxy_env is defined + when: proxy_env is defined and deploy is defined and deploy|bool == true - block: - name: saved original minigraph file in SONiC DUT(ignore errors when file does not exist) @@ -496,10 +509,51 @@ timeout: 600 changed_when: false + - name: Sync DUT system time with NTP server + block: + - name: Wait for ntp.service restart after reload minigraph + become: true + service: name=ntp state=started + + - name: Stop ntp.service on DUT + become: true + service: name=ntp state=stopped + + - name: Sync DUT system time with NTP server + become: true + command: ntpd -gq + ignore_errors: true + async: 60 + poll: 10 + + - name: Start ntp.service on DUT + become: true + service: name=ntp state=restarted enabled=true + + - name: config static route for trex traffic passthrough + become: true + command: "{{ item }}" + 
with_items: + - config route add prefix 48.0.0.0/8 nexthop 10.0.0.59 + - config route add prefix 16.0.0.0/8 nexthop 10.0.0.57 + when: topo == "wan-3link-tg" + - name: execute cli "config bgp startup all" to bring up all bgp sessions for test become: true shell: config bgp startup all + - name: Configure TACACS + become: true + shell: "{{ tacacs_config_cmd }}" + loop: + - config tacacs passkey {{ tacacs_passkey }} + - config tacacs authtype pap + - config aaa authentication login tacacs+ + loop_control: + loop_var: tacacs_config_cmd + ignore_errors: true + when: tacacs_enabled_by_default is defined and tacacs_enabled_by_default|bool == true + - name: execute configlet application script, which applies configlets in strict order. become: true shell: bash "/etc/sonic/apply_clet.sh" @@ -510,9 +564,21 @@ shell: config save -y when: save is defined and save|bool == true + - name: remove running golden config file if exists + become: True + file: path=/etc/sonic/running_golden_config.json + state=absent + - name: cleanup all cached facts shell: python ../tests/common/cache/facts_cache.py delegate_to: localhost ignore_errors: true + # In pr https://github.com/sonic-net/sonic-buildimage/pull/12109, it decrease the sshd timeout + # which may cause timeout when executing `generate_dump -s yesterday`. 
+ # Increase this time during deploying minigraph + - name: Reset sshd timeout + become: True + shell: sed -i "s/^ClientAliveInterval [0-9].*/ClientAliveInterval 900/g" /etc/ssh/sshd_config && systemctl restart sshd + when: deploy is defined and deploy|bool == true diff --git a/tests/snappi/ecn/ecn_args/__init__.py b/ansible/devutil/devices/__init__.py similarity index 100% rename from tests/snappi/ecn/ecn_args/__init__.py rename to ansible/devutil/devices/__init__.py diff --git a/ansible/devutil/devices/ansible_hosts.py b/ansible/devutil/devices/ansible_hosts.py new file mode 100644 index 00000000000..3a605686c8a --- /dev/null +++ b/ansible/devutil/devices/ansible_hosts.py @@ -0,0 +1,1124 @@ +"""Basic classes and functions for running ansible modules on devices by python. + +This idea is mainly inspired by the pytest-ansible project. With the classes and functions defined here, we can run any +ansible modules on any hosts defined in inventory file. + +Instead of writing ansible playbook to operate on the testbed devices, we can just write python. + +Comparing with ansible playbook, we can take advantage of a real programming language. The drawback is that python +programming experience is required. + +Comparing with pytest-ansible, we do not need pytest. This design supports some ansible features not supported by +pytest-ansible: +* fork: Pytest-ansible does not support running ansible modules in parallel. This design uses ansible's builtin forking + capability to run modules in parallel. +* module attributes: Ansible supports additional module attributes that can be specified for each task in playbook. + These module attributes can affect execution of the modules, for example "become", "async", etc. Pytest-ansible + does not support these attributes. With this design, we can use keyword argument `module_attrs` to specify + module attributes while calling an ansible module. + +Not all ansible's builtin features are supported by this design. 
For example: +* Notify and event handler. (We can use python's libs to support that) + +This idea is still new. I haven't figured out all of its potentials and limitations. Feedbacks, suggestions and +contributions are more than welcomed. +""" +import copy +import inspect +import json +import logging +import os + +import six + +from ansible.executor.task_queue_manager import TaskQueueManager +from ansible.inventory.manager import InventoryManager +from ansible.parsing.dataloader import DataLoader +from ansible.vars.manager import VariableManager +from ansible.playbook.play import Play + +from ansible.plugins.callback import CallbackBase +from ansible.plugins.loader import module_loader +from ansible import context +from ansible.module_utils.common.collections import ImmutableDict + +if six.PY2: + FileNotFoundError = IOError + +logger = logging.getLogger("ansible_hosts") + +try: + from ansible.executor import task_result + task_result._IGNORE = ("skipped", ) +except Exception as e: + logging.error("Hack for https://github.com/ansible/pytest-ansible/issues/47 failed: {}".format(repr(e))) + + +class UnsupportedAnsibleModule(Exception): + pass + + +class RunAnsibleModuleFailed(Exception): + pass + + +class NoAnsibleHostError(Exception): + pass + + +class MultipleAnsibleHostsError(Exception): + pass + + +class ResultCollector(CallbackBase): + """Call back for getting single ansible module execution result on ansible hosts. + + Args: + CallbackBase (class): Base class for all callbacks defined in ansible. 
+ """ + def __init__(self, *args, **kwargs): + super(ResultCollector, self).__init__(*args, **kwargs) + self._results = {} + + def v2_runner_on_ok(self, result): + hostname = result._host.get_name() + + res = dict(hostname=hostname, reachable=True, failed=False) + res.update(result._result) + self._results[hostname] = res + + def v2_runner_on_failed(self, result, *args, **kwargs): + hostname = result._host.get_name() + + res = dict(hostname=hostname, reachable=True, failed=True) + res.update(result._result) + self._results[hostname] = res + + def v2_runner_on_unreachable(self, result, *args, **kwargs): + hostname = result._host.get_name() + + res = dict(hostname=hostname, reachable=False, failed=True) + res.update(result._result) + self._results[hostname] = res + + @property + def results(self): + """Property for returning execution result of single ansible module on ansible hosts. + + The result is a dict keyed by hostname. Value is the ansible module execution result on that host. + + Returns: + dict: Result is a dict. Key is hostname. Value is ansible module execution result on that host. + """ + return self._results + + +class BatchResultsCollector(CallbackBase): + """Call back for getting multiple ansible module execution results on ansible hosts. + + Args: + CallbackBase (class): Base class for all callbacks defined in ansible. 
+ """ + def __init__(self, *args, **kwargs): + super(BatchResultsCollector, self).__init__(*args, **kwargs) + self._results = {} + + def v2_runner_on_ok(self, result, *args, **kwargs): + hostname = result._host.get_name() + if hostname not in self._results: + self._results[hostname] = [] + + res = dict(hostname=hostname, reachable=True, failed=False) + res.update(result._result) + self._results[hostname].append(res) + + def v2_runner_on_failed(self, result, *args, **kwargs): + hostname = result._host.get_name() + if hostname not in self._results: + self._results[hostname] = [] + + res = dict(hostname=hostname, reachable=True, failed=True) + res.update(result._result) + self._results[hostname].append(res) + + def v2_runner_on_unreachable(self, result): + hostname = result._host.get_name() + if hostname not in self._results: + self._results[hostname] = [] + + res = dict(hostname=hostname, reachable=False, failed=True) + res.update(result._result) + self._results[hostname].append(result._result) + + @property + def results(self): + """Property for returning multiple ansible module execution results of multiple ansible hosts. + + The result is a dict keyed by hostname. Value is list of the ansible module execution results on that host. + + Returns: + dict: Result is a dict. Key is hostname. Value is ansible module execution result. + """ + return self._results + + +class AnsibleHostsBase(object): + """Base class for running ansible modules on hosts defined in ansible inventory file. + + This class defines the basic methods for running ansible modules on hosts defined in ansible inventory file. + + DO NOT use this class directly. Use AnsibleHosts or AnsibleHostsParallel instead. + """ + def __init__( + self, + inventories, + host_pattern, + loader=None, + inventory_manager=None, + variable_manager=None, + options={}, + hostvars={}): + """Constructor for AnsibleHostsBase. + + Args: + inventories (str or list): Path to ansible inventory file or list of inventory files. 
+ host_pattern (str or list): Host pattern string or list of host pattern strings. Interpreted by ansible. + Follow the same rules of specifying ansible hosts in ansible play book. + Examples: + "vlab-01" + "VM0100, VM0101" + ["VM0100", "VM0101"] + "server_1:&vm_host" + loader (DataLoader, optional): Ansible DataLoader. Defaults to None. + inventory_manager (InventoryManager, optional): Ansible InventoryManager. Defaults to None. + variable_manager (VariableManager, optional): Ansible VariableManager. Defaults to None. + options (dict, optional): Options affecting ansible execution. Supports options that can be passed in from + ansible-playbook command line. Defaults to {}. + Examples: + options={"become": True, "forks": 10} + hostvars (dict, optional): Additional ansible variables for ansible hosts. Similar as using `-e` argument + of ansible-playbook command line to specify additional host variables. Defaults to {}. + """ + self.inventories = inventories + + # Check existence of inventories only when host_pattern is not "localhost" + if host_pattern != "localhost": + if isinstance(self.inventories, list): + for inventory in self.inventories: + if not os.path.exists(inventory): + raise FileNotFoundError("Inventory file {} not found.".format(inventory)) + else: + if not os.path.exists(self.inventories): + raise FileNotFoundError("Inventory file {} not found.".format(self.inventories)) + + self.host_pattern = host_pattern + if loader: + self.loader = loader + else: + self.loader = DataLoader() + + if inventory_manager: + if isinstance(self.inventories, list): + sources = self.inventories + else: + sources = [self.inventories] + if set(sources) != set(inventory_manager._sources): + inventory_manager._sources = sources + inventory_manager.parse_sources() + self.im = inventory_manager + else: + self.im = InventoryManager(loader=self.loader, sources=self.inventories) + + if variable_manager: + self.vm = variable_manager + else: + self.vm = 
VariableManager(loader=self.loader, inventory=self.im) + + self.options = { + "forks": 6, + "connection": "smart", + "verbosity": 2, + "become_method": "sudo" + } + if options: + self.options.update(options) + + if hostvars: + self.vm.extra_vars.update(hostvars) + + # Ansible inventory hosts, list of + self.ans_inv_hosts = self.im.get_hosts(self.host_pattern) + if len(self.ans_inv_hosts) == 0: + raise NoAnsibleHostError( + "No host '{}' in inventory files '{}'".format(self.host_pattern, self.inventories) + ) + self.hostnames = [host.name for host in self.ans_inv_hosts] + self.hosts_count = len(self.hostnames) + self.ips = [host.get_vars().get("ansible_host", None) for host in self.ans_inv_hosts] + + self._loaded_modules = [] + + @staticmethod + def build_task(module_name, args=[], kwargs={}, module_attrs={}): + """Build a dict represents a task in ansible playbook. + + Args: + module_name (str): Name of the ansible module to be executed in the task. + args (list, optional): Positional arguments of ansible module. Enclosed in a list. Defaults to []. + kwargs (dict, optional): Keyword arguments of ansible module. Enclosed in a dict. Defaults to {}. + module_attrs (dict, optional): Attributes affect module execution, eg: become, async, poll, etc. + Check ansible module reference documentation for module attributes applicable to modules. + * https://docs.ansible.com/ansible/2.9/modules/list_of_all_modules.html + * https://docs.ansible.com/ansible/latest/collections/index.html + + Returns: + dict: A dict represents a task in ansible playbook. 
+ """ + kwargs = copy.deepcopy(kwargs) # Copy to avoid argument passed by reference issue + if args: + kwargs["_raw_params"] = " ".join(args) + + task_data = { + "action": { + "module": module_name, + "args": kwargs + }, + } + if module_attrs: + task_data.update(module_attrs) + + return task_data + + @staticmethod + def run_tasks( + hosts, + loader, + inventory_manager, + variable_manager, + options={ + "forks": 6, + "connection": "smart", + "verbosity": 2, + "become_method": "sudo" + }, + passwords={"vault_pass": "any"}, + gather_facts=False, + tasks=[]): + """Use ansible's TaskQueueManager to run tasks on hosts. + + Defined this method as a static method on purpose so that scripts may use this method to run ansible tasks + without initializing instances of AnsibleHosts or AnsibleHost. + + Args: + hosts (str or list): Host pattern string or list of host pattern strings. Interpreted by ansible. + loader (DataLoader): Ansible DataLoader. + inventory_manager (InventoryManager): Ansible InventoryManager. + variable_manager (VariableManager): Ansible VariableManager. + options (dict, optional): Options affecting ansible execution. Supports options that can be passed in from + ansible-playbook command line. Defaults to {}. + passwords (dict, optional): Passwords for ansible. Defaults to {"vault_pass": "any"}. + gather_facts (bool, optional): Whether to gather facts before running tasks. Defaults to False. + tasks (list, optional): List of tasks to be run on hosts. Defaults to []. + + Returns: + dict: Results of running ansible modules in playbook. If task list length is 1, ResultCollector is used + as result collector callback. If task list length is greater than 1, BatchResultsCollector is used as + result collector callback. 
+ """ + tqm = None + try: + context.CLIARGS = ImmutableDict(**options) + + play = Play().load( + { + "hosts": hosts, + "gather_facts": "yes" if gather_facts else "no", + "tasks": tasks, + }, + variable_manager=variable_manager, + loader=loader, + ) + + play_tasks = play.get_tasks()[0] + + if len(play_tasks) > 1: + callback = BatchResultsCollector() + else: + callback = ResultCollector() + + tqm = TaskQueueManager( + inventory=inventory_manager, + variable_manager=variable_manager, + loader=loader, + passwords=passwords, + stdout_callback=callback, + forks=options.get("forks") + ) + tqm.run(play) + return callback.results + finally: + if tqm is not None: + tqm.cleanup() + + def _log_modules(self, caller_info, module_info, verbosity): + """Log ansible modules to be executed. + + Args: + caller_info (tuple): Caller information. Tuple of (filename, line_number, function_name, lines, index) got + from inspect.stack(). + module_info (dict or list): Information of ansible modules to be executed. If only one module is executed, + module_info is a dict. If multiple modules are executed, module_info is a list of dicts. + verbosity (int): Verbosity level. If verbosity is 0, no log will be printed. 
+ """ + if verbosity <= 0: # Do not log anything + return + + filename, line_number, function_name, lines, index = caller_info + + if isinstance(self, AnsibleHosts): + hosts_str = json.dumps(self.hostnames) + elif isinstance(self, AnsibleHost): + hosts_str = json.dumps(self.hostnames[0]) + else: + raise TypeError("Unsupported type of object: {}".format(type(self))) + + if isinstance(module_info, dict): + module_names = json.dumps(module_info.get("module_name", "")) + hint_str = "AnsibleModule::{}".format(module_names) + elif isinstance(module_info, list): + module_names = ", ".join([module_item.get("module_name", "") for module_item in module_info]) + hint_str = "AnsibleModules::{}".format(json.dumps(module_names)) + else: + raise TypeError("Got {}, expected tuple or list of tuples, tuple items: " + "module_name, module_args, module_kwargs, module_attrs".format(type(module_info))) + + task_headline = "===== {} -> {} ".format(self.host_pattern, module_names) + task_headline += "=" * (120 - len(task_headline)) + logger.debug(task_headline) + + caller_str = "{}::{}#{}".format(filename, function_name, line_number) + + if verbosity == 1: # Log module name only. Do not log args. + logger.debug("{}: {} {}".format( + caller_str, + hosts_str, + hint_str, + )) + elif verbosity >= 2: + if verbosity == 2: # Log module name and args without indention + indent = None + newline = "" + elif verbosity >= 3: # Log module name and args with indention + indent = 4 + newline = "\n" + + logger.debug("{}: {} -> {}, {}{}{}".format( + caller_str, + hosts_str, + hint_str, + newline, + json.dumps(module_info, indent=indent), + newline + )) + + def _log_results(self, caller_info, module_info, results, verbosity): + """Log ansible module results. + + Args: + caller_info (tuple): Caller information. Tuple of (filename, line_number, function_name, lines, index) got + from inspect.stack(). + module_info (dict or list): Information of ansible modules to be executed. 
If only one module is executed, + module_info is a dict. If multiple modules are executed, module_info is a list of dicts. + results (dict): Results of ansible modules. + verbosity (int): Verbosity level. If verbosity is 0, no log will be printed. + """ + if verbosity <= 0: # Do not log anything + return + + if isinstance(self, AnsibleHosts): + hosts_str = json.dumps(self.hostnames) + elif isinstance(self, AnsibleHost): + hosts_str = json.dumps(self.hostnames[0]) + results = results.get(self.hostnames[0], {}) + else: + raise TypeError("Unsupported type of object: {}".format(type(self))) + + filename, line_number, function_name, lines, index = caller_info + caller_str = "{}::{}#{}".format(filename, function_name, line_number) + + if isinstance(module_info, dict): + module_names = json.dumps(module_info.get("module_name", "")) + hint_str = "AnsibleModule::{}".format(module_names) + elif isinstance(module_info, list): + module_names = ", ".join([module_item.get("module_name", "") for module_item in module_info]) + hint_str = "AnsibleModules::{}".format(json.dumps(module_names)) + else: + raise TypeError("Got {}, expected tuple or list of tuples, tuple items: " + "module_name, module_args, module_kwargs, module_attrs".format(type(module_info))) + + if verbosity == 1: # Log module only + logger.debug("{}: {} -> {} executed".format( + caller_str, + hosts_str, + hint_str + )) + elif verbosity >= 2: # Log result without indention + if verbosity == 2: + indent = None + newline = "" + elif verbosity >= 3: + indent = 4 + newline = "\n" + + logger.debug("{}: {} -> {} | Results =>{}{}{}".format( + caller_str, + hosts_str, + hint_str, + newline, + json.dumps(results, indent=indent), + newline + )) + + def _check_results(self, caller_info, module_info, results, module_ignore_errors, verbosity): + """Check ansible module results. + + Args: + caller_info (tuple): Caller information. Tuple of (filename, line_number, function_name, lines, index) got + from inspect.stack(). 
+ module_info (dict or list): Information of ansible modules to be executed. If only one module is executed, + module_info is a dict. If multiple modules are executed, module_info is a list of dicts. + results (dict): Results of ansible modules. + module_ignore_errors (bool): Ignore module errors or not. If True, no error will be raised even if module + execution failed. + verbosity (int): Verbosity level. If verbosity is 0, details of failed modules will not be included in the + error message. + """ + if module_ignore_errors: + return + + filename, line_number, function_name, lines, index = caller_info + caller_str = "{}::{}#{}".format(filename, function_name, line_number) + + if isinstance(self, AnsibleHosts): + hosts_str = json.dumps(self.hostnames) + elif isinstance(self, AnsibleHost): + hosts_str = json.dumps(self.hostnames[0]) + results = results.get(self.hostnames[0], {}) + else: + raise TypeError("Unsupported type of object: {}".format(type(self))) + + if isinstance(module_info, dict): + module_names = json.dumps(module_info.get("module_name", "")) + hint_str = "AnsibleModule::{}".format(module_names) + elif isinstance(module_info, list): + module_names = ", ".join([module_item.get("module_name", "") for module_item in module_info]) + hint_str = "AnsibleModules::{}".format(json.dumps(module_names)) + else: + raise TypeError("Got {}, expected tuple or list of tuples, tuple items: " + "module_name, module_args, module_kwargs, module_attrs".format(type(module_info))) + + err_msg = "" + if verbosity <= 0: # No information of module and result + err_msg = "Run ansible module failed" + elif verbosity == 1: # Log module name only. 
Do not log args and result + err_msg = "{}: {} -> {} failed".format( + caller_str, + hosts_str, + hint_str + ) + elif verbosity >= 2: # Log module name, args and result + if verbosity == 2: + indent = None + elif verbosity >= 3: + indent = 4 + + err_msg = "{}: {} -> {} failed, Results => {}".format( + caller_str, + hosts_str, + hint_str, + json.dumps(results, indent=indent) + ) + + if isinstance(self, AnsibleHosts): + if isinstance(module_info, dict): + failed = any([res["failed"] for res in results.values()]) + else: + failed = any([any([res["failed"] for res in module_results]) for module_results in results.values()]) + elif isinstance(self, AnsibleHost): + if isinstance(module_info, dict): + failed = results["failed"] + else: + failed = any([res["failed"] for res in results]) + if failed: + raise RunAnsibleModuleFailed(err_msg) + + def _run_ansible_module(self, *args, **kwargs): + """Run ansible module. + + DO NOT call this function directly. Use instance_name.() instead. + + This class has "__getattr__" defined. This function will be called when an attribute is not found in the + instance. This function will parse the attribute name and consider the attribute name as an Ansible module name. + Then it will call this function to run the Ansible module. + + Special keyword arguments: + module_ignore_errors: If this argument is set to True, no RunAnsibleModuleFailed exception will be raised. + module_attrs: A dict for specifying module attributes that may affect execution of the ansible module. + Reference documents: + * https://docs.ansible.com/ansible/2.9/modules/list_of_all_modules.html + * https://docs.ansible.com/ansible/latest/collections/index.html + verbosity: integer from 0-3. + + Raises: + RunAnsibleModuleFailed: Raise this exception if result is failed AND keyword argument + `module_ignore_errors` is False. + + Args: + *args: Positional arguments of ansible module. + **kwargs: Keyword arguments of ansible module. 
+ + Returns: + dict: Ansible module execution result. If this function is executed on AnsibleHosts instance, the result + is a dict of dicts. Key is hostname, value is ansible module execution result on that host. If this + function is executed on AnsibleHost instance (for single host), the result is a dict, which is ansible + module execution result on the host of AnsibleHost instance. + + Sample result for AnsibleHosts: + { + "VM0100": { + "stderr_lines": [], + "cmd": "pwd", + "stdout": "/root", + "delta": "0:00:00.001744", + "stdout_lines": [ + "/root" + ], + "ansible_facts": { + "discovered_interpreter_python": "/usr/bin/python" + }, + "end": "2023-03-20 01:11:01.748306", + "_ansible_no_log": false, + "start": "2023-03-20 01:11:01.746562", + "changed": true, + "failed": false, + "reachable": true, + "stderr": "", + "rc": 0, + "hostname": "VM0100", + "invocation": { + "module_args": { + "warn": true, + "executable": null, + "_uses_shell": true, + "strip_empty_ends": true, + "_raw_params": "pwd", + "removes": null, + "argv": null, + "creates": null, + "chdir": null, + "stdin_add_newline": true, + "stdin": null + } + } + }, + "VM0101": { + "stderr_lines": [], + "cmd": "pwd", + "stdout": "/root", + "delta": "0:00:00.001764", + "stdout_lines": [ + "/root" + ], + "ansible_facts": { + "discovered_interpreter_python": "/usr/bin/python" + }, + "end": "2023-03-20 01:11:01.748302", + "_ansible_no_log": false, + "start": "2023-03-20 01:11:01.746538", + "changed": true, + "failed": false, + "reachable": true, + "stderr": "", + "rc": 0, + "hostname": "VM0101", + "invocation": { + "module_args": { + "warn": true, + "executable": null, + "_uses_shell": true, + "strip_empty_ends": true, + "_raw_params": "pwd", + "removes": null, + "argv": null, + "creates": null, + "chdir": null, + "stdin_add_newline": true, + "stdin": null + } + } + } + } + + Sample result for AnsibleHost: + { + "stderr_lines": [], + "cmd": [ + "pwd" + ], + "stdout": "/home/admin", + "delta": "0:00:00.002754", 
+ "stdout_lines": [ + "/home/admin" + ], + "ansible_facts": { + "discovered_interpreter_python": "/usr/bin/python" + }, + "end": "2023-03-20 01:17:02.602775", + "_ansible_no_log": false, + "start": "2023-03-20 01:17:02.600021", + "changed": true, + "failed": false, + "reachable": true, + "stderr": "", + "rc": 0, + "hostname": "vlab-01", + "invocation": { + "module_args": { + "creates": null, + "executable": null, + "_uses_shell": false, + "strip_empty_ends": true, + "_raw_params": "pwd", + "removes": null, + "argv": null, + "warn": true, + "chdir": null, + "stdin_add_newline": true, + "stdin": null + } + } + } + """ + caller_info = kwargs.pop("caller_info", None) + if not caller_info: + previous_frame = inspect.currentframe().f_back + caller_info = inspect.getframeinfo(previous_frame) + + module_args = copy.deepcopy(args) + module_kwargs = copy.deepcopy(kwargs) + + verbosity = module_kwargs.pop("verbosity", None) + if not verbosity: + verbosity = self.options.get("verbosity", 2) + module_ignore_errors = module_kwargs.pop("module_ignore_errors", False) + module_attrs = module_kwargs.pop("module_attrs", {}) + + module_info = { + "module_name": self.module_name, + "args": module_args, + "kwargs": module_kwargs, + "module_attrs": module_attrs + } + self._log_modules(caller_info, module_info, verbosity) + + task = self.build_task(**module_info) + results = self.run_tasks(self.host_pattern, self.loader, self.im, self.vm, self.options, tasks=[task]) + + self._log_results(caller_info, module_info, results, verbosity) + self._check_results(caller_info, module_info, results, module_ignore_errors, verbosity) + + if isinstance(self, AnsibleHost): + results = results[self.hostnames[0]] + + return results + + def __getattr__(self, attr): + """For finding ansible module and return a function for running that ansible module. + + Args: + attr (str): Attribute name of current object. Usually ansible module name. 
+
+        Raises:
+            UnsupportedAnsibleModule: Unable to find ansible module specified by `attr` from ansible builtin modules
+                or current visible customized modules.
+
+        Returns:
+            callable: A function for running ansible module specified by `attr`.
+        """
+        if not module_loader.has_plugin(attr):
+            raise UnsupportedAnsibleModule("Unsupported ansible module \"{}\"".format(attr))
+        self.module_name = attr
+
+        return self._run_ansible_module
+
+    def run_module(self, module_name, args=[], kwargs={}):
+        """Run ansible module specified by `module_name`.
+
+        Special keyword arguments:
+            module_ignore_errors: If this argument is set to True, no RunAnsibleModuleFailed exception will be raised.
+            module_attrs: A dict for specifying module attributes that may affect execution of the ansible module.
+                Reference documents:
+                    * https://docs.ansible.com/ansible/2.9/modules/list_of_all_modules.html
+                    * https://docs.ansible.com/ansible/latest/collections/index.html
+            verbosity: integer from 0-3.
+
+        Args:
+            module_name (str): Ansible module name.
+            args (list): Ansible module arguments.
+            kwargs (dict): Ansible module keyword arguments.
+
+        Raises:
+            UnsupportedAnsibleModule: Unable to find ansible module specified by `module_name` from ansible builtin
+                modules or current visible customized modules.
+
+        Returns:
+            dict: A dict for ansible module execution result. Same as the result of `self._run_ansible_module`.
+        """
+        if not module_loader.has_plugin(module_name):
+            raise UnsupportedAnsibleModule("Unsupported ansible module \"{}\"".format(module_name))
+        self.module_name = module_name
+
+        previous_frame = inspect.currentframe().f_back
+        caller_info = inspect.getframeinfo(previous_frame)
+        # Copy before updating -- updating `kwargs` in place would mutate the shared
+        # mutable default dict of this method across calls.
+        kwargs = dict(kwargs)
+        kwargs.update({"caller_info": caller_info})
+
+        return self._run_ansible_module(*args, **kwargs)
+
+    def load_module(self, module_name, args=[], kwargs={}, module_attrs={}):
+        """Load a module with arguments into a list.
+
+        This method loads a module with arguments into a list.
Method `self.run_loaded_modules` can run the loaded + modules in a single play. + + Comparing with `self.run_module` or `self._run_ansible_module`, special keyword arguments are not supported + in `kwargs` of `self.load_module`. The special keyword arguments are supported by the `self.run_loaded_modules` + method. + + Args: + module_name (str): Ansible module name. Can be builtin module or customized module. + args (list, optional): Positional arguments of ansible module. Defaults to []. + kwargs (dict, optional): Keyword arguments of ansible module. Defaults to {}. + module_attrs (dict, optional): Module attributes affect module execution. + """ + self._loaded_modules.append( + { + "module_name": module_name, + "args": args, + "kwargs": kwargs, + "module_attrs": module_attrs + } + ) + + def clear_loaded_modules(self): + """Clear the list of loaded ansible modules. + """ + self._loaded_modules = [] + + def run_loaded_modules(self, module_ignore_errors=False, verbosity=2): + """Run the list of loaded ansible modules. + + Args: + verbosity (int): Verbosity value from 0-3. + + Returns: + dict: Ansible module execution results. 
Sample result:
+                {
+                    "vlab-01": [
+                        {
+                            "stderr_lines": [],
+                            "cmd": [
+                                "pwd"
+                            ],
+                            "stdout": "/home/admin",
+                            "delta": "0:00:00.002754",
+                            "stdout_lines": [
+                                "/home/admin"
+                            ],
+                            "ansible_facts": {
+                                "discovered_interpreter_python": "/usr/bin/python"
+                            },
+                            "end": "2023-03-20 01:17:02.602775",
+                            "_ansible_no_log": false,
+                            "start": "2023-03-20 01:17:02.600021",
+                            "changed": true,
+                            "failed": false,
+                            "reachable": true,
+                            "stderr": "",
+                            "rc": 0,
+                            "hostname": "vlab-01",
+                            "invocation": {
+                                "module_args": {
+                                    "creates": null,
+                                    "executable": null,
+                                    "_uses_shell": false,
+                                    "strip_empty_ends": true,
+                                    "_raw_params": "pwd",
+                                    "removes": null,
+                                    "argv": null,
+                                    "warn": true,
+                                    "chdir": null,
+                                    "stdin_add_newline": true,
+                                    "stdin": null
+                                }
+                            }
+                        },
+                        {
+                            "stderr_lines": [],
+                            "cmd": "ls",
+                            "end": "2023-03-20 01:17:02.812231",
+                            "_ansible_no_log": false,
+                            "stdout": "config.json\nmyfile",
+                            "changed": true,
+                            "rc": 0,
+                            "failed": false,
+                            "reachable": true,
+                            "stderr": "",
+                            "delta": "0:00:00.003928",
+                            "hostname": "vlab-01",
+                            "invocation": {
+                                "module_args": {
+                                    "creates": null,
+                                    "executable": null,
+                                    "_uses_shell": true,
+                                    "strip_empty_ends": true,
+                                    "_raw_params": "ls",
+                                    "removes": null,
+                                    "argv": null,
+                                    "warn": true,
+                                    "chdir": null,
+                                    "stdin_add_newline": true,
+                                    "stdin": null
+                                }
+                            },
+                            "stdout_lines": [
+                                "config.json",
+                                "myfile"
+                            ],
+                            "start": "2023-03-20 01:17:02.808303"
+                        }
+                    ]
+                }
+        """
+        if len(self._loaded_modules) == 0:
+            logger.info("No loaded task.")
+            return {}
+
+        previous_frame = inspect.currentframe().f_back
+        caller_info = inspect.getframeinfo(previous_frame)
+
+        loaded_modules = copy.deepcopy(self._loaded_modules)
+        self.clear_loaded_modules()
+        # Log the snapshot taken above -- `self._loaded_modules` has just been
+        # cleared, so passing it here would always log an empty module list.
+        self._log_modules(caller_info, loaded_modules, verbosity)
+
+        tasks = [
+            self.build_task(**module) for module in loaded_modules
+        ]
+        results = self.run_tasks(self.host_pattern, self.loader, self.im, self.vm, self.options, tasks=tasks)
+
+        self._log_results(caller_info, loaded_modules,
results, verbosity) + self._check_results(caller_info, loaded_modules, results, module_ignore_errors, verbosity) + + return results + + def get_inv_host(self, hostname, strict=False): + """Tool for getting ansible.inventory.host.Host object from self.inventories using ansible inventory manager. + + Args: + hostname (str): Hostname + strict (bool, optional): If strict==True, only get host with hostname matching self.host_pattern in + self.inventories. If strict=False, get any host with hostname from self.inventories. Defaults to False. + + Returns: + ansible.inventory.host.Host or None: Object of class ansible.inventory.host.Host or None. + """ + if strict: + # Only get host with hostname from self.ans_inv_hosts + for _host in self.ans_inv_hosts: + if _host.name == hostname: + return _host + else: + return None + else: + # Get host with hostname from whole inventory + return self.im.get_host(hostname) + + def get_inv_hosts(self, host_pattern): + """Tool for getting list of ansible.inventory.host.Host objects from self.inventories using ansible inventory + manager. Ansible inventory manager is used under the hood. + + Args: + host_pattern (str or list): Host pattern string or list of host pattern strings. Interpreted by ansible. + Follow the same rules of specifying ansible hosts in ansible play book. + + Returns: + list: List of ansible.inventory.host.Host objects. + """ + return self.im.get_hosts(host_pattern) + + def get_host_vars(self, hostname, strict=False): + """Tool for getting variables of specified host from self.inventories. Variables defined in group_vars and + host_vars are not included. Only ansible inventory manager is used under the hood. + + Args: + hostname (str): Hostname. + strict (bool, optional): If strict==True, only get variables of host with hostname from hosts matching + self.host_pattern in self.inventories. If strict=False, get variables of any host with hostname from + self.inventories. Defaults to False. 
+ + Returns: + dict: Dict of variables. Key is variable name. Value is variable value. + """ + _host = self.get_inv_host(hostname, strict=strict) + if not _host: + return {} + return _host.get_vars() + + def get_host_var(self, hostname, var, strict=False): + """Tool for getting variable value of specified host from self.inventories. Variables defined in group_vars and + host_vars are not included. Only ansible inventory manager is used under the hood. + + Args: + hostname (str): Hostname. + var (str): Variable name. + strict (bool, optional): If strict==True, only get variable value of host with hostname from hosts matching + self.host_pattern in self.inventories. If strict=False, get variable value of any host with hostname + from self.inventories. Defaults to False. + + Returns: + Any: Variable value, possible types: str, int, bool, list, dict. + """ + return self.get_host_vars(hostname, strict).get(var, None) + + def get_host_visible_vars(self, hostname, strict=False): + """Tool for getting visible variables of specified host. Variables may be defined in inventory files, any + group_vars and host_vars files. Both ansible inventory manager and variable managers are used under the hood. + + Args: + hostname (str): Hostname. + strict (bool, optional): If strict==True, only get visible variables of host with hostname from hosts + matching self.host_pattern in self.inventories. If strict=False, get visible variables of any host + with hostname from self.inventories. Defaults to False. + + Returns: + dict: Dict of variables. Key is variable name. Value is variable value. + """ + _host = self.get_inv_host(hostname, strict=strict) + if not _host: + return {} + return self.vm.get_vars(host=_host) + + def get_host_visible_var(self, hostname, var, strict=False): + """Tool for getting visible variable value of specified host. Variable may be defined in inventory files, any + group_vars and host_vars files. 
Both ansible inventory manager and variable managers are used under the hood.
+
+        Args:
+            hostname (str): Hostname.
+            var (str): Variable name.
+            strict (bool, optional): If strict==True, only get visible variable value of host with hostname from hosts
+                matching self.host_pattern in self.inventories. If strict=False, get visible variable value of any host
+                with hostname from self.inventories. Defaults to False.
+
+        Returns:
+            Any: Variable value, possible types: str, int, bool, list, dict.
+        """
+        return self.get_host_visible_vars(hostname, strict=strict).get(var, None)
+
+
+class AnsibleHosts(AnsibleHostsBase):
+
+    def __init__(
+            self,
+            inventories,
+            host_pattern,
+            loader=None,
+            inventory_manager=None,
+            variable_manager=None,
+            options={},
+            hostvars={}):
+
+        super(AnsibleHosts, self).__init__(
+            inventories,
+            host_pattern,
+            loader=loader,
+            inventory_manager=inventory_manager,
+            variable_manager=variable_manager,
+            options=options,
+            hostvars=hostvars
+        )
+
+        self.ans_hosts = [
+            AnsibleHost(
+                inventories,
+                hostname,
+                self.loader,
+                self.im,
+                self.vm,
+                self.options,
+                hostvars,
+            ) for hostname in self.hostnames
+        ]
+
+    # implement a list like interface based on attribute self.ans_hosts
+    def __len__(self):
+        return len(self.ans_hosts)
+
+    def __iter__(self):
+        return iter(self.ans_hosts)
+
+    def __getitem__(self, index):
+
+        if isinstance(index, int):
+            if index < 0:
+                index = len(self.ans_hosts) + index
+            if index < 0 or index >= len(self.ans_hosts):
+                raise IndexError("AnsibleHosts index out of range")
+            return self.ans_hosts[index]
+        elif isinstance(index, str):
+            for ans_host in self.ans_hosts:
+                if ans_host.hostname == index:
+                    return ans_host
+            raise KeyError("AnsibleHost with hostname '{}' not found".format(index))
+        else:
+            raise TypeError("AnsibleHosts indices must be integers or strings, not {}".format(type(index)))
+
+    def __str__(self):
+        # NOTE(review): the format string read `"".format(...)` in this copy -- the
+        # angle-bracketed repr text was stripped during extraction; reconstructed here.
+        return "<AnsibleHosts {} of inventories {}>".format(self.hostnames, self.inventories)
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class AnsibleHost(AnsibleHostsBase):
+
+    def __init__(
+            self,
+            inventories,
+            host_pattern,
+            loader=None,
+            inventory_manager=None,
+            variable_manager=None,
+            options={},
+            hostvars={}):
+
+        super(AnsibleHost, self).__init__(
+            inventories,
+            host_pattern,
+            loader=loader,
+            inventory_manager=inventory_manager,
+            variable_manager=variable_manager,
+            options=options,
+            hostvars=hostvars
+        )
+
+        if len(self.ans_inv_hosts) > 1:
+            raise MultipleAnsibleHostsError(
+                "More than one host match '{}' in inventory files '{}'".format(self.host_pattern, self.inventories)
+            )
+        self.ans_inv_host = self.ans_inv_hosts[0]
+        self.hostname = self.ans_inv_host.name
+        self.ip = self.ans_inv_host.get_vars().get("ansible_host", None)
+
+    def __str__(self):
+        # NOTE(review): the format string read `"".format(...)` in this copy -- the
+        # angle-bracketed repr text was stripped during extraction; reconstructed here.
+        return "<AnsibleHost {} of inventories {}>".format(self.hostname, self.inventories)
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/ansible/devutil/devices/factory.py b/ansible/devutil/devices/factory.py
new file mode 100644
index 00000000000..bb6f0709473
--- /dev/null
+++ b/ansible/devutil/devices/factory.py
@@ -0,0 +1,89 @@
+import json
+import logging
+import os
+import yaml
+
+from .ansible_hosts import AnsibleHost
+from .ansible_hosts import AnsibleHosts
+from .ansible_hosts import NoAnsibleHostError
+from .ansible_hosts import MultipleAnsibleHostsError
+from .sonic import SonicHosts
+
+logger = logging.getLogger(__name__)
+
+_self_dir = os.path.dirname(os.path.abspath(__file__))
+ansible_path = os.path.realpath(os.path.join(_self_dir, "../../"))
+
+
+def init_localhost(inventories=None, options={}, hostvars={}):
+    try:
+        return AnsibleHost(inventories, "localhost", options=options.copy(), hostvars=hostvars.copy())
+    except (NoAnsibleHostError, MultipleAnsibleHostsError) as e:
+        logger.error(
+            "Failed to initialize localhost from inventories '{}', exception: {}".format(str(inventories), repr(e))
+        )
+        return None
+
+
+def init_host(inventories, host_pattern, options={}, hostvars={}):
+    try:
+        return AnsibleHost(inventories, host_pattern,
options=options.copy(), hostvars=hostvars.copy()) + except NoAnsibleHostError as e: + logger.error( + "No host '{}' in inventories '{}', exception: {}".format(host_pattern, inventories, repr(e)) + ) + return None + except MultipleAnsibleHostsError as e: + logger.error( + "Multiple hosts '{}' in inventories '{}', exception: {}".format(host_pattern, inventories, repr(e)) + ) + return None + + +def init_hosts(inventories, host_pattern, options={}, hostvars={}): + try: + return AnsibleHosts(inventories, host_pattern, options=options.copy(), hostvars=hostvars.copy()) + except NoAnsibleHostError as e: + logger.error( + "No hosts '{}' in inventories '{}', exception: {}".format(host_pattern, inventories, repr(e)) + ) + return None + + +def init_sonichosts(inventories, host_pattern, options={}, hostvars={}): + try: + return SonicHosts(inventories, host_pattern, options=options.copy(), hostvars=hostvars.copy()) + except NoAnsibleHostError as e: + logger.error( + "No hosts '{}' in inventories '{}', exception: {}".format(host_pattern, inventories, repr(e)) + ) + return None + + +def init_testbed_sonichosts(inventories, testbed_name, testbed_file="testbed.yaml", options={}, hostvars={}): + testbed_file_path = os.path.join(ansible_path, testbed_file) + with open(testbed_file_path) as f: + testbeds = yaml.safe_load(f.read()) + + duts = None + for testbed in testbeds: + if testbed["conf-name"] == testbed_name: + duts = testbed["dut"] # Type is list, historic reason. 
+ break + + if not duts: + logger.error("No testbed with name '{}' in testbed file {}".format(testbed_name, testbed_file_path)) + return None + + sonichosts = init_sonichosts(inventories, duts, options=options.copy(), hostvars=hostvars.copy()) + if sonichosts and sonichosts.hosts_count != len(duts): + logger.error( + "Unmatched testbed duts: '{}', inventory: '{}', found hostnames: '{}'".format( + json.dumps(duts), + inventories, + json.dumps(sonichosts.hostnames) + ) + ) + return None + + return sonichosts diff --git a/ansible/devutil/devices/sonic.py b/ansible/devutil/devices/sonic.py new file mode 100644 index 00000000000..7b3c96cbeb1 --- /dev/null +++ b/ansible/devutil/devices/sonic.py @@ -0,0 +1,213 @@ +import logging +import yaml + +from .ansible_hosts import AnsibleHosts +from .ansible_hosts import RunAnsibleModuleFailed + +logger = logging.getLogger(__name__) + + +class SonicHosts(AnsibleHosts): + SUPPORTED_UPGRADE_TYPES = ["onie", "sonic"] + + def __init__(self, inventories, host_pattern, options={}, hostvars={}): + super(SonicHosts, self).__init__(inventories, host_pattern, options=options.copy(), hostvars=hostvars.copy()) + + @property + def sonic_version(self): + try: + output = self.command("cat /etc/sonic/sonic_version.yml") + versions = {} + for hostname in self.hostnames: + versions[hostname] = yaml.safe_load(output[hostname]["stdout"]) + return versions + except Exception as e: + logger.error("Failed to run `cat /etc/sonic/sonic_version.yml`: {}".format(repr(e))) + return {} + + +def upgrade_by_sonic(sonichosts, image_url, disk_used_percent): + try: + sonichosts.reduce_and_add_sonic_images( + disk_used_pcent=disk_used_percent, + new_image_url=image_url, + module_attrs={"become": True} + ) + sonichosts.shell("reboot", module_attrs={"become": True, "async": 300, "poll": 0}) + return True + except RunAnsibleModuleFailed as e: + logger.error( + "SONiC upgrade image failed, devices: {}, url: {}, error: {}".format( + str(sonichosts.hostnames), image_url, 
repr(e) + ) + ) + return False + + +def upgrade_by_onie(sonichosts, localhost, image_url, pause_time): + try: + sonichosts.shell("grub-editenv /host/grub/grubenv set next_entry=ONIE", module_attrs={"become": True}) + sonichosts.shell( + 'sleep 2 && shutdown -r now "Boot into onie."', + module_attrs={"become": True, "async": 5, "poll": 0} + ) + + for i in range(len(sonichosts.ips)): + localhost.wait_for( + host=sonichosts.ips[i], + port=22, + state="started", + search_regex="OpenSSH", + delay=60 if i == 0 else 0, + timeout=300, + module_attrs={"changed_when": False} + ) + if pause_time > 0: + localhost.pause( + seconds=pause_time, prompt="Pause {} seconds for ONIE initialization".format(str(pause_time)) + ) + sonichosts.onie( + install="yes", + url=image_url, + module_attrs={"connection": "onie"} + ) + return True + except RunAnsibleModuleFailed as e: + logger.error( + "ONIE upgrade image failed, devices: {}, url: {}, error: {}".format( + str(sonichosts.hostnames), image_url, repr(e) + ) + ) + return False + + +def patch_rsyslog(sonichosts): + rsyslog_conf_files = [ + "/usr/share/sonic/templates/rsyslog.conf.j2", + "/etc/rsyslog.conf" + ] + + # Get sonic version, use version of the first host + sonic_build_version = list(sonichosts.shell( + "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version" + ).values())[0]["stdout"] + + # Patch rsyslog to stop sending syslog to production and use new template for remote syslog + for conf_file in rsyslog_conf_files: + sonichosts.lineinfile( + path=conf_file, + state="present", + backrefs=True, + regexp=r"(^[^#]*@\[10\.20\.6\.16\]:514)", + line=r"# \g<1>", + module_attrs={"become": True} + ) + sonichosts.lineinfile( + path=conf_file, + state="present", + insertafter="# Define a custom template", + line=r'$template RemoteSONiCFileFormat,"<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% %APP-NAME% ' + r'%PROCID% %MSGID% [origin swVersion=\"{}\"] %msg%\n"'.format(sonic_build_version), + module_attrs={"become": True} + ) + + # 
Patch rsyslog.conf.j2 to use new template for remote syslog + sonichosts.lineinfile( + path="/usr/share/sonic/templates/rsyslog.conf.j2", + state="present", + backrefs=True, + regex=r"(\*\.\* @\[\{\{ server \}\}\]:514)", + line=r'\g<1>;RemoteSONiCFileFormat', + module_attrs={"become": True} + ) + + # Patch rsyslog.conf to use new template for remote syslog + sonichosts.shell( + r"sed -E -i 's/(^[^#]*@\[[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\]:514).*$/\1;RemoteSONiCFileFormat/g' " + "/etc/rsyslog.conf", + module_attrs={"become": True} + ) + + # Workaround for PR https://msazure.visualstudio.com/One/_git/Networking-acs-buildimage/pullrequest/6631568 + # This PR updated the rsyslog.conf to use a new method for sending out syslog. Need to configure the new method + # to use RemoteSONiCFileFormat too. + remote_template = list(sonichosts.shell( + "echo `grep -c 'template=\".*SONiCFileFormat\"' /usr/share/sonic/templates/rsyslog.conf.j2`" + ).values())[0]["stdout"] + if remote_template == "0": + for conf_file in rsyslog_conf_files: + sonichosts.lineinfile( + path=conf_file, + state="present", + backrefs=True, + regex=r'^(\*\.\* action\(type="omfwd") target=(.*)$', + line=r'\g<1> template="RemoteSONiCFileFormat" target=\g<2>', + module_attrs={"become": True} + ) + elif remote_template != "0": + for conf_file in rsyslog_conf_files: + sonichosts.replace( + dest=conf_file, + regexp='template=".*SONiCFileFormat"', + replace='template="RemoteSONiCFileFormat"', + module_attrs={"become": True} + ) + sonichosts.shell("systemctl restart rsyslog", module_attrs={"become": True}) + + +def post_upgrade_actions(sonichosts, localhost, disk_used_percent): + try: + for i in range(len(sonichosts.ips)): + localhost.wait_for( + host=sonichosts.ips[i], + port=22, + state="started", + search_regex="OpenSSH", + delay=180 if i == 0 else 0, + timeout=600, + module_attrs={"changed_when": False} + ) + localhost.pause(seconds=60, prompt="Wait for SONiC initialization") + + # PR 
https://github.com/sonic-net/sonic-buildimage/pull/12109 decreased the sshd timeout + # This change may cause timeout when executing `generate_dump -s yesterday`. + # Increase this time after image upgrade + sonichosts.shell( + 'sed -i "s/^ClientAliveInterval [0-9].*/ClientAliveInterval 900/g" /etc/ssh/sshd_config ' + '&& systemctl restart sshd', + module_attrs={"become": True} + ) + + patch_rsyslog(sonichosts) + + sonichosts.command("config bgp startup all", module_attrs={"become": True}) + sonichosts.command("config save -y", module_attrs={"become": True}) + sonichosts.reduce_and_add_sonic_images( + disk_used_pcent=disk_used_percent, + module_attrs={"become": True} + ) + return True + except RunAnsibleModuleFailed as e: + logger.error( + "Post upgrade actions failed, devices: {}, error: {}".format(str(sonichosts.hostnames), repr(e)) + ) + return False + + +def upgrade_image(sonichosts, localhost, image_url, upgrade_type="sonic", disk_used_percent=50, onie_pause_time=0): + if upgrade_type not in sonichosts.SUPPORTED_UPGRADE_TYPES: + logger.error( + "Upgrade type '{}' is not in SUPPORTED_UPGRADE_TYPES={}".format( + upgrade_type, sonichosts.SUPPORTED_UPGRADE_TYPES + ) + ) + return False + + if upgrade_type == "sonic": + upgrade_result = upgrade_by_sonic(sonichosts, image_url, disk_used_percent) + elif upgrade_type == "onie": + upgrade_result = upgrade_by_onie(sonichosts, localhost, image_url, onie_pause_time) + if not upgrade_result: + return False + + return post_upgrade_actions(sonichosts, localhost, disk_used_percent) diff --git a/ansible/devutils b/ansible/devutils index db722dae2d1..c103260a98c 100755 --- a/ansible/devutils +++ b/ansible/devutils @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # Supress warning import warnings @@ -25,6 +25,7 @@ g_task_runner = None g_pdu_dict = {} g_conn_graph_facts = {} + def run_cmd(cmd): ''' @summary: Utility that runs a command in a subprocess @@ -36,6 +37,7 @@ def run_cmd(cmd): stdout, 
stderr = out.communicate() return out.returncode, stdout, stderr + def get_conn_graph_facts(hosts): global g_conn_graph_facts @@ -45,15 +47,18 @@ def get_conn_graph_facts(hosts): g_conn_graph_facts = conn_graph_helper.get_conn_graph_facts(hostnames) return g_conn_graph_facts + def build_global_vars(concurrency, inventory): global g_task_runner, g_inv_mgr g_task_runner = TaskRunner(max_worker=concurrency) g_inv_mgr = HostManager(inventory) + def retrieve_hosts(group, limit): global g_inv_mgr return g_inv_mgr.get_host_list(group, limit) + def get_pdu_info_from_conn_graph(hostname): """ Read pdu info from conn graph. @@ -98,12 +103,14 @@ def get_pdu_info_from_inventory(attrs): pdus[ph] = pdu return (True, pdus) + def get_pdu_info(dut_hostname, attrs): results = get_pdu_info_from_conn_graph(dut_hostname) if results: return (True, results) return get_pdu_info_from_inventory(attrs) + def get_console_info_from_conn_graph(hostname): """ Read console info from conn_graph_facts. @@ -115,6 +122,7 @@ def get_console_info_from_conn_graph(hostname): console_info['console_port'] = g_conn_graph_facts['device_console_link'][hostname]['ConsolePort']['peerport'] return console_info + def get_console_info_from_inventory(attrs): """ Read console info from inventory file. This should be a fallback of get_console_info_from_conn_graph. 
@@ -126,6 +134,7 @@ def get_console_info_from_inventory(attrs): console_info[k] = attrs[k] return console_info + def get_console_info(hostname, attrs): console_info = get_console_info_from_conn_graph(hostname) if not console_info: @@ -134,6 +143,7 @@ def get_console_info(hostname, attrs): print("Failed to get console info for {}".format(hostname)) return console_info + def show_data_output(header, data, json_output=False): if json_output: print(json.dumps(sorted(data, key=lambda x: x['Host']), indent=4)) @@ -217,7 +227,7 @@ def pdu_action_on_dut(host, attrs, action): if not succeed: ret['Summary'] = pdu_info return ret - pduman = pdu_manager_factory(host, pdu_info, g_conn_graph_facts, pdu_info.values()[0]) + pduman = pdu_manager_factory(host, pdu_info, g_conn_graph_facts, pdu_info) if not pduman: ret['Summary'].append('Failed to communicate with PDU controller {}'.format(pdu_info.keys())) @@ -332,6 +342,20 @@ def validate_args(args): return True +def setup_logging(level): + if level == 'close': + return + import logging + if level == 'debug': + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) + elif level == 'info': + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + elif level == 'warn': + logging.basicConfig(stream=sys.stdout, level=logging.WARN) + elif level == 'error': + logging.basicConfig(stream=sys.stdout, level=logging.ERROR) + + def main(): parser = argparse.ArgumentParser(description='Device utilities') parser.add_argument('-6', '--ipv6', help='Include IPv6', action='store_true', @@ -348,6 +372,9 @@ def main(): type=str, required=False, default='lab') parser.add_argument('-l', '--limit', help='Host: limit to a single dut host name, default all', type=str, required=False) + parser.add_argument('--log-level', help='Log level: print logs to STDOUT (if not set to close)', + type=str, required=False, default='warn', + choices=['debug', 'info', 'warn', 'error', 'close']) parser.add_argument('-u', '--user', help='User: user account to login 
to host with, default admin', type=str, required=False, default='admin') parser.add_argument( @@ -359,6 +386,7 @@ def main(): args = parser.parse_args() if not validate_args(args): return + setup_logging(args.log_level) build_global_vars(args.concurrency, args.inventory) # Add limit argument check for pdu_reboot, pdu_on, pdu_off actions # If no limit argument for these actions, will not execute the process for all devices diff --git a/ansible/files/creategraph.py b/ansible/files/creategraph.py index 2a1a598e96f..113e33adccd 100755 --- a/ansible/files/creategraph.py +++ b/ansible/files/creategraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 import csv import sys @@ -6,9 +6,17 @@ import argparse from lxml import etree +try: + from ansible.module_utils.port_utils import get_port_alias_to_name_map +except ImportError: + # Add parent dir for using outside Ansible + sys.path.append('..') + from module_utils.port_utils import get_port_alias_to_name_map + DEFAULT_DEVICECSV = 'sonic_lab_devices.csv' DEFAULT_LINKCSV = 'sonic_lab_links.csv' DEFAULT_CONSOLECSV = 'sonic_lab_console_links.csv' +DEFAULT_BMCCSV = 'sonic_lab_bmc_links.csv' DEFAULT_PDUCSV = 'sonic_lab_pdu_links.csv' LAB_CONNECTION_GRAPH_ROOT_NAME = 'LabConnectionGraph' @@ -17,66 +25,160 @@ class LabGraph(object): - """ + """ This is used to create "graph" file of lab for all connections and vlan info from csv file We(both engineer and lab technician) maintian and modify the csv file to keep track of the lab - infrastucture for Sonic development and testing environment. + infrastucture for Sonic development and testing environment. 
""" - def __init__(self, dev_csvfile=None, link_csvfile=None, cons_csvfile=None, pdu_csvfile=None, graph_xmlfile=None): - #TODO:make generated xml file name as parameters in the future to make it more flexible - self.devices = [] - self.links = [] - self.consoles = [] - self.pdus = [] + def __init__(self, dev_csvfile=None, link_csvfile=None, cons_csvfile=None, bmc_csvfile=None, pdu_csvfile=None, graph_xmlfile=None): + self.devices = {} + self.links = [] + self.consoles = [] + self.bmcs = [] + self.pdus = [] self.devcsv = dev_csvfile self.linkcsv = link_csvfile self.conscsv = cons_csvfile + self.bmccsv = bmc_csvfile self.pducsv = pdu_csvfile self.png_xmlfile = 'str_sonic_png.xml' self.dpg_xmlfile = 'str_sonic_dpg.xml' self.one_xmlfile = graph_xmlfile + self._cache_port_name_to_alias = {} + self._cache_port_alias_to_name = {} self.pngroot = etree.Element('PhysicalNetworkGraphDeclaration') self.dpgroot = etree.Element('DataPlaneGraph') self.csgroot = etree.Element('ConsoleGraphDeclaration') + self.bmcgroot = etree.Element('BmcGraphDeclaration') self.pcgroot = etree.Element('PowerControlGraphDeclaration') + def _get_port_alias_to_name_map(self, hwsku): + """ + Retrive port alias to name map for specific hwsku. + """ + if hwsku in self._cache_port_alias_to_name: + return self._cache_port_alias_to_name[hwsku] + port_alias_to_name_map, _, _ = get_port_alias_to_name_map(hwsku) + self._cache_port_alias_to_name[hwsku] = port_alias_to_name_map + return port_alias_to_name_map + + def _get_port_name_to_alias_map(self, hwsku): + """ + Retrive port name to alias map for specific hwsku. 
+ """ + if hwsku in self._cache_port_name_to_alias: + return self._cache_port_name_to_alias[hwsku] + port_alias_to_name_map = self._get_port_alias_to_name_map(hwsku) + port_name_to_alias_map = dict([(name, alias) for alias, name in port_alias_to_name_map.items()]) + self._cache_port_name_to_alias[hwsku] = port_name_to_alias_map + return port_name_to_alias_map + + def _get_port_name_set(self, device_hostname): + """ + Retrive port name set of a specific hwsku. + """ + hwsku = self.devices[device_hostname]['HwSku'] + return set(self._get_port_name_to_alias_map(hwsku).keys()) + + def _get_port_alias_set(self, device_hostname): + """ + Retrive port alias set of a specific hwsku. + """ + hwsku = self.devices[device_hostname]['HwSku'] + return set(self._get_port_alias_to_name_map(hwsku).keys()) + + def _convert_port_alias_to_name(self, device_hostname, port_alias): + """ + Given the device hostname and port alias, return the corresponding port name. + """ + os = self.devices[device_hostname].get('Os', '').lower() + if os != 'sonic': + raise Exception("Cannot convert port alias to name for non-SONiC device {}".format(device_hostname)) + hwsku = self.devices[device_hostname]['HwSku'] + port_alias_to_name_map = self._get_port_alias_to_name_map(hwsku) + return port_alias_to_name_map[port_alias] + def read_devices(self): with open(self.devcsv) as csv_dev: - csv_devices = csv.DictReader(filter(lambda row: row[0]!='#' and len(row.strip())!=0, csv_dev)) + csv_devices = csv.DictReader(filter(lambda row: row[0] != '#' and len(row.strip()) != 0, csv_dev)) devices_root = etree.SubElement(self.pngroot, 'Devices') pdus_root = etree.SubElement(self.pcgroot, 'DevicesPowerControlInfo') cons_root = etree.SubElement(self.csgroot, 'DevicesConsoleInfo') + bmc_root = etree.SubElement(self.bmcgroot, 'DevicesBmcInfo') for row in csv_devices: attrs = {} - self.devices.append(row) - devtype=row['Type'].lower() + self.devices[row['Hostname']] = row + devtype = row['Type'].lower() if 'pdu' in 
devtype: - for key in row: - attrs[key]=row[key].decode('utf-8') + for key in row: + attrs[key] = row[key].decode('utf-8') etree.SubElement(pdus_root, 'DevicePowerControlInfo', attrs) elif 'consoleserver' in devtype: - for key in row: - attrs[key]=row[key].decode('utf-8') + for key in row: + attrs[key] = row[key].decode('utf-8') + etree.SubElement(cons_root, 'DeviceConsoleInfo', attrs) + elif 'mgmttstorrouter' in devtype: + for key in row: + attrs[key] = row[key].decode('utf-8') etree.SubElement(cons_root, 'DeviceConsoleInfo', attrs) + etree.SubElement(bmc_root, 'DeviceBmcInfo', attrs) else: - for key in row: - if key.lower() != 'managementip' and key.lower() !='protocol': - attrs[key]=row[key].decode('utf-8') + for key in row: + if key.lower() != 'managementip' and key.lower() != 'protocol': + attrs[key] = row[key].decode('utf-8') etree.SubElement(devices_root, 'Device', attrs) - + def read_links(self): + # Read and parse link.csv file with open(self.linkcsv) as csv_file: - csv_links = csv.DictReader(filter(lambda row: row[0]!='#' and len(row.strip())!=0, csv_file)) - links_root = etree.SubElement(self.pngroot, 'DeviceInterfaceLinks') + csv_links = csv.DictReader(filter(lambda row: row[0] != '#' and len(row.strip()) != 0, csv_file)) + links_group_by_devices = {} for link in csv_links: - attrs = {} - for key in link: - if key.lower() != 'vlanid' and key.lower() != 'vlanmode': - attrs[key]=link[key].decode('utf-8') - etree.SubElement(links_root, 'DeviceInterfaceLink', attrs) self.links.append(link) - + if link['StartDevice'] not in links_group_by_devices: + links_group_by_devices[link['StartDevice']] = [] + links_group_by_devices[link['StartDevice']].append(link) + if link['EndDevice'] not in links_group_by_devices: + links_group_by_devices[link['EndDevice']] = [] + links_group_by_devices[link['EndDevice']].append(link) + + # For SONiC devices (DUT/Fanout), convert port alias to port name. 
Updates in `links_group_by_devices` will + # also be reflected in `self.links`, because they are holding reference to the same underlying `link` variable. + for device, links in links_group_by_devices.items(): + os = self.devices[device].get('Os', '').lower() + if os != 'sonic': + continue + ports = [] + for link in links: + if device == link['StartDevice']: + ports.append(link['StartPort']) + elif device == link['EndDevice']: + ports.append(link['EndPort']) + if any([port not in self._get_port_alias_set(device).union(self._get_port_name_set(device)) for port in ports]): + # If any port of a device is neither port name nor port alias, skip conversion for this device. + continue + if all([port in self._get_port_alias_set(device) for port in ports]): + # If all ports of a device are port alias, convert them to port name. + for link in links: + if device == link['StartDevice']: + link['StartPort'] = self._convert_port_alias_to_name(device, link['StartPort']) + elif device == link['EndDevice']: + link['EndPort'] = self._convert_port_alias_to_name(device, link['EndPort']) + elif not all([port in self._get_port_name_set(device) for port in ports]): + # If some ports use port name and others use port alias, raise an Exception. 
+ raise Exception("[Failed] For device {}, please check {} and ensure all ports use port name, " + "or ensure all ports use port alias.".format(device, self.linkcsv)) + + # Generate DeviceInterfaceLink XML nodes for connection graph + links_root = etree.SubElement(self.pngroot, 'DeviceInterfaceLinks') + for link in self.links: + attrs = {} + for key in link: + if key.lower() != 'vlanid' and key.lower() != 'vlanmode': + attrs[key] = link[key].decode('utf-8') + etree.SubElement(links_root, 'DeviceInterfaceLink', attrs) + def read_consolelinks(self): if not os.path.exists(self.conscsv): return @@ -86,10 +188,23 @@ def read_consolelinks(self): for cons in csv_cons: attrs = {} for key in cons: - attrs[key]=cons[key].decode('utf-8') + attrs[key] = cons[key].decode('utf-8') etree.SubElement(conslinks_root, 'ConsoleLinkInfo', attrs) self.consoles.append(cons) + def read_bmclinks(self): + if not os.path.exists(self.bmccsv): + return + with open(self.bmccsv) as csv_file: + csv_bmc = csv.DictReader(csv_file) + bmclinks_root = etree.SubElement(self.bmcgroot, 'BmcLinksInfo') + for bmc in csv_bmc: + attrs = {} + for key in bmc: + attrs[key] = bmc[key].decode('utf-8') + etree.SubElement(bmclinks_root, 'BmcLinkInfo', attrs) + self.bmcs.append(bmc) + def read_pdulinks(self): if not os.path.exists(self.pducsv): return @@ -99,15 +214,14 @@ def read_pdulinks(self): for pdu_link in csv_pdus: attrs = {} for key in pdu_link: - attrs[key]=pdu_link[key].decode('utf-8') + attrs[key] = pdu_link[key].decode('utf-8') etree.SubElement(pduslinks_root, 'PowerControlLinkInfo', attrs) self.pdus.append(pdu_link) def generate_dpg(self): - for dev in self.devices: - hostname = dev.get('Hostname', '') - managementip = dev.get('ManagementIp', '') - devtype = dev['Type'].lower() + for hostname in self.devices: + managementip = self.devices[hostname].get('ManagementIp', '') + devtype = self.devices[hostname]['Type'].lower() if not hostname: continue if devtype in ('server', 'devsonic'): @@ -150,20 +264,24 
@@ def create_xml(self): root.append(self.pngroot) root.append(self.dpgroot) root.append(self.csgroot) + root.append(self.bmcgroot) root.append(self.pcgroot) result = etree.tostring(root, pretty_print=True) onexml.write(result) + def get_file_names(args): if not args.inventory: - device, links, console, pdu = args.device, args.links, args.console, args.pdu + device, links, console, bmc, pdu = args.device, args.links, args.console, args.bmc, args.pdu else: device = 'sonic_{}_devices.csv'.format(args.inventory) links = 'sonic_{}_links.csv'.format(args.inventory) console = 'sonic_{}_console_links.csv'.format(args.inventory) + bmc = 'sonic_{}_bmc_links.csv'.format(args.inventory) pdu = 'sonic_{}_pdu_links.csv'.format(args.inventory) - return device, links, console, pdu + return device, links, console, bmc, pdu + def main(): @@ -171,17 +289,19 @@ def main(): parser.add_argument("-d", "--device", help="device file [deprecate warning: use -i instead]", default=DEFAULT_DEVICECSV) parser.add_argument("-l", "--links", help="link file [deprecate warning: use -i instead]", default=DEFAULT_LINKCSV) parser.add_argument("-c", "--console", help="console connection file [deprecate warning: use -i instead]", default=DEFAULT_CONSOLECSV) + parser.add_argument("-b", "--bmc", help="bmc connection file [deprecate warning: use -i instead]", default=DEFAULT_BMCCSV) parser.add_argument("-p", "--pdu", help="pdu connection file [deprecate warning: use -i instead]", default=DEFAULT_PDUCSV) parser.add_argument("-i", "--inventory", help="specify inventory namei to generate device/link/console/pdu file names, default none", default=None) parser.add_argument("-o", "--output", help="output xml file", required=True) args = parser.parse_args() - device, links, console, pdu = get_file_names(args) - mygraph = LabGraph(device, links, console, pdu, args.output) + device, links, console, bmc, pdu = get_file_names(args) + mygraph = LabGraph(device, links, console, bmc, pdu, args.output) 
mygraph.read_devices() mygraph.read_links() mygraph.read_consolelinks() + mygraph.read_bmclinks() mygraph.read_pdulinks() mygraph.generate_dpg() mygraph.create_xml() diff --git a/ansible/files/lab_connection_graph.xml b/ansible/files/lab_connection_graph.xml index 014ae1f539b..73d62cfe775 100644 --- a/ansible/files/lab_connection_graph.xml +++ b/ansible/files/lab_connection_graph.xml @@ -1,56 +1,59 @@ - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + + + + - + @@ -85,31 +88,39 @@ - + - + - - - - + + + - - - + + + + + + + + + + + + - - + + diff --git a/ansible/files/sonic_lab_bmc_links.csv b/ansible/files/sonic_lab_bmc_links.csv new file mode 100644 index 00000000000..447aeb8bf87 --- /dev/null +++ b/ansible/files/sonic_lab_bmc_links.csv @@ -0,0 +1,2 @@ +StartDevice,StartPort,BmcIp,EndDevice,EndPort +management-1,Ethernet10,192.168.11.1/23,str-acs-serv-01,iDRAC diff --git a/ansible/files/sonic_lab_console_links.csv b/ansible/files/sonic_lab_console_links.csv index 55778ee34c9..390abebc905 100644 --- a/ansible/files/sonic_lab_console_links.csv +++ b/ansible/files/sonic_lab_console_links.csv @@ -1,4 +1,5 @@ -StartDevice,StartPort,EndDevice,Console_type,Proxy -console-1,10,str-msn2700-01,ssh,root -console-1,11,str-7260-10,ssh,root -console-1,12,str-7260-11,ssh,root \ No newline at end of file +StartDevice,StartPort,EndDevice,Console_type,Proxy,BaudRate +console-1,10,str-msn2700-01,ssh,root,9600 +console-2,11,str-7260-10,ssh,root,9600 +console-1,12,str-7260-11,ssh,root, +management-1,13,str-acs-serv-01,ssh,root,9600 diff --git a/ansible/files/sonic_lab_devices.csv b/ansible/files/sonic_lab_devices.csv index 8b056828b08..a4c8c2dadd5 100644 --- a/ansible/files/sonic_lab_devices.csv +++ b/ansible/files/sonic_lab_devices.csv @@ -1,8 +1,10 @@ -Hostname,ManagementIp,HwSku,Type,Protocol -dev-msn2700-01,10.251.0.188/23,Mellanox-2700,DevSonic, 
-dev-7260-10,10.251.0.13/23,Arista-7260QX-64,FanoutLeaf, -dev-7260-11,10.251.0.234/23,Arista-7260QX-64,FanoutRoot, -dev-acs-serv-01,10.251.0.245/23,TestServ,Server, -pdu-1,192.168.9.2,Apc,Pdu,snmp -pdu-2,192.168.9.3,Sentry,Pdu,snmp -console-1,192.168.10.1,Cisco,ConsoleServer,ssh +Hostname,ManagementIp,HwSku,Type,Protocol,Os +str-msn2700-01,10.251.0.188/23,Mellanox-2700,DevSonic,,sonic +str-7260-10,10.251.0.13/23,Arista-7260QX-64,FanoutLeaf,,sonic +str-7260-11,10.251.0.234/23,Arista-7260QX-64,FanoutRoot,,eos +str-acs-serv-01,10.251.0.245/23,TestServ,Server,,ubuntu +pdu-1,192.168.9.2,Apc,Pdu,snmp, +pdu-2,192.168.9.3,Sentry,Pdu,snmp, +console-1,192.168.10.1/23,Cisco,ConsoleServer,ssh,sonic +console-2,192.168.10.2/23,Sonic,ConsoleServer,ssh,cisco +management-1,192.168.10.3/23,Sonic,MgmtTsToRRouter,,sonic diff --git a/ansible/files/sonic_lab_links.csv b/ansible/files/sonic_lab_links.csv index 2bcbb6ac51f..f82e968740f 100644 --- a/ansible/files/sonic_lab_links.csv +++ b/ansible/files/sonic_lab_links.csv @@ -1,35 +1,35 @@ StartDevice,StartPort,EndDevice,EndPort,BandWidth,VlanID,VlanMode -dev-msn2700-01,Ethernet0,dev-7260-10,Ethernet1,40000,1681,Access -dev-msn2700-01,Ethernet4,dev-7260-10,Ethernet2,40000,1682,Access -dev-msn2700-01,Ethernet8,dev-7260-10,Ethernet3,40000,1683,Access -dev-msn2700-01,Ethernet12,dev-7260-10,Ethernet4,40000,1684,Access -dev-msn2700-01,Ethernet16,dev-7260-10,Ethernet5,40000,1685,Access -dev-msn2700-01,Ethernet20,dev-7260-10,Ethernet6,40000,1686,Access -dev-msn2700-01,Ethernet24,dev-7260-10,Ethernet7,40000,1687,Access -dev-msn2700-01,Ethernet28,dev-7260-10,Ethernet8,40000,1688,Access -dev-msn2700-01,Ethernet32,dev-7260-10,Ethernet9,40000,1689,Access -dev-msn2700-01,Ethernet36,dev-7260-10,Ethernet10,40000,1690,Access -dev-msn2700-01,Ethernet40,dev-7260-10,Ethernet11,40000,1691,Access -dev-msn2700-01,Ethernet44,dev-7260-10,Ethernet12,40000,1692,Access -dev-msn2700-01,Ethernet48,dev-7260-10,Ethernet13,40000,1693,Access 
-dev-msn2700-01,Ethernet52,dev-7260-10,Ethernet14,40000,1694,Access -dev-msn2700-01,Ethernet56,dev-7260-10,Ethernet15,40000,1695,Access -dev-msn2700-01,Ethernet60,dev-7260-10,Ethernet16,40000,1696,Access -dev-msn2700-01,Ethernet64,dev-7260-10,Ethernet17,40000,1697,Access -dev-msn2700-01,Ethernet68,dev-7260-10,Ethernet18,40000,1698,Access -dev-msn2700-01,Ethernet72,dev-7260-10,Ethernet19,40000,1699,Access -dev-msn2700-01,Ethernet76,dev-7260-10,Ethernet20,40000,1700,Access -dev-msn2700-01,Ethernet80,dev-7260-10,Ethernet21,40000,1701,Access -dev-msn2700-01,Ethernet84,dev-7260-10,Ethernet22,40000,1702,Access -dev-msn2700-01,Ethernet88,dev-7260-10,Ethernet23,40000,1703,Access -dev-msn2700-01,Ethernet92,dev-7260-10,Ethernet24,40000,1704,Access -dev-msn2700-01,Ethernet96,dev-7260-10,Ethernet25,40000,1705,Access -dev-msn2700-01,Ethernet100,dev-7260-10,Ethernet26,40000,1706,Access -dev-msn2700-01,Ethernet104,dev-7260-10,Ethernet27,40000,1707,Access -dev-msn2700-01,Ethernet108,dev-7260-10,Ethernet28,40000,1708,Access -dev-msn2700-01,Ethernet112,dev-7260-10,Ethernet29,40000,1709,Access -dev-msn2700-01,Ethernet116,dev-7260-10,Ethernet30,40000,1710,Access -dev-msn2700-01,Ethernet120,dev-7260-10,Ethernet31,40000,1711,Access -dev-msn2700-01,Ethernet124,dev-7260-10,Ethernet32,40000,1712,Access -dev-7260-11,Ethernet19,dev-acs-serv-01,p4p1,40000,,Trunk -dev-7260-11,Ethernet30,dev-7260-10,Ethernet64,40000,1681-1712,Trunk +str-msn2700-01,Ethernet0,str-7260-10,Ethernet1,40000,1681,Access +str-msn2700-01,Ethernet4,str-7260-10,Ethernet2,40000,1682,Access +str-msn2700-01,Ethernet8,str-7260-10,Ethernet3,40000,1683,Access +str-msn2700-01,Ethernet12,str-7260-10,Ethernet4,40000,1684,Access +str-msn2700-01,Ethernet16,str-7260-10,Ethernet5,40000,1685,Access +str-msn2700-01,Ethernet20,str-7260-10,Ethernet6,40000,1686,Access +str-msn2700-01,Ethernet24,str-7260-10,Ethernet7,40000,1687,Access +str-msn2700-01,Ethernet28,str-7260-10,Ethernet8,40000,1688,Access 
+str-msn2700-01,Ethernet32,str-7260-10,Ethernet9,40000,1689,Access +str-msn2700-01,Ethernet36,str-7260-10,Ethernet10,40000,1690,Access +str-msn2700-01,Ethernet40,str-7260-10,Ethernet11,40000,1691,Access +str-msn2700-01,Ethernet44,str-7260-10,Ethernet12,40000,1692,Access +str-msn2700-01,Ethernet48,str-7260-10,Ethernet13,40000,1693,Access +str-msn2700-01,Ethernet52,str-7260-10,Ethernet14,40000,1694,Access +str-msn2700-01,Ethernet56,str-7260-10,Ethernet15,40000,1695,Access +str-msn2700-01,Ethernet60,str-7260-10,Ethernet16,40000,1696,Access +str-msn2700-01,Ethernet64,str-7260-10,Ethernet17,40000,1697,Access +str-msn2700-01,Ethernet68,str-7260-10,Ethernet18,40000,1698,Access +str-msn2700-01,Ethernet72,str-7260-10,Ethernet19,40000,1699,Access +str-msn2700-01,Ethernet76,str-7260-10,Ethernet20,40000,1700,Access +str-msn2700-01,Ethernet80,str-7260-10,Ethernet21,40000,1701,Access +str-msn2700-01,Ethernet84,str-7260-10,Ethernet22,40000,1702,Access +str-msn2700-01,Ethernet88,str-7260-10,Ethernet23,40000,1703,Access +str-msn2700-01,Ethernet92,str-7260-10,Ethernet24,40000,1704,Access +str-msn2700-01,Ethernet96,str-7260-10,Ethernet25,40000,1705,Access +str-msn2700-01,Ethernet100,str-7260-10,Ethernet26,40000,1706,Access +str-msn2700-01,Ethernet104,str-7260-10,Ethernet27,40000,1707,Access +str-msn2700-01,Ethernet108,str-7260-10,Ethernet28,40000,1708,Access +str-msn2700-01,Ethernet112,str-7260-10,Ethernet29,40000,1709,Access +str-msn2700-01,Ethernet116,str-7260-10,Ethernet30,40000,1710,Access +str-msn2700-01,Ethernet120,str-7260-10,Ethernet31,40000,1711,Access +str-msn2700-01,Ethernet124,str-7260-10,Ethernet32,40000,1712,Access +str-7260-11,Ethernet19,str-acs-serv-01,p4p1,40000,,Trunk +str-7260-11,Ethernet30,str-7260-10,Ethernet64,40000,1681-1712,Trunk diff --git a/ansible/group_vars/all/ceos.yml b/ansible/group_vars/all/ceos.yml index 1bbc1c7d03c..61559a09f8f 100644 --- a/ansible/group_vars/all/ceos.yml +++ b/ansible/group_vars/all/ceos.yml @@ -3,7 +3,10 @@ #ceos_image_filename: 
cEOS64-lab-4.23.2F.tar.xz #ceos_image_orig: ceosimage:4.23.2F #ceos_image: ceosimage:4.23.2F-1 -ceos_image_filename: cEOS64-lab-4.25.5.1M.tar -ceos_image_orig: ceosimage:4.25.5.1M -ceos_image: ceosimage:4.25.5.1M-1 +#ceos_image_filename: cEOS64-lab-4.25.5.1M.tar +#ceos_image_orig: ceosimage:4.25.5.1M +#ceos_image: ceosimage:4.25.5.1M-1 +ceos_image_filename: cEOS64-lab-4.29.3M.tar +ceos_image_orig: ceosimage:4.29.3M +ceos_image: ceosimage:4.29.3M-1 skip_ceos_image_downloading: false diff --git a/ansible/group_vars/fanout/secrets.yml b/ansible/group_vars/fanout/secrets.yml index 735cd95babc..f1218d53c5d 100644 --- a/ansible/group_vars/fanout/secrets.yml +++ b/ansible/group_vars/fanout/secrets.yml @@ -14,3 +14,6 @@ fanout_network_password: netpassword # Credential for accessing the Linux shell fanout_shell_user: shelladmin fanout_shell_password: shellpassword + +#fanout_tacacs_sonic_user: admin +#fanout_tacacs_sonic_password: password diff --git a/ansible/group_vars/ixia/creds.yml b/ansible/group_vars/ixia/creds.yml index 13759d235b0..e7c2f91739e 100644 --- a/ansible/group_vars/ixia/creds.yml +++ b/ansible/group_vars/ixia/creds.yml @@ -4,3 +4,8 @@ ixia_api_server: rest_port: 443 session_id: none +snappi_api_server: + user: admin + password: admin + rest_port: 443 + session_id: none diff --git a/ansible/group_vars/lab/lab.yml b/ansible/group_vars/lab/lab.yml index 49027256cb9..c02f6970ed8 100644 --- a/ansible/group_vars/lab/lab.yml +++ b/ansible/group_vars/lab/lab.yml @@ -12,7 +12,7 @@ syslog_servers: ['10.0.0.5', '10.0.0.6'] dns_servers: ['10.0.0.5', '10.0.0.6'] # forced_mgmt_routes -forced_mgmt_routes: ['172.17.0.1'] +forced_mgmt_routes: ['172.17.0.1/24'] # ErspanDestinationIpv4 erspan_dest: ['10.0.0.7'] @@ -21,13 +21,17 @@ radius_servers: [] radius_passkey: testing123 -tacacs_servers: ['10.0.0.9', '10.0.0.8'] - +# It can be a real lab tacacs server. 
+tacacs_servers: ['172.17.0.6'] tacacs_passkey: testing123 # tacacs grous tacacs_group: 'testlab' +# Determine whether enable tacacs authentication during deploy-minigraph. If false, use local authentication. +# If yes, authenticate using servers configured in `tacacs_servers` +tacacs_enabled_by_default: false + # snmp servers snmp_servers: ['10.0.0.9'] diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index 608f324d006..7caef6d52b3 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -2288,176 +2288,6 @@ sensors_checks: - mp2975-i2c-5-6e/PMIC-5 PSU 12V Rail Curr (in1)/curr1_alarm - mp2975-i2c-5-6e/PMIC-5 PSU 12V Rail Pwr (in1)/power1_alarm - x86_64-arista_7050_qx32: - alarms: - fan: [] - power: - - dps460-i2c-8-58/vin/in1_min_alarm - - dps460-i2c-8-58/vin/in1_max_alarm - - dps460-i2c-8-58/vin/in1_lcrit_alarm - - dps460-i2c-8-58/vin/in1_crit_alarm - - dps460-i2c-8-58/vout1/in3_min_alarm - - dps460-i2c-8-58/vout1/in3_max_alarm - - dps460-i2c-8-58/vout1/in3_lcrit_alarm - - dps460-i2c-8-58/vout1/in3_crit_alarm - - dps460-i2c-8-58/iin/curr1_max_alarm - - dps460-i2c-8-58/iin/curr1_crit_alarm - - dps460-i2c-8-58/iout1/curr2_max_alarm - - dps460-i2c-8-58/iout1/curr2_lcrit_alarm - - dps460-i2c-8-58/iout1/curr2_crit_alarm - - dps460-i2c-9-58/vin/in1_min_alarm - - dps460-i2c-9-58/vin/in1_max_alarm - - dps460-i2c-9-58/vin/in1_lcrit_alarm - - dps460-i2c-9-58/vin/in1_crit_alarm - - dps460-i2c-9-58/vout1/in3_min_alarm - - dps460-i2c-9-58/vout1/in3_max_alarm - - dps460-i2c-9-58/vout1/in3_lcrit_alarm - - dps460-i2c-9-58/vout1/in3_crit_alarm - - dps460-i2c-9-58/iin/curr1_max_alarm - - dps460-i2c-9-58/iin/curr1_crit_alarm - - dps460-i2c-9-58/iout1/curr2_max_alarm - - dps460-i2c-9-58/iout1/curr2_lcrit_alarm - - dps460-i2c-9-58/iout1/curr2_crit_alarm - - temp: - - dps460-i2c-8-58/Power supply 1 inlet temp sensor/temp1_max_alarm - - dps460-i2c-8-58/Power supply 1 inlet 
temp sensor/temp1_min_alarm - - dps460-i2c-8-58/Power supply 1 inlet temp sensor/temp1_crit_alarm - - dps460-i2c-8-58/Power supply 1 inlet temp sensor/temp1_lcrit_alarm - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_max_alarm - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_min_alarm - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_crit_alarm - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_lcrit_alarm - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_max_alarm - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_min_alarm - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_crit_alarm - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_lcrit_alarm - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_max_alarm - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_min_alarm - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_crit_alarm - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_lcrit_alarm - - compares: - fan: [] - power: [] - temp: - - - k10temp-pci-00c3/Cpu temp sensor/temp1_input - - k10temp-pci-00c3/Cpu temp sensor/temp1_max - - - lm73-i2c-6-48/Back panel temp sensor/temp1_input - - lm73-i2c-6-48/Back panel temp sensor/temp1_max - - - max6658-i2c-5-4c/Board temp sensor/temp1_input - - max6658-i2c-5-4c/Board temp sensor/temp1_max - - - max6658-i2c-5-4c/Front panel temp sensor/temp2_input - - max6658-i2c-5-4c/Front panel temp sensor/temp2_max - - - dps460-i2c-8-58/Power supply 1 inlet temp sensor/temp1_input - - dps460-i2c-8-58/Power supply 1 inlet temp sensor/temp1_max - - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_input - - dps460-i2c-8-58/Power supply 1 internal sensor/temp2_max - - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_input - - dps460-i2c-9-58/Power supply 2 inlet temp sensor/temp1_max - - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_input - - dps460-i2c-9-58/Power supply 2 internal sensor/temp2_max - - non_zero: - fan: - - 
dps460-i2c-8-58/fan1/fan1_input - - dps460-i2c-9-58/fan1/fan1_input - power: [] - temp: [] - - psu_skips: {} - sensor_skip_per_version: {} - - x86_64-arista_7260cx3_64: - alarms: - fan: - - pmbus-i2c-3-58/fan1/fan1_alarm - - pmbus-i2c-4-58/fan1/fan1_alarm - - pmbus-i2c-3-58/fan1/fan1_fault - - pmbus-i2c-4-58/fan1/fan1_fault - - la_cpld-i2c-85-60/fan1/fan1_fault - - la_cpld-i2c-85-60/fan2/fan2_fault - - la_cpld-i2c-85-60/fan3/fan3_fault - - la_cpld-i2c-85-60/fan4/fan4_fault - power: - - pmbus-i2c-3-58/iin/curr1_max_alarm - - pmbus-i2c-3-58/iout1/curr2_max_alarm - - pmbus-i2c-3-58/iout1/curr2_crit_alarm - - pmbus-i2c-3-58/iout2/curr3_crit_alarm - - pmbus-i2c-3-58/vin/in1_alarm - - pmbus-i2c-3-58/vout1/in2_lcrit_alarm - - pmbus-i2c-3-58/vout1/in2_crit_alarm - - pmbus-i2c-4-58/iin/curr1_max_alarm - - pmbus-i2c-4-58/iout1/curr2_max_alarm - - pmbus-i2c-4-58/iout1/curr2_crit_alarm - - pmbus-i2c-4-58/iout2/curr3_crit_alarm - - pmbus-i2c-4-58/vin/in1_alarm - - pmbus-i2c-4-58/vout1/in2_lcrit_alarm - - pmbus-i2c-4-58/vout1/in2_crit_alarm - temp: - - coretemp-isa-0000/Package id 0/temp1_crit_alarm - - coretemp-isa-0000/Core 0/temp2_crit_alarm - - coretemp-isa-0000/Core 1/temp3_crit_alarm - - lm73-i2c-88-48/Front panel temp sensor/temp1_min_alarm - - lm73-i2c-88-48/Front panel temp sensor/temp1_max_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_min_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_max_alarm - - max6658-i2c-1-4c/Asic temp sensor/temp1_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_min_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max_alarm - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_min_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_crit_alarm - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_fault - - pmbus-i2c-3-58/Power supply 1 exhaust temp sensor/temp3_alarm - - 
pmbus-i2c-3-58/Power supply 1 inlet temp sensor/temp2_alarm - - pmbus-i2c-3-58/Power supply 1 hotspot sensor/temp1_alarm - - pmbus-i2c-4-58/Power supply 2 exhaust temp sensor/temp3_alarm - - pmbus-i2c-4-58/Power supply 2 inlet temp sensor/temp2_alarm - - pmbus-i2c-4-58/Power supply 2 hotspot sensor/temp1_alarm - - compares: - fan: [] - power: - - - pmbus-i2c-3-58/iin/curr1_input - - pmbus-i2c-3-58/iin/curr1_max - - - pmbus-i2c-3-58/iout1/curr2_input - - pmbus-i2c-3-58/iout1/curr2_max - - - pmbus-i2c-4-58/iin/curr1_input - - pmbus-i2c-4-58/iin/curr1_max - - - pmbus-i2c-4-58/iout1/curr2_input - - pmbus-i2c-4-58/iout1/curr2_max - temp: - - - coretemp-isa-0000/Package id 0/temp1_input - - coretemp-isa-0000/Package id 0/temp1_max - - - coretemp-isa-0000/Core 0/temp2_input - - coretemp-isa-0000/Core 0/temp2_max - - - coretemp-isa-0000/Core 1/temp3_input - - coretemp-isa-0000/Core 1/temp3_max - - - lm73-i2c-88-48/Front panel temp sensor/temp1_input - - lm73-i2c-88-48/Front panel temp sensor/temp1_max - - - max6658-i2c-1-4c/Asic temp sensor/temp1_input - - max6658-i2c-1-4c/Asic temp sensor/temp1_max - - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_input - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max - - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_input - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max - - non_zero: - fan: - - pmbus-i2c-3-58/fan1/fan1_input - - pmbus-i2c-4-58/fan1/fan1_input - - la_cpld-i2c-85-60/fan1/fan1_input - - la_cpld-i2c-85-60/fan2/fan2_input - - la_cpld-i2c-85-60/fan3/fan3_input - - la_cpld-i2c-85-60/fan4/fan4_input - power: [] - temp: [] - - psu_skips: {} - sensor_skip_per_version: {} - x86_64-ingrasys_s9100-r0: alarms: fan: @@ -2813,76 +2643,6 @@ sensors_checks: psu_skips: {} sensor_skip_per_version: {} - x86_64-arista_7060_cx32s: - alarms: - fan: - - pmbus-i2c-5-58/fan1/fan1_alarm - - pmbus-i2c-6-58/fan1/fan1_alarm - - pmbus-i2c-5-58/fan1/fan1_fault - - pmbus-i2c-6-58/fan1/fan1_fault - power: - - 
pmbus-i2c-5-58/iin/curr1_max_alarm - - pmbus-i2c-5-58/iout1/curr2_max_alarm - - pmbus-i2c-5-58/iout1/curr2_crit_alarm - - pmbus-i2c-5-58/iout2/curr3_crit_alarm - - pmbus-i2c-5-58/vin/in1_alarm - - pmbus-i2c-5-58/vout1/in2_crit_alarm - - pmbus-i2c-5-58/vout1/in2_lcrit_alarm - - pmbus-i2c-6-58/iin/curr1_max_alarm - - pmbus-i2c-6-58/iout1/curr2_max_alarm - - pmbus-i2c-6-58/iout1/curr2_crit_alarm - - pmbus-i2c-6-58/iout2/curr3_crit_alarm - - pmbus-i2c-6-58/vin/in1_alarm - - pmbus-i2c-6-58/vout1/in2_crit_alarm - - pmbus-i2c-6-58/vout1/in2_lcrit_alarm - temp: - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_min_alarm - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max_alarm - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_crit_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_min_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_max_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_crit_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_fault - - pmbus-i2c-5-58/Power supply 2 hotspot sensor/temp1_alarm - - pmbus-i2c-5-58/Power supply 2 inlet temp sensor/temp2_alarm - - pmbus-i2c-5-58/Power supply 2 exhaust temp sensor/temp3_alarm - - pmbus-i2c-6-58/Power supply 1 hotspot sensor/temp1_alarm - - pmbus-i2c-6-58/Power supply 1 inlet temp sensor/temp2_alarm - - pmbus-i2c-6-58/Power supply 1 exhaust temp sensor/temp3_alarm - - compares: - fan: [] - power: - - - pmbus-i2c-5-58/iin/curr1_input - - pmbus-i2c-5-58/iin/curr1_max - - - pmbus-i2c-5-58/iout1/curr2_input - - pmbus-i2c-5-58/iout1/curr2_max - - - pmbus-i2c-6-58/iin/curr1_input - - pmbus-i2c-6-58/iin/curr1_max - - - pmbus-i2c-6-58/iout1/curr2_input - - pmbus-i2c-6-58/iout1/curr2_max - temp: - - - k10temp-pci-00c3/Cpu temp sensor/temp1_input - - k10temp-pci-00c3/Cpu temp sensor/temp1_max - - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_input - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max - - - max6658-i2c-3-4c/Back panel temp sensor/temp2_input - - max6658-i2c-3-4c/Back panel temp 
sensor/temp2_max - - non_zero: - fan: - - pmbus-i2c-5-58/fan1/fan1_input - - pmbus-i2c-6-58/fan1/fan1_input - - crow_cpld-i2c-3-60/fan1/fan1_input - - crow_cpld-i2c-3-60/fan2/fan2_input - - crow_cpld-i2c-3-60/fan3/fan3_input - - crow_cpld-i2c-3-60/fan4/fan4_input - power: [] - temp: [] - - psu_skips: {} - sensor_skip_per_version: {} - x86_64-accton_as7712_32x-r0: alarms: fan: @@ -3137,93 +2897,6 @@ sensors_checks: psu_skips: {} sensor_skip_per_version: {} - x86_64-arista_7170_64c: - alarms: - fan: - # to specify regular expression use backslash '\' at the beginning and end of expression - # Platform has two chip names: - # dps1900-i2c-X-58 for Sonic 201911 - # pmbus-i2c-X-58 for Sonic master - - \[a-zA-Z0-9]*\-i2c-6-58\/fan1/fan1_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/fan1/fan1_alarm - - la_cpld-i2c-93-60/fan1/fan1_fault - - la_cpld-i2c-93-60/fan2/fan2_fault - - la_cpld-i2c-93-60/fan3/fan3_fault - - la_cpld-i2c-93-60/fan4/fan4_fault - power: - - \[a-zA-Z0-9]*\-i2c-6-58/iin/curr1_max_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/iout1/curr2_crit_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/iout1/curr2_max_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/vin/in1_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/vout1/in2_crit_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/vout1/in2_lcrit_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/iin/curr1_max_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/iout1/curr2_crit_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/iout1/curr2_max_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/vin/in1_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/vout1/in2_crit_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/vout1/in2_lcrit_alarm - temp: - # to specify regular expression use backslash '\' at the beginning and end of expression - - coretemp-isa-0000/\P[a-z]*\ id 0/temp1_crit_alarm - - coretemp-isa-0000/Core 0/temp2_crit_alarm - - coretemp-isa-0000/Core 1/temp3_crit_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/PSU1 primary hotspot temp/temp1_alarm - - \[a-zA-Z0-9]*\-i2c-6-58/PSU1 inlet temp/temp2_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/PSU2 primary hotspot temp/temp1_alarm - - \[a-zA-Z0-9]*\-i2c-7-58/PSU2 
inlet temp/temp2_alarm - - lm73-i2c-96-48/Front air temp/temp1_max_alarm - - lm73-i2c-96-48/Front air temp/temp1_min_alarm - - max6658-i2c-8-4c/Temp sensor near ASIC/temp1_crit_alarm - - max6658-i2c-8-4c/Temp sensor near ASIC/temp1_max_alarm - - max6658-i2c-8-4c/Temp sensor near ASIC/temp1_min_alarm - - max6658-i2c-81-4c/Rear air temp1/temp1_crit_alarm - - max6658-i2c-81-4c/Rear air temp1/temp1_max_alarm - - max6658-i2c-81-4c/Rear air temp1/temp1_min_alarm - - max6658-i2c-81-4c/Rear air temp2/temp2_crit_alarm - - max6658-i2c-81-4c/Rear air temp2/temp2_max_alarm - - max6658-i2c-81-4c/Rear air temp2/temp2_min_alarm - - compares: - fan: [] - power: - - - \[a-zA-Z0-9]*\-i2c-6-58/iin/curr1_input - - \[a-zA-Z0-9]*\-i2c-6-58/iin/curr1_max - - - \[a-zA-Z0-9]*\-i2c-6-58/iout1/curr2_input - - \[a-zA-Z0-9]*\-i2c-6-58/iout1/curr2_max - - - \[a-zA-Z0-9]*\-i2c-7-58/iin/curr1_input - - \[a-zA-Z0-9]*\-i2c-7-58/iin/curr1_max - - - \[a-zA-Z0-9]*\-i2c-7-58/iout1/curr2_input - - \[a-zA-Z0-9]*\-i2c-7-58/iout1/curr2_max - temp: - # to specify regular expression use backslash '\' at the beginning and end of expression - - - coretemp-isa-0000/\P[a-z]*\ id 0/temp1_input - - coretemp-isa-0000/\P[a-z]*\ id 0/temp1_max - - - coretemp-isa-0000/Core 0/temp2_input - - coretemp-isa-0000/Core 0/temp2_max - - - coretemp-isa-0000/Core 1/temp3_input - - coretemp-isa-0000/Core 1/temp3_max - - - lm73-i2c-96-48/Front air temp/temp1_input - - lm73-i2c-96-48/Front air temp/temp1_max - - - max6658-i2c-8-4c/Temp sensor near ASIC/temp1_input - - max6658-i2c-8-4c/Temp sensor near ASIC/temp1_max - - - max6658-i2c-81-4c/Rear air temp1/temp1_input - - max6658-i2c-81-4c/Rear air temp1/temp1_max - - - max6658-i2c-81-4c/Rear air temp2/temp2_input - - max6658-i2c-81-4c/Rear air temp2/temp2_max - - non_zero: - fan: - - la_cpld-i2c-93-60/fan1/fan1_input - - la_cpld-i2c-93-60/fan2/fan2_input - - la_cpld-i2c-93-60/fan3/fan3_input - - la_cpld-i2c-93-60/fan4/fan4_input - power: [] - temp: [] - - psu_skips: {} - 
sensor_skip_per_version: {} - x86_64-cel_e1031-r0: alarms: fan: [] @@ -3250,90 +2923,6 @@ sensors_checks: psu_skips: {} sensor_skip_per_version: {} - x86_64-arista_7050_qx32s: - alarms: - fan: - - pmbus-i2c-5-58/fan1/fan1_alarm - - pmbus-i2c-6-58/fan1/fan1_alarm - - power: - - pmbus-i2c-5-58/iin/curr1_max_alarm - - pmbus-i2c-5-58/iout1/curr2_crit_alarm - - pmbus-i2c-5-58/iout1/curr2_max_alarm - - pmbus-i2c-5-58/iout2/curr3_crit_alarm - - pmbus-i2c-5-58/vin/in1_alarm - - pmbus-i2c-5-58/vout1/in2_crit_alarm - - pmbus-i2c-5-58/vout1/in2_lcrit_alarm - - temp: - - max6658-i2c-2-4c/Board temp sensor/temp1_max_alarm - - max6658-i2c-2-4c/Board temp sensor/temp1_min_alarm - - max6658-i2c-2-4c/Board temp sensor/temp1_crit_alarm - - max6658-i2c-2-4c/Front panel temp sensor/temp2_max_alarm - - max6658-i2c-2-4c/Front panel temp sensor/temp2_min_alarm - - max6658-i2c-2-4c/Front panel temp sensor/temp2_crit_alarm - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max_alarm - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_min_alarm - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_crit_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_max_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_min_alarm - - max6658-i2c-3-4c/Back panel temp sensor/temp2_crit_alarm - - pmbus-i2c-5-58/Power supply 2 hotspot sensor/temp1_alarm - - pmbus-i2c-5-58/Power supply 2 inlet temp sensor/temp2_alarm - - pmbus-i2c-5-58/Power supply 2 sensor/temp3_alarm - - compares: - fan: [] - power: - - - pmbus-i2c-5-58/iin/curr1_input - - pmbus-i2c-5-58/iin/curr1_max - - - pmbus-i2c-5-58/iout1/curr2_input - - pmbus-i2c-5-58/iout1/curr2_max - - temp: - - - k10temp-pci-00c3/Cpu temp sensor/temp1_input - - k10temp-pci-00c3/Cpu temp sensor/temp1_max - - - max6658-i2c-2-4c/Board temp sensor/temp1_input - - max6658-i2c-2-4c/Board temp sensor/temp1_max - - - max6658-i2c-2-4c/Front panel temp sensor/temp2_input - - max6658-i2c-2-4c/Front panel temp sensor/temp2_max - - - max6658-i2c-3-4c/Cpu board temp 
sensor/temp1_input - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_max - - - max6658-i2c-3-4c/Back panel temp sensor/temp2_input - - max6658-i2c-3-4c/Back panel temp sensor/temp2_max - - non_zero: - fan: - - crow_cpld-i2c-3-60/fan1/fan1_input - - crow_cpld-i2c-3-60/fan2/fan2_input - - crow_cpld-i2c-3-60/fan3/fan3_input - - crow_cpld-i2c-3-60/fan4/fan4_input - - pmbus-i2c-5-58/fan1/fan1_input - - pmbus-i2c-6-58/fan1/fan1_input - - power: - - pmbus-i2c-5-58/iin/curr1_input - - pmbus-i2c-5-58/iout1/curr2_input - - pmbus-i2c-5-58/iout2/curr3_input - - pmbus-i2c-5-58/pin/power1_input - - pmbus-i2c-5-58/pout1/power2_input - - pmbus-i2c-5-58/pout2/power3_input - - pmbus-i2c-5-58/vin/in1_input - - pmbus-i2c-5-58/vout1/in2_input - - temp: - - k10temp-pci-00c3/Cpu temp sensor/temp1_input - - max6658-i2c-2-4c/Board temp sensor/temp1_input - - max6658-i2c-2-4c/Front panel temp sensor/temp2_input - - max6658-i2c-3-4c/Cpu board temp sensor/temp1_input - - max6658-i2c-3-4c/Back panel temp sensor/temp2_input - - pmbus-i2c-5-58/Power supply 2 hotspot sensor/temp1_input - - pmbus-i2c-5-58/Power supply 2 inlet temp sensor/temp2_input - - pmbus-i2c-5-58/Power supply 2 sensor/temp3_input - - psu_skips: {} - sensor_skip_per_version: {} - et6448m: alarms: fan: diff --git a/ansible/group_vars/sonic/variables b/ansible/group_vars/sonic/variables index dc9a69411fd..99f2986a69e 100644 --- a/ansible/group_vars/sonic/variables +++ b/ansible/group_vars/sonic/variables @@ -6,7 +6,7 @@ sonic_version: "v2" broadcom_hwskus: [ "Force10-S6000", "Accton-AS7712-32X", "Celestica-DX010-C32", "Seastone-DX010", "Celestica-E1031-T48S4"] -broadcom_td2_hwskus: ['Force10-S6000', 'Force10-S6000-Q24S32', 'Arista-7050-QX32', 'Arista-7050-QX-32S'] +broadcom_td2_hwskus: ['Force10-S6000', 'Force10-S6000-Q24S32', 'Arista-7050-QX32', 'Arista-7050-QX-32S', 'Arista-7050QX32S-Q32'] broadcom_td3_hwskus: ['Arista-7050CX3-32S-C32', 'Arista-7050CX3-32S-D48C8'] broadcom_th_hwskus: ['Force10-S6100', 'Arista-7060CX-32S-C32', 
'Arista-7060CX-32S-C32-T1', 'Arista-7060CX-32S-D48C8', 'Celestica-DX010-C32', "Seastone-DX010" ] broadcom_th2_hwskus: ['Arista-7260CX3-D108C8', 'Arista-7260CX3-C64', 'Arista-7260CX3-Q64'] @@ -14,7 +14,7 @@ broadcom_th3_hwskus: ['DellEMC-Z9332f-M-O16C64', 'DellEMC-Z9332f-O32'] mellanox_spc1_hwskus: [ 'ACS-MSN2700', 'ACS-MSN2740', 'ACS-MSN2100', 'ACS-MSN2410', 'ACS-MSN2010', 'Mellanox-SN2700', 'Mellanox-SN2700-D48C8' ] mellanox_spc2_hwskus: [ 'ACS-MSN3700', 'ACS-MSN3700C', 'ACS-MSN3800', 'Mellanox-SN3800-D112C8' , 'ACS-MSN3420'] -mellanox_spc3_hwskus: [ 'ACS-MSN4700', 'ACS-MSN4600', 'ACS-MSN4600C', 'ACS-MSN4410', 'Mellanox-SN4600C-D112C8'] +mellanox_spc3_hwskus: [ 'ACS-MSN4700', 'ACS-MSN4600', 'ACS-MSN4600C', 'ACS-MSN4410', 'Mellanox-SN4600C-D112C8', 'Mellanox-SN4600C-C64'] mellanox_hwskus: "{{ mellanox_spc1_hwskus + mellanox_spc2_hwskus + mellanox_spc3_hwskus }}" cavium_hwskus: [ "AS7512", "XP-SIM" ] @@ -23,7 +23,8 @@ barefoot_hwskus: [ "montara", "mavericks", "Arista-7170-64C", "newport", "Arista marvell_hwskus: [ "et6448m" ] -cisco_hwskus: ["64x100Gb"] +cisco_hwskus: ["Cisco-8102-C64"] +cisco-8000_gb_hwskus: ["Cisco-8102-C64"] ## Note: ## Docker volumes should be list instead of dict. 
However, if we want to keep code DRY, we diff --git a/ansible/library/announce_routes.py b/ansible/library/announce_routes.py index 331df6ebbda..3ae7d3b8c59 100644 --- a/ansible/library/announce_routes.py +++ b/ansible/library/announce_routes.py @@ -5,10 +5,18 @@ import yaml import re import requests -import time +import ipaddress +import json +import sys +import socket from ansible.module_utils.basic import * +if sys.version_info.major == 3: + UNICODE_TYPE = str +else: + UNICODE_TYPE = unicode + DOCUMENTATION = ''' module: announce_routes short_description: announce routes to exabgp processes running in PTF container @@ -58,22 +66,48 @@ IPV4_BASE_PORT = 5000 IPV6_BASE_PORT = 6000 +# Describe default number of COLOs +COLO_NUMBER = 30 +# Describe default number of M0 devices in 1 colo +M0_NUMBER = 16 +# Describe default number of subnet in a M0 device +M0_SUBNET_NUMBER = 2 +# Describe default number of members in a M0 subnet +M0_SUBNET_SIZE = 64 +# Describe default number of MX device connected to M0 device +MX_NUMBER = 2 +# Describe default number of subnet in a MX device +MX_SUBNET_NUMBER = 2 +# Describe default number of subnet members +MX_SUBNET_SIZE = 64 +# Describe default start asn of MXs +MX_ASN_START = 68000 +# Describe default start asn of M0s +M0_ASN_START = 64600 +# Describe default IPv6 subnet prefix length of MX +MX_SUBNET_PREFIX_LEN_V6 = 64 +# Describe default IPv6 subnet prefix length of M0 +M0_SUBNET_PREFIX_LEN_V6 = 64 + def wait_for_http(host_ip, http_port, timeout=10): """Waits for HTTP server to open. 
Tries until timeout is reached and returns whether localhost received HTTP response""" started = False tries = 0 + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(2) while not started and tries < timeout: - if os.system("curl {}:{}".format(host_ip, http_port)) == 0: + try: + s.connect((host_ip, http_port)) started = True - tries += 1 - time.sleep(1) + except socket.error: + tries += 1 return started def get_topo_type(topo_name): - pattern = re.compile(r'^(t0|t1|ptf|fullmesh|dualtor|t2|mgmttor)') + pattern = re.compile(r'^(t0|t1|ptf|fullmesh|dualtor|t2|mgmttor|m0)') match = pattern.match(topo_name) if not match: return "unsupported" @@ -102,9 +136,19 @@ def change_routes(action, ptf_ip, port, routes): messages.append("{} route {} next-hop {}".format(action, prefix, nexthop)) wait_for_http(ptf_ip, port, timeout=60) url = "http://%s:%d" % (ptf_ip, port) - data = { "commands": ";".join(messages) } - r = requests.post(url, data=data, timeout=90) - assert r.status_code == 200 + data = {"commands": ";".join(messages)} + r = requests.post(url, data=data, timeout=90, proxies={"http": None, "https": None}) + if r.status_code != 200: + raise Exception( + "Change routes failed: url={}, data={}, r.status_code={}, r.reason={}, r.headers={}, r.text={}".format( + url, + json.dumps(data), + r.status_code, + r.reason, + r.headers, + r.text + ) + ) # AS path from Leaf router for T0 topology @@ -138,6 +182,105 @@ def get_uplink_router_as_path(uplink_router_type, spine_asn): return default_route_as_path +# Generate prefixs of route +def generate_prefix(subnet_size, ip_base, offset): + ip = get_new_ip(ip_base, offset) + prefixlen = (ip_base.max_prefixlen - int(math.log(subnet_size, 2))) + prefix = "{}/{}".format(ip, prefixlen) + + return prefix + + +def generate_m0_upstream_routes(nexthop, colo_number, m0_number, m0_subnet_number, m0_asn_start, mx_number, + mx_subnet_number, ip_base, m0_subnet_size, mx_subnet_size, mx_asn_start): + routes = [] + + # Generate default 
route + routes.append(("0.0.0.0/0" if ip_base.version == 4 else "::/0", nexthop, None)) + + # Number of direct subnet members connected to a M0 device + m0_direct_subnet_member_count = m0_subnet_number * m0_subnet_size + # Number of MX subnet members connected to a M0 device + m0_mx_subnet_member_count = mx_number * mx_subnet_number * mx_subnet_size + # Total number of subnet members connected to a M0 device + m0_subnet_member_count = m0_direct_subnet_member_count + m0_mx_subnet_member_count + for colo in range(0, colo_number): + # Number of subnet members of colo that has been calculated + colo_subnet_member_offset = colo * m0_number * m0_subnet_member_count + for m0_index in range(0, m0_number): + # Skip M0 direct routes + if colo == 0 and m0_index == 0: + continue + + # Number of subnet members of M0 in current colo that has been caculated + m0_subnet_member_offset = m0_index * m0_subnet_member_count + curr_m0_asn = m0_asn_start + m0_index + prefix = None + for m0_subnet in range(0, m0_subnet_number): + # Number of subnet members of subnet in current M0 that has been caculated + subnet_member_offset = m0_subnet * m0_subnet_size + offset = colo_subnet_member_offset + m0_subnet_member_offset + subnet_member_offset + + prefix = generate_prefix(m0_subnet_size, ip_base, offset) + + aspath = "{}".format(curr_m0_asn) + routes.append((prefix, nexthop, aspath)) + + # Start ip of MX subnets + ip_base_mx = ip_base if prefix is None else get_next_ip_by_net(prefix) + # Number of subnet members connected to a MX device + mx_subnet_member_count = mx_subnet_number * mx_subnet_size + for mx in range(mx_number): + # Number of subnet members of MX that has been calculated + mx_subnet_member_offset = mx * mx_subnet_member_count + for mx_subnet in range(mx_subnet_number): + # Number of subnet members of subnet in current MX that has been calculated + subnet_member_offset = mx_subnet * mx_subnet_size + offset = mx_subnet_member_offset + subnet_member_offset + + prefix = 
generate_prefix(mx_subnet_size, ip_base_mx, offset) + curr_mx_asn = mx_asn_start + mx + aspath = "{} {}".format(curr_m0_asn, curr_mx_asn) + + routes.append((prefix, nexthop, aspath)) + + return routes + + +def generate_m0_downstream_routes(nexthop, mx_subnet_number, mx_subnet_size, m0_subnet_number, m0_subnet_size, ip_base, + mx_index): + routes = [] + + # Number of direct subnet members connected to a M0 device + m0_direct_subnet_member_count = m0_subnet_number * m0_subnet_size + # Number of subnet members connected to a MX device + mx_subnet_member_count = mx_subnet_number * mx_subnet_size + # Number of subnet members of MX that has been calculated + mx_subnet_member_offset = mx_index * mx_subnet_member_count + for subnet in range(0, mx_subnet_number): + # Not need after asn path of MX + # Number of subnet members of subnet in current MX that has been caculated + subnet_member_offset = subnet * mx_subnet_size + offset = m0_direct_subnet_member_count + mx_subnet_member_offset + subnet_member_offset + prefix = generate_prefix(mx_subnet_size, ip_base, offset) + routes.append((prefix, nexthop, None)) + + return routes + + +def generate_m0_routes(nexthop, colo_number, m0_number, m0_subnet_number, m0_asn_start, router_type, m0_subnet_size, + mx_number, mx_subnet_number, ip_base, mx_subnet_size, mx_asn_start, mx_index): + if router_type == "m1": + return generate_m0_upstream_routes(nexthop, colo_number, m0_number, m0_subnet_number, m0_asn_start, mx_number, + mx_subnet_number, ip_base, m0_subnet_size, mx_subnet_size, mx_asn_start) + + if router_type == "mx": + return generate_m0_downstream_routes(nexthop, mx_subnet_number, mx_subnet_size, m0_subnet_number, + m0_subnet_size, ip_base, mx_index) + + return [] + + def generate_routes(family, podset_number, tor_number, tor_subnet_number, spine_asn, leaf_asn_start, tor_asn_start, nexthop, nexthop_v6, tor_subnet_size, max_tor_subnet_number, topo, @@ -174,7 +317,7 @@ def generate_routes(family, podset_number, tor_number, 
tor_subnet_number, continue elif podset > first_third_podset_number and podset < second_third_podset_number and set_num != 1: continue - if router_type == "spine": + if router_type == "spine" or router_type == "mgmtleaf": # Skip podset 0 for T2 if podset == 0: continue @@ -198,6 +341,9 @@ def generate_routes(family, podset_number, tor_number, tor_subnet_number, # Skip non podset 0 for T0 if podset != 0: continue + # Skip subnet 0 (vlan ip) for M0 + elif topo == "m0" and subnet == 0: + continue elif tor != tor_index: continue @@ -220,7 +366,7 @@ def generate_routes(family, podset_number, tor_number, tor_subnet_number, aspath = None if router_type == "core": aspath = "{} {}".format(leaf_asn, core_ra_asn) - elif router_type == "spine": + elif router_type == "spine" or router_type == "mgmtleaf": aspath = "{} {}".format(leaf_asn, tor_asn) elif router_type == "leaf": if topo == "t2": @@ -317,6 +463,140 @@ def fib_t1_lag(topo, ptf_ip, no_default_route=False, action="announce"): change_routes(action, ptf_ip, port, routes_vips) +def get_new_ip(curr_ip, skip_count): + """ + Get the [skip_count]th ip after curr_ip + """ + new_ip = ipaddress.ip_address(int(curr_ip) + skip_count) + return new_ip + + +def get_next_ip_by_net(net_str): + """ + Get the nearest next non-overlapping ip address based on the net_str + Sample input: + str, "192.168.0.1/24" + Sample output: + , 192.168.3.0/32 + """ + net = ipaddress.ip_network(UNICODE_TYPE(net_str), strict=False) + net_size = int(net.broadcast_address) + 1 - int(net.network_address) + next_net = get_new_ip(net.network_address, net_size) + return next_net + + +def get_next_ip(skip_nets): + """ + Get minimum ip addresss which is bigger than any ip address in skip_nets. 
+ Sample input: + [ + "192.168.0.1/24", + "192.168.0.1/25", + "192.168.0.128/25", + "192.168.2.1/24", + ] + Sample output: + , 192.168.3.0/32 + """ + max_next_ip = None + for vlan in skip_nets: + next_ip = get_next_ip_by_net(vlan) + if max_next_ip is None: + max_next_ip = next_ip + elif next_ip > max_next_ip: + max_next_ip = next_ip + return max_next_ip + + +""" +For M0, we have 2 sets of routes that we are going to advertised + - 1st set routes are advertised by the upstream VMs (M1 devices) + - 2nd set routes are advertised by the downstream VMs (MX devices) + +The total number of routes are controlled by the colo_number, m0_number, mx_subnet_number, m0_subnet_number and number +of MX devices from the topology file. +We would have the following distribution: +- M1 Routes: + - 1 default route, prefix: 0.0.0.0/0 + - Subnet routes of M0 devices connected to M1 devices other than DUT, + count: (colo_number * m0_number - 1) * m0_subnet_number + - Subnet routes of MX devices connected to M0 devices connected M1 devices, + count: (colo_number * m0_number - 1) * mx_number * mx_subnet_number +- MX Routes: + - Subunet routes of MX, count: mx_subnet_number +""" + + +def fib_m0(topo, ptf_ip, action="announce"): + common_config = topo['configuration_properties'].get('common', {}) + colo_number = common_config.get("colo_number", COLO_NUMBER) + m0_number = common_config.get("m0_number", M0_NUMBER) + nhipv4 = common_config.get("nhipv4", NHIPV4) + nhipv6 = common_config.get("nhipv6", NHIPV6) + m0_asn_start = common_config.get("m0_asn_start", M0_ASN_START) + m0_subnet_number = common_config.get("m0_subnet_number", M0_SUBNET_NUMBER) + m0_subnet_size = common_config.get("m0_subnet_size", M0_SUBNET_SIZE) + mx_subnet_size = common_config.get("mx_subnet_size", MX_SUBNET_SIZE) + mx_subnet_number = common_config.get("mx_subnet_number", MX_SUBNET_NUMBER) + mx_asn_start = common_config.get("mx_asn_start", MX_ASN_START) + m0_subnet_prefix_len_v6 = 
common_config.get("m0_subnet_prefix_len_v6", M0_SUBNET_PREFIX_LEN_V6) + mx_subnet_prefix_len_v6 = common_config.get("mx_subnet_prefix_len_v6", MX_SUBNET_PREFIX_LEN_V6) + + vms = topo['topology']['VMs'] + vms_config = topo['configuration'] + mx_list = list(filter(lambda x: "MX" in x, vms_config.keys())) + mx_number = len(mx_list) + + # In order to avoid overlapping the routes announced and the vlan of m0, get ip_start + vlan_configs = dict( + filter(lambda x: "default" not in x[0], topo["topology"]["DUT"]["vlan_configs"].items())) + vlan_prefixs = [] + for _, vlans in vlan_configs.items(): + for _, config in vlans.items(): + vlan_prefixs.append(config["prefix"]) + + ip_base = get_next_ip(vlan_prefixs) + ip_base_v6 = ipaddress.IPv6Address(UNICODE_TYPE("20c0:a800::0")) + + m1_routes_v4 = None + m1_routes_v6 = None + mx_index = -1 + for k, v in vms_config.items(): + vm_offset = vms[k]['vm_offset'] + port = IPV4_BASE_PORT + vm_offset + port6 = IPV6_BASE_PORT + vm_offset + + router_type = None + # Upstream + if "m1" in v["properties"]: + router_type = "m1" + # Downstream + elif "mx" in v["properties"]: + router_type = "mx" + mx_index += 1 + + # Routes announced by different M1s are the same, can reuse generated routes + if router_type == "m1" and m1_routes_v4 is not None: + routes_v4 = m1_routes_v4 + routes_v6 = m1_routes_v6 + else: + m0_subnet_size_v6 = 2 ** (128 - m0_subnet_prefix_len_v6) + mx_subnet_size_v6 = 2 ** (128 - mx_subnet_prefix_len_v6) + routes_v4 = generate_m0_routes(nhipv4, colo_number, m0_number, m0_subnet_number, m0_asn_start, router_type, + m0_subnet_size, mx_number, mx_subnet_number, ip_base, mx_subnet_size, + mx_asn_start, mx_index) + routes_v6 = generate_m0_routes(nhipv6, colo_number, m0_number, m0_subnet_number, m0_asn_start, router_type, + m0_subnet_size_v6, mx_number, mx_subnet_number, ip_base_v6, + mx_subnet_size_v6, mx_asn_start, mx_index) + + if router_type == "m1": + m1_routes_v4 = routes_v4 + m1_routes_v6 = routes_v6 + + change_routes(action, 
ptf_ip, port, routes_v4) + change_routes(action, ptf_ip, port6, routes_v6) + + """ For T2, we have 3 sets of routes that we are going to advertise - 1st set of 1/3 routes are advertised by the first 1/3 of the VMs @@ -468,6 +748,9 @@ def main(): elif topo_type == "t2": fib_t2_lag(topo, ptf_ip, action=action) module.exit_json(changed=True) + elif topo_type == "m0": + fib_m0(topo, ptf_ip, action=action) + module.exit_json(changed=True) else: module.exit_json(msg='Unsupported topology "{}" - skipping announcing routes'.format(topo_name)) except Exception as e: diff --git a/ansible/library/conn_graph_facts.py b/ansible/library/conn_graph_facts.py index eaf2ec0542b..f6a4f49a38b 100755 --- a/ansible/library/conn_graph_facts.py +++ b/ansible/library/conn_graph_facts.py @@ -64,6 +64,8 @@ server_links: each server port vlan ids device_console_info: The device's console server type, mgmtip, hwsku and protocol device_console_link: The console server port connected to the device + device_bmc_info: The device's bmc server type, mgmtip, hwsku and protocol + device_bmc_link: The bmc server port connected to the device device_pdu_info: The device's pdu server type, mgmtip, hwsku and protocol device_pdu_links: The pdu server ports connected to the device @@ -79,18 +81,15 @@ "HwSku": "Arista-7260QX-64", "Type": "FanoutLeaf" }, - "device_conn": [ - { - "StartPort": "Ethernet0", - "EndPort": "Ethernet33", - "StartDevice": "str-s6000-on-1", - "VlanID": "233", - "BandWidth": "40000", - "VlanMode": "Access", - "EndDevice": "str-7260-01" - }, - {...} - ], + "device_conn": { + "str-7260-11": { + "Ethernet0": { + "peerdevice": "str-7050qx-2", + "peerport": "Ethernet4", + "speed": "40000" + }, + } + }, "device_vlan_range": { "VlanRange": "201-980,1041-1100" }, @@ -129,12 +128,14 @@ def __init__(self, xmlfile): self.vlanrange = {} self.links = {} self.consolelinks = {} + self.bmclinks = {} self.pdulinks = {} self.server = defaultdict(dict) self.pngtag = 'PhysicalNetworkGraphDeclaration' 
self.dpgtag = 'DataPlaneGraph' self.pcgtag = 'PowerControlGraphDeclaration' self.csgtag = 'ConsoleGraphDeclaration' + self.bmcgtag = 'BmcGraphDeclaration' def port_vlanlist(self, vlanrange): vlans = [] @@ -160,21 +161,16 @@ def parse_graph(self): devices = deviceroot.findall('Device') if devices is not None: for dev in devices: - hostname = dev.attrib['Hostname'] + attributes = dev.attrib + hostname = attributes['Hostname'] if hostname is not None: deviceinfo[hostname] = {} - hwsku = dev.attrib['HwSku'] - devtype = dev.attrib['Type'] - card_type = "Linecard" - if 'CardType' in dev.attrib: - card_type = dev.attrib['CardType'] - hwsku_type = "predefined" - if "HwSkuType" in dev.attrib: - hwsku_type = dev.attrib["HwSkuType"] - deviceinfo[hostname]['HwSku'] = hwsku - deviceinfo[hostname]['Type'] = devtype - deviceinfo[hostname]['CardType'] = card_type - deviceinfo[hostname]["HwSkuType"] = hwsku_type + deviceinfo[hostname]["Hostname"] = hostname + deviceinfo[hostname]['HwSku'] = attributes.get('HwSku') + deviceinfo[hostname]['Type'] = attributes.get('Type') + deviceinfo[hostname]['CardType'] = attributes.get('CardType', 'Linecard') + deviceinfo[hostname]['HwSkuType'] = attributes.get('HwSkuType', 'predefined') + deviceinfo[hostname]['Os'] = attributes.get('Os') self.links[hostname] = {} devicel2info = {} devicel3s = self.root.find(self.dpgtag).findall('DevicesL3Info') @@ -195,6 +191,7 @@ def parse_graph(self): for l3info in devicel3s: hostname = l3info.attrib['Hostname'] if hostname is not None: + deviceinfo[hostname]["Hostname"] = hostname management_ip = l3info.find('ManagementIPInterface').attrib['Prefix'] deviceinfo[hostname]['ManagementIp'] = management_ip mgmtip = ipaddress.IPNetwork(management_ip) @@ -216,36 +213,101 @@ def parse_graph(self): devicescsg = devicecsgroot.findall('DeviceConsoleInfo') if devicescsg is not None: for dev in devicescsg: - hostname = dev.attrib['Hostname'] + attributes = dev.attrib + hostname = attributes['Hostname'] if hostname is not 
None: deviceinfo[hostname] = {} - hwsku = dev.attrib['HwSku'] - devtype = dev.attrib['Type'] - protocol = dev.attrib['Protocol'] - mgmt_ip = dev.attrib['ManagementIp'] - deviceinfo[hostname]['HwSku'] = hwsku - deviceinfo[hostname]['Type'] = devtype - deviceinfo[hostname]['Protocol'] = protocol + deviceinfo[hostname]["Hostname"] = hostname + deviceinfo[hostname]['HwSku'] = attributes.get('HwSku') + deviceinfo[hostname]['Type'] = attributes.get('Type') + deviceinfo[hostname]['Protocol'] = attributes.get('Protocol') + deviceinfo[hostname]['Os'] = attributes.get('Os') + mgmt_ip = attributes.get('ManagementIp') + management_gw = str(ipaddress.IPNetwork(mgmt_ip).network+1) deviceinfo[hostname]['ManagementIp'] = mgmt_ip + deviceinfo[hostname]['ManagementGw'] = management_gw self.consolelinks[hostname] = {} console_link_root = console_root.find('ConsoleLinksInfo') if console_link_root: allconsolelinks = console_link_root.findall('ConsoleLinkInfo') if allconsolelinks is not None: for consolelink in allconsolelinks: - start_dev = consolelink.attrib['StartDevice'] - end_dev = consolelink.attrib['EndDevice'] - console_proxy = consolelink.attrib['Proxy'] - console_type = consolelink.attrib['Console_type'] + attributes = consolelink.attrib + start_dev = attributes.get('StartDevice') + start_port = attributes.get('StartPort') + end_dev = attributes.get('EndDevice') + end_port = 'ConsolePort' + console_proxy = attributes.get('Proxy') + console_type = attributes.get('Console_type') + baud_rate = attributes.get('BaudRate') if start_dev: if start_dev not in self.consolelinks: self.consolelinks.update({start_dev : {}}) - self.consolelinks[start_dev][consolelink.attrib['StartPort']] = {'peerdevice':consolelink.attrib['EndDevice'], 'peerport': 'ConsolePort', 'proxy':console_proxy, 'type':console_type} + self.consolelinks[start_dev][start_port] = { + 'peerdevice': end_dev, + 'peerport': end_port, + 'proxy':console_proxy, + 'type':console_type, + 'baud_rate': baud_rate + } if end_dev: if 
end_dev not in self.consolelinks: self.consolelinks.update({end_dev : {}}) - self.consolelinks[end_dev]['ConsolePort'] = {'peerdevice': consolelink.attrib['StartDevice'], 'peerport': consolelink.attrib['StartPort'], 'proxy':console_proxy, 'type':console_type} + self.consolelinks[end_dev][end_port] = { + 'peerdevice': start_dev, + 'peerport': start_port, + 'proxy':console_proxy, + 'type':console_type, + 'baud_rate': baud_rate + } + bmc_root = self.root.find(self.bmcgtag) + if bmc_root: + devicebmcgroot = bmc_root.find('DevicesBmcInfo') + devicesbmcg = devicebmcgroot.findall('DeviceBmcInfo') + if devicesbmcg is not None: + for dev in devicesbmcg: + attributes = dev.attrib + hostname = attributes['Hostname'] + if hostname is not None: + deviceinfo[hostname] = {} + deviceinfo[hostname]["Hostname"] = hostname + deviceinfo[hostname]['HwSku'] = attributes.get('HwSku') + deviceinfo[hostname]['Type'] = attributes.get('Type') + deviceinfo[hostname]['Protocol'] = attributes.get('Protocol') + deviceinfo[hostname]['Os'] = attributes.get('Os') + mgmt_ip = attributes.get('ManagementIp') + management_gw = str(ipaddress.IPNetwork(mgmt_ip).network+1) + deviceinfo[hostname]['ManagementIp'] = mgmt_ip + deviceinfo[hostname]['ManagementGw'] = management_gw + self.bmclinks[hostname] = {} + bmc_link_root = bmc_root.find('BmcLinksInfo') + if bmc_link_root: + allbmclinks = bmc_link_root.findall('BmcLinkInfo') + if allbmclinks is not None: + for bmclink in allbmclinks: + attributes = bmclink.attrib + start_dev = attributes.get('StartDevice') + start_port = attributes.get('StartPort') + end_dev = attributes.get('EndDevice') + end_port = attributes.get('EndPort') + bmc_ip = attributes.get("BmcIp") + if start_dev: + if start_dev not in self.bmclinks: + self.bmclinks.update({start_dev : {}}) + self.bmclinks[start_dev][start_port] = { + 'peerdevice': end_dev, + 'peerport': end_port, + 'bmc_ip': bmc_ip + } + if end_dev: + if end_dev not in self.bmclinks: + self.bmclinks.update({end_dev : {}}) + 
self.bmclinks[end_dev][end_port] = { + 'peerdevice': start_dev, + 'peerport': start_port, + 'bmc_ip': bmc_ip + } pdu_root = self.root.find(self.pcgtag) if pdu_root: @@ -256,6 +318,7 @@ def parse_graph(self): hostname = dev.attrib['Hostname'] if hostname is not None: deviceinfo[hostname] = {} + deviceinfo[hostname]["Hostname"] = hostname hwsku = dev.attrib['HwSku'] devtype = dev.attrib['Type'] protocol = dev.attrib['Protocol'] @@ -356,7 +419,7 @@ def contains_hosts(self, hostnames, part): count += 1 return hostnames and (count * 1.0 / len(hostnames) >= THRESHOLD) - + # get the console of a device, if it exists, host is being managed by the returned device def get_host_console_info(self, hostname): """ return the given hostname console info of mgmtip, protocol, hwsku and type @@ -375,6 +438,7 @@ def get_host_console_info(self, hostname): """ return {} + # return the list of devices that is managed by host through console def get_host_console_link(self, hostname): """ return the given hostname console link info of console server and port @@ -385,13 +449,43 @@ def get_host_console_link(self, hostname): # Please be noted that an empty dict is returned when hostname is not found return {} + # get the bmc of a device, if it exists, host is being managed by the returned device + def get_host_bmc_info(self, hostname): + """ + return the given hostname bmc info of mgmtip, protocol, hwsku and type + """ + if hostname in self.devices: + try: + # currently we only support end port iDRAC + ret = self.devices[self.bmclinks[hostname]['iDRAC']['peerdevice']] + except KeyError: + ret = {} + return ret + else: + """ + Please be noted that an empty dict is returned when hostname is not found + The behavior is different with get_host_vlan. 
+ """ + return {} + + # return the list of devices that is managed by host through bmc + def get_host_bmc_link(self, hostname): + """ + return the given hostname bmc link info of management server and port + """ + if hostname in self.bmclinks: + return self.bmclinks[hostname] + else: + # Please be noted that an empty dict is returned when hostname is not found + return {} + def get_host_pdu_info(self, hostname): """ return the given hostname pdu info of mgmtip, protocol, hwsku and type """ if hostname in self.devices: ret = {} - for key in ['PSU1', 'PSU2']: + for key in ['PSU1', 'PSU2', 'PSU3', 'PSU4']: try: ret.update({key : self.devices[self.pdulinks[hostname][key]['peerdevice']]}) except KeyError: @@ -475,6 +569,8 @@ def build_results(lab_graph, hostnames, ignore_error=False): device_vlan_map_list = {} device_console_info = {} device_console_link = {} + device_bmc_info = {} + device_bmc_link = {} device_pdu_info = {} device_pdu_links = {} msg = {} @@ -517,6 +613,8 @@ def build_results(lab_graph, hostnames, ignore_error=False): device_port_vlans[hostname] = port_vlans device_console_info[hostname] = lab_graph.get_host_console_info(hostname) device_console_link[hostname] = lab_graph.get_host_console_link(hostname) + device_bmc_info[hostname] = lab_graph.get_host_bmc_info(hostname) + device_bmc_link[hostname] = lab_graph.get_host_bmc_link(hostname) device_pdu_info[hostname] = lab_graph.get_host_pdu_info(hostname) device_pdu_links[hostname] = lab_graph.get_host_pdu_links(hostname) results = {k: v for k, v in locals().items() diff --git a/ansible/library/exabgp.py b/ansible/library/exabgp.py index 46aa33075bd..999ffec4416 100644 --- a/ansible/library/exabgp.py +++ b/ansible/library/exabgp.py @@ -218,7 +218,7 @@ def main(): module = AnsibleModule( argument_spec=dict( name=dict(required=True, type='str'), - state=dict(required=True, choices=['started', 'restarted', 'stopped', 'present', 'absent', 'status'], type='str'), + state=dict(required=True, choices=['started', 
'restarted', 'stopped', 'present', 'absent', 'status', 'configure'], type='str'), router_id=dict(required=False, type='str'), local_ip=dict(required=False, type='str'), peer_ip=dict(required=False, type='str'), @@ -259,6 +259,9 @@ def main(): setup_exabgp_conf(name, router_id, local_ip, peer_ip, local_asn, peer_asn, port, dump_script=dump_script, passive=passive) setup_exabgp_supervisord_conf(name) refresh_supervisord(module) + elif state == 'configure': + setup_exabgp_conf(name, router_id, local_ip, peer_ip, local_asn, peer_asn, port, dump_script=dump_script, passive=passive) + setup_exabgp_supervisord_conf(name) elif state == 'stopped': stop_exabgp(module, name) elif state == 'absent': diff --git a/ansible/library/extract_log.py b/ansible/library/extract_log.py index 7fb855a89ed..c4bd6f8df79 100644 --- a/ansible/library/extract_log.py +++ b/ansible/library/extract_log.py @@ -116,7 +116,6 @@ def extract_number(s): else: return int(ns[0]) - def convert_date(fct, s): dt = None re_result = re.findall(r'^\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}\.?\d*', s) @@ -137,9 +136,16 @@ def convert_date(fct, s): if (dt - fct).days > 183: dt.replace(year = dt.year - 1) else: - re_result = re.findall(r'^\d{4}-\d{2}-\d{2}\.\d{2}:\d{2}:\d{2}\.\d{6}', s) - str_date = re_result[0] - dt = datetime.datetime.strptime(str_date, '%Y-%m-%d.%X.%f') + re_result = re.findall(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}', s) + if len(re_result) > 0: + str_date = re_result[0] + str_date = str_date.replace("T", " ") + dt = datetime.datetime.strptime(str_date, '%Y-%m-%d %X.%f') + else: + re_result = re.findall(r'^\d{4}-\d{2}-\d{2}\.\d{2}:\d{2}:\d{2}\.\d{6}', s) + if len(re_result) > 0: + str_date = re_result[0] + dt = datetime.datetime.strptime(str_date, '%Y-%m-%d.%X.%f') locale.setlocale(locale.LC_ALL, loc) return dt diff --git a/ansible/library/fdb_facts.py b/ansible/library/fdb_facts.py index ac9c2140b15..681224c10c3 100644 --- a/ansible/library/fdb_facts.py +++ b/ansible/library/fdb_facts.py @@ 
-25,11 +25,13 @@ The output: { '24:8A:07:4C:F5:06': - { - 'vlan': '1000', - 'type': 'Dynamic', - 'port': 'Ethernet24' - } + [ + { + 'vlan': '1000', + 'type': 'Dynamic', + 'port': 'Ethernet24' + } + ] } ''' @@ -61,11 +63,13 @@ def run(self): if not d[0].strip().isdigit(): continue mac = d[2].strip() - ret[mac] = { + val = { 'vlan': int(d[1].strip()) if d[1] != "" else 0, 'port': d[3].strip(), 'type': d[4].strip() } + ret[mac] = ret.get(mac, []) + ret[mac].append(val) self.module.exit_json(ansible_facts=ret) diff --git a/ansible/library/reduce_and_add_sonic_images.py b/ansible/library/reduce_and_add_sonic_images.py index 2c00b0a5233..a19ea4bbac8 100644 --- a/ansible/library/reduce_and_add_sonic_images.py +++ b/ansible/library/reduce_and_add_sonic_images.py @@ -26,7 +26,7 @@ from os import path from ansible.module_utils.basic import * -results = {"downloaded_image_version": "Unknown"} +results = {"downloaded_image_version": "Unknown", "current_stage": "Unknown"} def exec_command(module, cmd, ignore_error=False, msg="executing command"): rc, out, err = module.run_command(cmd, use_unsafe_shell=True) @@ -74,6 +74,19 @@ def setup_swap_if_necessary(module): def reduce_installed_sonic_images(module): + _, out, _ = exec_command(module, cmd="sonic_installer list", ignore_error=True) + lines = out.split('\n') + + # if next boot image not same with current, set current as next boot, and delete the orinal next image + for line in lines: + if 'Current:' in line: + curr_image = line.split(':')[1].strip() + elif 'Next:' in line: + next_image = line.split(':')[1].strip() + + if curr_image != next_image: + exec_command(module, cmd="sonic_installer set-next-boot {}".format(curr_image), ignore_error=True) + exec_command(module, cmd="sonic_installer cleanup -y", ignore_error=True) @@ -156,6 +169,7 @@ def get_disk_used_percent(module): exec_command(module, "rm -f /var/log/*.gz", ignore_error=True) exec_command(module, "rm -f /var/core/*", ignore_error=True) exec_command(module, "rm -rf 
/var/dump/*", ignore_error=True) + exec_command(module, "rm -rf /home/admin/*", ignore_error=True) def main(): @@ -172,15 +186,19 @@ def main(): save_as = module.params['save_as'] try: + results["current_stage"] = "start" work_around_for_slow_disks(module) reduce_installed_sonic_images(module) + results["current_stage"] = "prepare" if new_image_url or save_as: free_up_disk_space(module, disk_used_pcent) setup_swap_if_necessary(module) + results["current_stage"] = "install" install_new_sonic_image(module, new_image_url, save_as) + results["current_stage"] = "complete" except: err = str(sys.exc_info()) - module.fail_json(msg="Error: %s" % err) + module.fail_json(msg="Results: %s; Error: %s" % (results, err)) module.exit_json(ansible_facts=results) diff --git a/ansible/library/snmp_facts.py b/ansible/library/snmp_facts.py index a94608a7c79..68d77adf301 100644 --- a/ansible/library/snmp_facts.py +++ b/ansible/library/snmp_facts.py @@ -206,12 +206,16 @@ def __init__(self,dotprefix=False): self.ChStackUnitCpuUtil5sec = dp + "1.3.6.1.4.1.6027.3.10.1.2.9.1.2.1" # Memory Check - self.sysTotalMemery = dp + "1.3.6.1.4.1.2021.4.5.0" - self.sysTotalFreeMemery = dp + "1.3.6.1.4.1.2021.4.6.0" + self.sysTotalMemory = dp + "1.3.6.1.4.1.2021.4.5.0" + self.sysTotalFreeMemory = dp + "1.3.6.1.4.1.2021.4.6.0" self.sysTotalSharedMemory = dp + "1.3.6.1.4.1.2021.4.13.0" self.sysTotalBuffMemory = dp + "1.3.6.1.4.1.2021.4.14.0" self.sysCachedMemory = dp + "1.3.6.1.4.1.2021.4.15.0" + # Swap Info + self.sysTotalSwap = dp + "1.3.6.1.4.1.2021.4.3.0" + self.sysTotalFreeSwap = dp + "1.3.6.1.4.1.2021.4.4.0" + # From Cisco private MIB (PFC and queue counters) self.cpfcIfRequests = dp + "1.3.6.1.4.1.9.9.813.1.1.1.1" # + .ifindex self.cpfcIfIndications = dp + "1.3.6.1.4.1.9.9.813.1.1.1.2" # + .ifindex @@ -312,6 +316,7 @@ def main(): privkey=dict(required=False), is_dell=dict(required=False, default=False, type='bool'), is_eos=dict(required=False, default=False, type='bool'), + 
include_swap=dict(required=False, default=False, type='bool'), removeplaceholder=dict(required=False)), required_together = ( ['username','level','integrity','authkey'],['privacy','privkey'],), supports_check_mode=False) @@ -913,8 +918,8 @@ def main(): errorIndication, errorStatus, errorIndex, varBinds = cmdGen.getCmd( snmp_auth, cmdgen.UdpTransportTarget((m_args['host'], 161)), - cmdgen.MibVariable(p.sysTotalMemery,), - cmdgen.MibVariable(p.sysTotalFreeMemery,), + cmdgen.MibVariable(p.sysTotalMemory,), + cmdgen.MibVariable(p.sysTotalFreeMemory,), cmdgen.MibVariable(p.sysTotalSharedMemory,), cmdgen.MibVariable(p.sysTotalBuffMemory,), cmdgen.MibVariable(p.sysCachedMemory,), @@ -926,10 +931,10 @@ def main(): for oid, val in varBinds: current_oid = oid.prettyPrint() - if current_oid == v.sysTotalMemery: - results['ansible_sysTotalMemery'] = decode_type(module, current_oid, val) - elif current_oid == v.sysTotalFreeMemery: - results['ansible_sysTotalFreeMemery'] = decode_type(module, current_oid, val) + if current_oid == v.sysTotalMemory: + results['ansible_sysTotalMemory'] = decode_type(module, current_oid, val) + elif current_oid == v.sysTotalFreeMemory: + results['ansible_sysTotalFreeMemory'] = decode_type(module, current_oid, val) elif current_oid == v.sysTotalSharedMemory: results['ansible_sysTotalSharedMemory'] = decode_type(module, current_oid, val) elif current_oid == v.sysTotalBuffMemory: @@ -937,6 +942,25 @@ def main(): elif current_oid == v.sysCachedMemory: results['ansible_sysCachedMemory'] = decode_type(module, current_oid, val) + if m_args['include_swap']: + errorIndication, errorStatus, errorIndex, varBinds = cmdGen.getCmd( + snmp_auth, + cmdgen.UdpTransportTarget((m_args['host'], 161)), + cmdgen.MibVariable(p.sysTotalSwap,), + cmdgen.MibVariable(p.sysTotalFreeSwap,), + lookupMib=False, lexicographicMode=False + ) + + if errorIndication: + module.fail_json(msg=str(errorIndication) + ' querying system infomation.') + + for oid, val in varBinds: + 
current_oid = oid.prettyPrint() + if current_oid == v.sysTotalSwap: + results['ansible_sysTotalSwap'] = decode_type(module, current_oid, val) + elif current_oid == v.sysTotalFreeSwap: + results['ansible_sysTotalFreeSwap'] = decode_type(module, current_oid, val) + errorIndication, errorStatus, errorIndex, varTable = cmdGen.nextCmd( snmp_auth, cmdgen.UdpTransportTarget((m_args['host'], 161)), diff --git a/ansible/library/vlan_facts.py b/ansible/library/vlan_facts.py new file mode 100644 index 00000000000..61e35d62ff8 --- /dev/null +++ b/ansible/library/vlan_facts.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# This ansible module is for gathering VLAN related facts from SONiC device. + +from ansible.module_utils.basic import * +from collections import defaultdict + + +DOCUMENTATION = ''' +--- +module: vlan_facts +version_added: "1.0" +author: Bing Wang (bingwang@microsoft.com) +short_description: Retrive VLAN facts from a device. +description: + - Retrieve VLAN facts for a device, the facts will be + inserted to the ansible_facts key. +options: + N/A +''' + +EXAMPLES = ''' +# Gather VLAN facts +- name: Gathering VLAN facts from the device + vlan_facts: + { + "vlan1000": { + "name": "vlan1000", + "vlanid": "1000", + "members": [ + "Ethernet0": { + "tagging_mode": "untagged" + }, + "Ethernet2": { + "tagging_mode": "untagged" + } + ], + "interfaces": [ + { + "addr": "192.168.0.1", + "prefixlen": 21 + }, + { + "prefixlen": 64, + "addr": "fc02:1000::1", + } + ] + } + } +''' + + +def get_all_vlan(module, config): + """ + @summary: Read all running vlan with sonic-cfggen. 
+ @param module: The AnsibleModule object + @param config: The retrieved vlan config + @return: None + """ + rc, stdout, stderr = module.run_command('sonic-cfggen -d --var-json \"VLAN\"') + if rc != 0: + module.fail_json(msg='Failed to get DUT running config, rc=%s, stdout=%s, stderr=%s' % (rc, stdout, stderr)) + + try: + vlan_config = module.from_json(stdout) + for k, v in vlan_config.items(): + config[k] = { + 'name': k, + 'vlanid': v['vlanid'] + } + except Exception as e: + module.fail_json(msg='Failed to parse config from output of "sonic-cfggen -d --var-json VLAN", err=' + str(e)) + +def get_vlan_interfaces(module, config): + """ + @summary: Read all running vlan interface IP with sonic-cfggen. + @param module: The AnsibleModule object + @param config: The retrieved vlan config + @return: None + """ + rc, stdout, stderr = module.run_command('sonic-cfggen -d --var-json \"VLAN_INTERFACE\"') + if rc != 0: + module.fail_json(msg='Failed to get DUT running config, rc=%s, stdout=%s, stderr=%s' % (rc, stdout, stderr)) + + try: + vlan_config = module.from_json(stdout) + for k, v in vlan_config.items(): + vlan_ip = k.split('|') + if len(vlan_ip) != 2: + continue + vlan = vlan_ip[0] + if 'interfaces' not in config[vlan]: + config[vlan]['interfaces'] = [] + ip_prefix = vlan_ip[1].split('/') + config[vlan]['interfaces'].append( + { + "addr": ip_prefix[0], + "prefixlen": 32 if len(ip_prefix) < 2 else int(ip_prefix[1]) + } + ) + + except Exception as e: + module.fail_json(msg='Failed to parse config from output of "sonic-cfggen -d --var-json VLAN_INTERFACE", err=' + str(e)) + +def get_vlan_members(module, config): + """ + @summary: Read all running vlan members with sonic-cfggen. 
+ @param module: The AnsibleModule object + @param config: The retrieved vlan config + @return: None + """ + rc, stdout, stderr = module.run_command('sonic-cfggen -d --var-json \"VLAN_MEMBER\"') + if rc != 0: + module.fail_json(msg='Failed to get DUT running config, rc=%s, stdout=%s, stderr=%s' % (rc, stdout, stderr)) + + try: + vlan_config = module.from_json(stdout) + for k, v in vlan_config.items(): + vlan_intf = k.split('|') + if len(vlan_intf) < 2: + continue + if 'members' not in config[vlan_intf[0]]: + config[vlan_intf[0]]['members'] = {} + config[vlan_intf[0]]['members'].update( + {vlan_intf[1]: {"tagging_mode": v['tagging_mode']}} + ) + + except Exception as e: + module.fail_json(msg='Failed to parse config from output of "sonic-cfggen -d --var-json VLAN_MEMBER", err=' + str(e)) + + +def main(): + + module = AnsibleModule(argument_spec=dict()) + + vlan_config = defaultdict(dict) + + get_all_vlan(module, vlan_config) + get_vlan_interfaces(module, vlan_config) + get_vlan_members(module, vlan_config) + + module.exit_json(ansible_facts={'ansible_vlan_facts': vlan_config}) + + +if __name__ == '__main__': + main() diff --git a/ansible/library/vmhost_server_info.py b/ansible/library/vmhost_server_info.py index b0dd996a379..4591a64a800 100644 --- a/ansible/library/vmhost_server_info.py +++ b/ansible/library/vmhost_server_info.py @@ -22,9 +22,6 @@ vmhost_server_info: vmhost_server_name='server_1' vm_file='veos' ''' -# Here we assume that the group name of host server starts with 'vm_host_'. 
-VMHOST_PREFIX = "vm_host_" - VM_INV_FILE = 'veos' def main(): @@ -36,15 +33,22 @@ def main(): supports_check_mode=True ) m_args = module.params - vmhost_group_name = VMHOST_PREFIX + m_args['vmhost_server_name'].split('_')[-1] - inv_mgr = InventoryManager(loader=DataLoader(), sources=m_args['vm_file']) - all_hosts = inv_mgr.get_hosts(pattern=vmhost_group_name) - if len(all_hosts) != 1: - module.fail_json(msg="{} host servers are found in {}, which should be 1".format(len(all_hosts), vmhost_group_name)) + vmhost_server_name = m_args["vmhost_server_name"] + vm_file = m_args["vm_file"] + + inv_mgr = InventoryManager(loader=DataLoader(), sources=vm_file) + + all_hosts = inv_mgr.get_hosts(pattern=vmhost_server_name) + if len(all_hosts) == 0: + module.fail_json(msg="No host matches {} in inventory file {}".format(vmhost_server_name, vm_file)) else: - module.exit_json(ansible_facts={'vmhost_server_address':all_hosts[0].get_vars()['ansible_host']}) + for host in all_hosts: + if host.name.startswith('VM'): + continue + module.exit_json(ansible_facts={"vmhost_server_address": host.get_vars()["ansible_host"]}) + + module.fail_json(msg="Unable to find IP address of host server {} in inventory file {}".format(vmhost_server_name, vm_file)) from ansible.module_utils.basic import * if __name__ == "__main__": main() - diff --git a/ansible/linkstate/testbed_inv.py b/ansible/linkstate/testbed_inv.py index 96f3740d3c5..c81e8c07b68 100755 --- a/ansible/linkstate/testbed_inv.py +++ b/ansible/linkstate/testbed_inv.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 import sys import json diff --git a/ansible/module_utils/port_utils.py b/ansible/module_utils/port_utils.py index 623004e79bc..5aeb3bead06 100644 --- a/ansible/module_utils/port_utils.py +++ b/ansible/module_utils/port_utils.py @@ -1,3 +1,5 @@ + + def _port_alias_to_name_map_50G(all_ports, s100G_ports,): new_map = {} # 50G ports @@ -12,13 +14,15 @@ def _port_alias_to_name_map_50G(all_ports, s100G_ports,): return 
new_map + def get_port_alias_to_name_map(hwsku, asic_name=None): port_alias_to_name_map = {} port_alias_asic_map = {} - port_name_to_index_map = {} + port_name_to_index_map = {} + HWSKU_WITH_PORT_INDEX_FROM_PORT_CONFIG = ["8800-LC-48H-O", "88-LC0-36FH-MO"] try: from sonic_py_common import multi_asic - from ansible.module_utils.multi_asic_utils import load_db_config + from ansible.module_utils.multi_asic_utils import load_db_config load_db_config() ports_info = multi_asic.get_port_table(namespace=asic_name) for port, port_data in ports_info.items(): @@ -26,7 +30,7 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): port_alias_to_name_map[port_data["alias"]] = port if "asic_port_name" in port_data: port_alias_asic_map[port_data["asic_port_name"]] = port - if "index" in port_data: + if "index" in port_data and hwsku in HWSKU_WITH_PORT_INDEX_FROM_PORT_CONFIG: port_name_to_index_map[port] = int(port_data["index"]) except ImportError: if hwsku == "Force10-S6000": @@ -59,7 +63,7 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): for i in s10G_ports: alias = "etp{}".format(33 if i == 256 else 34) port_alias_to_name_map[alias] = "Ethernet{}".format(i) - elif hwsku == "DellEMC-Z9332f-O32": + elif hwsku == "DellEMC-Z9332f-O32" or hwsku == "DellEMC-Z9332f-C32": for i in range(0, 256, 8): alias = "etp{}".format((i // 8) + 1) port_alias_to_name_map[alias] = "Ethernet{}".format(i) @@ -71,24 +75,30 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) for i in range(25, 33): port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 1) * 4) - elif hwsku == "Arista-7050-QX-32S": + elif hwsku == "Arista-7050-QX-32S" or hwsku == "Arista-7050QX-32S-S4Q31": for i in range(0, 4): - port_alias_to_name_map["Ethernet1/%d" % (i + 1)] = "Ethernet%d" % i + port_alias_to_name_map["Ethernet%d" % (i + 1)] = "Ethernet%d" % i for i in range(6, 29): port_alias_to_name_map["Ethernet%d/1" % i] = 
"Ethernet%d" % ((i - 5) * 4) for i in range(29, 37): port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 5) * 4) + elif hwsku == "Arista-7050QX32S-Q32": + for i in range(5, 29): + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 5) * 4) + for i in range(29, 37): + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 5) * 4) elif hwsku == "Arista-7280CR3-C40": for i in range(1, 33): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) for i in range(33, 41, 2): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) port_alias_to_name_map["Ethernet%d/5" % i] = "Ethernet%d" % (i * 4) - elif hwsku == "Arista-7260CX3-C64" or hwsku == "Arista-7170-64C": + elif hwsku == "Arista-7260CX3-C64" or hwsku == "Arista-7170-64C" or hwsku == "Arista-7260CX3-Q64": for i in range(1, 65): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) - elif hwsku == "Arista-7060CX-32S-C32" or hwsku == "Arista-7060CX-32S-Q32" or hwsku == "Arista-7060CX-32S-C32-T1" or hwsku == "Arista-7170-32CD-C32" \ - or hwsku == "Arista-7050CX3-32S-C32": + elif hwsku == "Arista-7060CX-32S-C32" or hwsku == "Arista-7060CX-32S-Q32" \ + or hwsku == "Arista-7060CX-32S-C32-T1" or hwsku == "Arista-7170-32CD-C32" \ + or hwsku == "Arista-7050CX3-32S-C32": for i in range(1, 33): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Mellanox-SN2700-D40C8S8": @@ -110,6 +120,19 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): for i in s100G_ports: alias = "etp%d" % (i / 4 + 1) port_alias_to_name_map[alias] = "Ethernet%d" % i + elif hwsku == "Mellanox-SN2700-D44C10": + # 50G ports + s50G_ports = [x for x in range(8, 24, 2)] + [x for x in range(40, 88, 2)] + [x for x in range(104, 128, 2)] + + # 100G ports + s100G_ports = [0, 4] + [x for x in range(24, 40, 4)] + [x for x in range(88, 104, 4)] + + for i in s50G_ports: + alias = "etp%d" % (i / 4 + 1) + ("a" if i % 4 == 0 else "b") + 
port_alias_to_name_map[alias] = "Ethernet%d" % i + for i in s100G_ports: + alias = "etp%d" % (i / 4 + 1) + port_alias_to_name_map[alias] = "Ethernet%d" % i elif hwsku == "Mellanox-SN2700-D48C8": # 50G ports s50G_ports = [x for x in range(0, 24, 2)] + [x for x in range(40, 88, 2)] + [x for x in range(104, 128, 2)] @@ -143,10 +166,9 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): s100G_ports = [x for x in range(13, 21)] port_alias_to_name_map = _port_alias_to_name_map_50G(all_ports, s100G_ports) - elif hwsku == "Arista-7800R3-48CQ-LC" or\ - hwsku == "Arista-7800R3K-48CQ-LC": - for i in range(1, 48): - port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) + elif hwsku == "Arista-7800R3-48CQ-LC" or hwsku == "Arista-7800R3K-48CQ-LC": + for i in range(1, 48): + port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "INGRASYS-S9100-C32": for i in range(1, 33): port_alias_to_name_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) @@ -169,6 +191,20 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): elif hwsku == "Celestica-DX010-C32": for i in range(1, 33): port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % ((i - 1) * 4) + elif hwsku == "Celestica-DX010-D48C8": + for i in range(1, 21): + port_alias_to_name_map["etp{}{}".format((i + 1)//2, "a" if i % 2 == 1 else "b")] = \ + "Ethernet%d" % ((i - 1) * 2) + for i in range(21, 25): + port_alias_to_name_map["etp{}".format(i - 10)] = "Ethernet%d" % ((i - 10 - 1) * 4) + for i in range(25, 33): + port_alias_to_name_map["etp{}{}".format((i + 4 + 1)//2, "a" if i % 2 == 1 else "b")] = \ + "Ethernet%d" % ((i + 4 - 1) * 2) + for i in range(33, 37): + port_alias_to_name_map["etp{}".format(i - 14)] = "Ethernet%d" % ((i - 14 - 1) * 4) + for i in range(37, 57): + port_alias_to_name_map["etp{}{}".format((i + 8 + 1)//2, "a" if i % 2 == 1 else "b")] = \ + "Ethernet%d" % ((i + 8 - 1) * 2) elif hwsku == "Seastone-DX010": for i in range(1, 33): 
port_alias_to_name_map["Eth%d" % i] = "Ethernet%d" % ((i - 1) * 4) @@ -192,26 +228,38 @@ def get_port_alias_to_name_map(hwsku, asic_name=None): elif hwsku == "36x100Gb": for i in range(0, 36): port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % i - elif hwsku == "64x100Gb": + elif hwsku == "Cisco-8102-C64": for i in range(0, 64): - port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % i - elif hwsku == "8800-LC-48H-O": + port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % (i * 4) + elif hwsku in ["8800-LC-48H-O", "88-LC0-36FH-MO"]: for i in range(0, 48, 1): port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % i elif hwsku in ["msft_multi_asic_vs"]: - for i in range(1,65): + for i in range(1, 65): port_alias_to_name_map["Ethernet1/%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "msft_four_asic_vs": - for i in range(1,9): + for i in range(1, 9): port_alias_to_name_map["Ethernet1/%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "B6510-48VS8CQ" or hwsku == "RA-B6510-48V8C": - for i in range(1,49): + for i in range(1, 49): port_alias_to_name_map["twentyfiveGigE0/%d" % i] = "Ethernet%d" % i - for i in range(49,57): + for i in range(49, 57): port_alias_to_name_map["hundredGigE0/%d" % (i-48)] = "Ethernet%d" % i + elif hwsku == "RA-B6510-32C": + for i in range(1, 33): + port_alias_to_name_map["hundredGigE%d" % i] = "Ethernet%d" % i elif hwsku == "RA-B6910-64C": - for i in range(1,65): + for i in range(1, 65): + port_alias_to_name_map["hundredGigE%d" % i] = "Ethernet%d" % i + elif hwsku == "RA-B6920-4S": + for i in range(1, 129): port_alias_to_name_map["hundredGigE%d" % i] = "Ethernet%d" % i + elif hwsku in ["Wistron_sw_to3200k_32x100", "Wistron_sw_to3200k"]: + for i in range(0, 256, 8): + port_alias_to_name_map["Ethernet%d" % i] = "Ethernet%d" % i + elif hwsku == "Arista-720DT-48S": + for i in range(1, 53): + port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % (i - 1) else: for i in range(0, 128, 4): port_alias_to_name_map["Ethernet%d" % i] = 
"Ethernet%d" % i @@ -226,7 +274,7 @@ def get_port_indices_for_asic(asic_id, port_name_list_sorted): # Create mapping between port alias and physical index port_index_map = {} if asic_id: - index_offset = int(asic_id) *len(front_end_port_name_list) + index_offset = int(asic_id) * len(front_end_port_name_list) for idx, val in enumerate(front_end_port_name_list, index_offset): port_index_map[val] = idx for idx, val in enumerate(back_end_port_name_list, index_offset): diff --git a/ansible/recover_server.py b/ansible/recover_server.py index 482ecb8378a..2a5392cf52a 100755 --- a/ansible/recover_server.py +++ b/ansible/recover_server.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 """ Script used to recover testbed servers after reboot/upgrade/black-out. - Cleanup server diff --git a/ansible/restart_nightly_ptf.py b/ansible/restart_nightly_ptf.py old mode 100644 new mode 100755 index 9844db4a6e1..9efbd5f3eb3 --- a/ansible/restart_nightly_ptf.py +++ b/ansible/restart_nightly_ptf.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python2 + import argparse import logging import imp diff --git a/ansible/roles/eos/tasks/ceos.yml b/ansible/roles/eos/tasks/ceos.yml index 16711009fc0..e1732adc9ed 100644 --- a/ansible/roles/eos/tasks/ceos.yml +++ b/ansible/roles/eos/tasks/ceos.yml @@ -28,7 +28,7 @@ - net_admin privileged: yes memory: 2G - memory_swap: 2G + memory_swap: 4G env: CEOS=1 container=docker diff --git a/ansible/roles/eos/tasks/ceos_config.yml b/ansible/roles/eos/tasks/ceos_config.yml index 46369acd2fc..0dca3a84c70 100644 --- a/ansible/roles/eos/tasks/ceos_config.yml +++ b/ansible/roles/eos/tasks/ceos_config.yml @@ -18,6 +18,13 @@ - name: Set EOS backplane port name set_fact: bp_ifname="Ethernet{{ fp_num.stdout|int - 1}}" +- name: cleanup previous ceos mount dir + become: yes + file: + path: "/{{ ceos_image_mount_dir }}/ceos_{{ vm_set_name }}_{{ inventory_hostname }}/" + state: absent + delegate_to: "{{ VM_host[0] }}" + - name: create directory for ceos config become: 
yes file: diff --git a/ansible/roles/eos/templates/m0-m1.j2 b/ansible/roles/eos/templates/m0-m1.j2 new file mode 120000 index 00000000000..8430cb1debd --- /dev/null +++ b/ansible/roles/eos/templates/m0-m1.j2 @@ -0,0 +1 @@ +t0-leaf.j2 \ No newline at end of file diff --git a/ansible/roles/eos/templates/m0-mx.j2 b/ansible/roles/eos/templates/m0-mx.j2 new file mode 100644 index 00000000000..641059da5d7 --- /dev/null +++ b/ansible/roles/eos/templates/m0-mx.j2 @@ -0,0 +1,134 @@ +{% set host = configuration[hostname] %} +{% set mgmt_ip = ansible_host %} +{% if vm_type is defined and vm_type == "ceos" %} +{% set mgmt_if_index = 0 %} +{% else %} +{% set mgmt_if_index = 1 %} +{% endif %} +no schedule tech-support +! +{% if vm_type is defined and vm_type == "ceos" %} +agent LicenseManager shutdown +agent PowerFuse shutdown +agent PowerManager shutdown +agent Thermostat shutdown +agent LedPolicy shutdown +agent StandbyCpld shutdown +agent Bfd shutdown +{% endif %} +! +hostname {{ hostname }} +! +vrf definition MGMT + rd 1:1 +! +spanning-tree mode mstp +! +aaa root secret 0 123456 +! +username admin privilege 15 role network-admin secret 0 123456 +! +clock timezone UTC +! +lldp run +lldp management-address Management{{ mgmt_if_index }} +lldp management-address vrf MGMT +! +snmp-server community {{ snmp_rocommunity }} ro +snmp-server vrf MGMT +! +ip routing +ip routing vrf MGMT +ipv6 unicast-routing +! +{% if vm_mgmt_gw is defined %} +ip route vrf MGMT 0.0.0.0/0 {{ vm_mgmt_gw }} +{% else %} +ip route vrf MGMT 0.0.0.0/0 {{ mgmt_gw }} +{% endif %} +! +interface Management {{ mgmt_if_index }} + description TO LAB MGMT SWITCH +{% if vm_type is defined and vm_type == "ceos" %} + vrf MGMT +{% else %} + vrf forwarding MGMT +{% endif %} + ip address {{ mgmt_ip }}/{{ mgmt_prefixlen }} + no shutdown +! 
+{% for name, iface in host['interfaces'].items() %} +interface {{ name }} +{% if name.startswith('Loopback') %} + description LOOPBACK +{% else %} + mtu 9214 + no switchport + no shutdown +{% endif %} +{% if iface['ipv4'] is defined %} + ip address {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ iface['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +{% endfor %} +! +interface {{ bp_ifname }} + description backplane + no switchport + no shutdown +{% if host['bp_interface']['ipv4'] is defined %} + ip address {{ host['bp_interface']['ipv4'] }} +{% endif %} +{% if host['bp_interface']['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ host['bp_interface']['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +router bgp {{ host['bgp']['asn'] }} + router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} + ! + graceful-restart restart-time {{ bgp_gr_timer }} + graceful-restart + ! +{% for asn, remote_ips in host['bgp']['peers'].items() %} +{% for remote_ip in remote_ips %} + neighbor {{ remote_ip }} remote-as {{ asn }} + neighbor {{ remote_ip }} description {{ asn }} +{% if remote_ip | ipv6 %} + address-family ipv6 + neighbor {{ remote_ip }} activate + exit +{% endif %} +{% endfor %} +{% endfor %} + neighbor {{ props.nhipv4 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv4 }} description exabgp_v4 + neighbor {{ props.nhipv6 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv6 }} description exabgp_v6 + address-family ipv6 + neighbor {{ props.nhipv6 }} activate + exit + ! +{% for name, iface in host['interfaces'].items() if name.startswith('Loopback') %} +{% if iface['ipv4'] is defined %} + network {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + network {{ iface['ipv6'] }} +{% endif %} +{% endfor %} +! +management api http-commands + no protocol https + protocol http + no shutdown +! 
+end diff --git a/ansible/roles/eos/templates/t0-54-po2vlan-leaf.j2 b/ansible/roles/eos/templates/t0-54-po2vlan-leaf.j2 new file mode 120000 index 00000000000..8430cb1debd --- /dev/null +++ b/ansible/roles/eos/templates/t0-54-po2vlan-leaf.j2 @@ -0,0 +1 @@ +t0-leaf.j2 \ No newline at end of file diff --git a/ansible/roles/eos/templates/t0-56-d48c8-leaf.j2 b/ansible/roles/eos/templates/t0-56-d48c8-leaf.j2 new file mode 120000 index 00000000000..8430cb1debd --- /dev/null +++ b/ansible/roles/eos/templates/t0-56-d48c8-leaf.j2 @@ -0,0 +1 @@ +t0-leaf.j2 \ No newline at end of file diff --git a/ansible/roles/eos/templates/t0-8-lag-leaf.j2 b/ansible/roles/eos/templates/t0-8-lag-leaf.j2 new file mode 100644 index 00000000000..3e1d0f62d61 --- /dev/null +++ b/ansible/roles/eos/templates/t0-8-lag-leaf.j2 @@ -0,0 +1,138 @@ +{% set host = configuration[hostname] %} +{% set mgmt_ip = ansible_host %} +{% if vm_type is defined and vm_type == "ceos" %} +{% set mgmt_if_index = 0 %} +{% else %} +{% set mgmt_if_index = 1 %} +{% endif %} +no schedule tech-support +! +{% if vm_type is defined and vm_type == "ceos" %} +agent LicenseManager shutdown +agent PowerFuse shutdown +agent PowerManager shutdown +agent Thermostat shutdown +agent LedPolicy shutdown +agent StandbyCpld shutdown +agent Bfd shutdown +{% endif %} +! +hostname {{ hostname }} +! +vrf definition MGMT + rd 1:1 +! +spanning-tree mode mstp +! +aaa root secret 0 123456 +! +username admin privilege 15 role network-admin secret 0 123456 +! +clock timezone UTC +! +lldp run +lldp management-address Management{{ mgmt_if_index }} +lldp management-address vrf MGMT +! +snmp-server community {{ snmp_rocommunity }} ro +snmp-server vrf MGMT +! +ip routing +ip routing vrf MGMT +ipv6 unicast-routing +! +{% if vm_mgmt_gw is defined %} +ip route vrf MGMT 0.0.0.0/0 {{ vm_mgmt_gw }} +{% else %} +ip route vrf MGMT 0.0.0.0/0 {{ mgmt_gw }} +{% endif %} +! 
+interface Management {{ mgmt_if_index }} + description TO LAB MGMT SWITCH +{% if vm_type is defined and vm_type == "ceos" %} + vrf MGMT +{% else %} + vrf forwarding MGMT +{% endif %} + ip address {{ mgmt_ip }}/{{ mgmt_prefixlen }} + no shutdown +! +{% for name, iface in host['interfaces'].items() %} +interface {{ name }} +{% if name.startswith('Loopback') %} + description LOOPBACK +{% else %} + mtu 9214 + no switchport + no shutdown +{% endif %} +{% if name.startswith('Port-Channel') %} + port-channel min-links 2 +{% endif %} +{% if iface['lacp'] is defined %} + channel-group {{ iface['lacp'] }} mode active + lacp rate normal +{% endif %} +{% if iface['ipv4'] is defined %} + ip address {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ iface['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +{% endfor %} +! +interface {{ bp_ifname }} + description backplane + no switchport + no shutdown +{% if host['bp_interface']['ipv4'] is defined %} + ip address {{ host['bp_interface']['ipv4'] }} +{% endif %} +{% if host['bp_interface']['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ host['bp_interface']['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +router bgp {{ host['bgp']['asn'] }} + router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} + ! +{% for asn, remote_ips in host['bgp']['peers'].items() %} +{% for remote_ip in remote_ips %} + neighbor {{ remote_ip }} remote-as {{ asn }} + neighbor {{ remote_ip }} description {{ asn }} +{% if remote_ip | ipv6 %} + address-family ipv6 + neighbor {{ remote_ip }} activate + exit +{% endif %} +{% endfor %} +{% endfor %} + neighbor {{ props.nhipv4 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv4 }} description exabgp_v4 + neighbor {{ props.nhipv6 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv6 }} description exabgp_v6 + address-family ipv6 + neighbor {{ props.nhipv6 }} activate + exit + ! 
+{% for name, iface in host['interfaces'].items() if name.startswith('Loopback') %} +{% if iface['ipv4'] is defined %} + network {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + network {{ iface['ipv6'] }} +{% endif %} +{% endfor %} +! +management api http-commands + no protocol https + protocol http + no shutdown +! +end diff --git a/ansible/roles/eos/templates/t0-backend-leaf.j2 b/ansible/roles/eos/templates/t0-backend-leaf.j2 index 99744676b75..7f5b0e9e8dc 100644 --- a/ansible/roles/eos/templates/t0-backend-leaf.j2 +++ b/ansible/roles/eos/templates/t0-backend-leaf.j2 @@ -71,6 +71,7 @@ interface {{ name }} no shutdown {% else %} {% set is_loopback = false %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t0-leaf-lag-2.j2 b/ansible/roles/eos/templates/t0-leaf-lag-2.j2 index 17a06c08092..3e1d0f62d61 100644 --- a/ansible/roles/eos/templates/t0-leaf-lag-2.j2 +++ b/ansible/roles/eos/templates/t0-leaf-lag-2.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t0-leaf.j2 b/ansible/roles/eos/templates/t0-leaf.j2 index ffc89e25a8f..0b6ec97538b 100644 --- a/ansible/roles/eos/templates/t0-leaf.j2 +++ b/ansible/roles/eos/templates/t0-leaf.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t0-mclag-leaf.j2 b/ansible/roles/eos/templates/t0-mclag-leaf.j2 new file mode 100644 index 00000000000..a4fcad4edf3 --- /dev/null +++ b/ansible/roles/eos/templates/t0-mclag-leaf.j2 @@ -0,0 +1,138 @@ +{% set host = configuration[hostname] %} +{% set mgmt_ip = ansible_host %} +{% if vm_type is defined and vm_type == "ceos" %} +{% set mgmt_if_index = 0 %} +{% else %} +{% set mgmt_if_index = 1 %} +{% endif %} +no schedule tech-support +! 
+{% if vm_type is defined and vm_type == "ceos" %} +agent LicenseManager shutdown +agent PowerFuse shutdown +agent PowerManager shutdown +agent Thermostat shutdown +agent LedPolicy shutdown +agent StandbyCpld shutdown +agent Bfd shutdown +{% endif %} +! +hostname {{ hostname }} +! +vrf definition MGMT + rd 1:1 +! +spanning-tree mode mstp +! +aaa root secret 0 123456 +! +username admin privilege 15 role network-admin secret 0 123456 +! +clock timezone UTC +! +lldp run +lldp management-address Management{{ mgmt_if_index }} +lldp management-address vrf MGMT +! +snmp-server community {{ snmp_rocommunity }} ro +snmp-server vrf MGMT +! +ip routing +ip routing vrf MGMT +ipv6 unicast-routing +! +{% if vm_mgmt_gw is defined %} +ip route vrf MGMT 0.0.0.0/0 {{ vm_mgmt_gw }} +{% else %} +ip route vrf MGMT 0.0.0.0/0 {{ mgmt_gw }} +{% endif %} +! +interface Management {{ mgmt_if_index }} + description TO LAB MGMT SWITCH +{% if vm_type is defined and vm_type == "ceos" %} + vrf MGMT +{% else %} + vrf forwarding MGMT +{% endif %} + ip address {{ mgmt_ip }}/{{ mgmt_prefixlen }} + no shutdown +! +{% for name, iface in host['interfaces'].items() %} +interface {{ name }} +{% if name.startswith('Loopback') %} + description LOOPBACK +{% else %} + mtu 9214 + no switchport + no shutdown +{% endif %} +{% if name.startswith('Port-Channel') %} + port-channel min-links 1 +{% endif %} +{% if iface['lacp'] is defined %} + channel-group {{ iface['lacp'] }} mode active + lacp rate normal +{% endif %} +{% if iface['ipv4'] is defined %} + ip address {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ iface['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +{% endfor %} +! 
+interface {{ bp_ifname }} + description backplane + no switchport + no shutdown +{% if host['bp_interface']['ipv4'] is defined %} + ip address {{ host['bp_interface']['ipv4'] }} +{% endif %} +{% if host['bp_interface']['ipv6'] is defined %} + ipv6 enable + ipv6 address {{ host['bp_interface']['ipv6'] }} + ipv6 nd ra suppress +{% endif %} + no shutdown +! +router bgp {{ host['bgp']['asn'] }} + router-id {{ host['interfaces']['Loopback0']['ipv4'] | ipaddr('address') }} + ! +{% for asn, remote_ips in host['bgp']['peers'].items() %} +{% for remote_ip in remote_ips %} + neighbor {{ remote_ip }} remote-as {{ asn }} + neighbor {{ remote_ip }} description {{ asn }} +{% if remote_ip | ipv6 %} + address-family ipv6 + neighbor {{ remote_ip }} activate + exit +{% endif %} +{% endfor %} +{% endfor %} + neighbor {{ props.nhipv4 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv4 }} description exabgp_v4 + neighbor {{ props.nhipv6 }} remote-as {{ host['bgp']['asn'] }} + neighbor {{ props.nhipv6 }} description exabgp_v6 + address-family ipv6 + neighbor {{ props.nhipv6 }} activate + exit + ! +{% for name, iface in host['interfaces'].items() if name.startswith('Loopback') %} +{% if iface['ipv4'] is defined %} + network {{ iface['ipv4'] }} +{% endif %} +{% if iface['ipv6'] is defined %} + network {{ iface['ipv6'] }} +{% endif %} +{% endfor %} +! +management api http-commands + no protocol https + protocol http + no shutdown +! 
+end \ No newline at end of file diff --git a/ansible/roles/eos/templates/t1-64-lag-tor.j2 b/ansible/roles/eos/templates/t1-64-lag-tor.j2 index 90872c0933e..be3dcc64397 100644 --- a/ansible/roles/eos/templates/t1-64-lag-tor.j2 +++ b/ansible/roles/eos/templates/t1-64-lag-tor.j2 @@ -63,6 +63,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t1-8-lag-spine.j2 b/ansible/roles/eos/templates/t1-8-lag-spine.j2 index 6e7d28cdd2d..b156e6d51d6 100644 --- a/ansible/roles/eos/templates/t1-8-lag-spine.j2 +++ b/ansible/roles/eos/templates/t1-8-lag-spine.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/eos/templates/t1-8-lag-tor.j2 b/ansible/roles/eos/templates/t1-8-lag-tor.j2 index b5a91f9ffd3..0becdfa2221 100644 --- a/ansible/roles/eos/templates/t1-8-lag-tor.j2 +++ b/ansible/roles/eos/templates/t1-8-lag-tor.j2 @@ -63,6 +63,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/eos/templates/t1-backend-tor.j2 b/ansible/roles/eos/templates/t1-backend-tor.j2 index 99744676b75..7f5b0e9e8dc 100644 --- a/ansible/roles/eos/templates/t1-backend-tor.j2 +++ b/ansible/roles/eos/templates/t1-backend-tor.j2 @@ -71,6 +71,7 @@ interface {{ name }} no shutdown {% else %} {% set is_loopback = false %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t1-lag-spine.j2 b/ansible/roles/eos/templates/t1-lag-spine.j2 index 17a06c08092..3e1d0f62d61 100644 --- a/ansible/roles/eos/templates/t1-lag-spine.j2 +++ b/ansible/roles/eos/templates/t1-lag-spine.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if 
name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t1-lag-tor.j2 b/ansible/roles/eos/templates/t1-lag-tor.j2 index 59b635cfaeb..641059da5d7 100644 --- a/ansible/roles/eos/templates/t1-lag-tor.j2 +++ b/ansible/roles/eos/templates/t1-lag-tor.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport no shutdown {% endif %} diff --git a/ansible/roles/eos/templates/t2-core.j2 b/ansible/roles/eos/templates/t2-core.j2 index 97490e1cf16..192236da66e 100644 --- a/ansible/roles/eos/templates/t2-core.j2 +++ b/ansible/roles/eos/templates/t2-core.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/eos/templates/t2-leaf.j2 b/ansible/roles/eos/templates/t2-leaf.j2 index dc3115434f1..8e22919b356 100644 --- a/ansible/roles/eos/templates/t2-leaf.j2 +++ b/ansible/roles/eos/templates/t2-leaf.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/eos/templates/t2-vs-core.j2 b/ansible/roles/eos/templates/t2-vs-core.j2 index 09b63eb8771..fcc4a06ca60 100644 --- a/ansible/roles/eos/templates/t2-vs-core.j2 +++ b/ansible/roles/eos/templates/t2-vs-core.j2 @@ -62,6 +62,7 @@ interface {{ name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/eos/templates/t2-vs-leaf.j2 b/ansible/roles/eos/templates/t2-vs-leaf.j2 index dc3115434f1..8e22919b356 100644 --- a/ansible/roles/eos/templates/t2-vs-leaf.j2 +++ b/ansible/roles/eos/templates/t2-vs-leaf.j2 @@ -62,6 +62,7 @@ interface {{ 
name }} {% if name.startswith('Loopback') %} description LOOPBACK {% else %} + mtu 9214 no switchport {% endif %} {% if name.startswith('Port-Channel') %} diff --git a/ansible/roles/fanout/library/port_config_gen.py b/ansible/roles/fanout/library/port_config_gen.py index 211a2701eea..4df3166192a 100644 --- a/ansible/roles/fanout/library/port_config_gen.py +++ b/ansible/roles/fanout/library/port_config_gen.py @@ -58,7 +58,7 @@ def _get_platform(self): for line in machine_conf: if not line: continue - if "platform" in line: + if "platform" in line and "build_platform" not in line: return line.split("=")[1].strip() raise ValueError("Failed to retrieve platform from '%s'" % self.MACHINE_CONF) @@ -108,10 +108,13 @@ def _read_from_port_config(filepath): port_config = {} with open(filepath) as fd: lines = fd.readlines() - header = lines[0].strip("#\n ") + data_index = 0 + while lines[data_index].startswith("#"): + data_index = data_index + 1 + header = lines[data_index-1].strip("#\n ") keys = header.split() alias_index = keys.index("alias") - for line in lines[1:]: + for line in lines[data_index:]: if not line: continue values = line.split() diff --git a/ansible/roles/fanout/tasks/fanout_eos.yml b/ansible/roles/fanout/tasks/fanout_eos.yml index fb06f2996a6..520f55ff763 100644 --- a/ansible/roles/fanout/tasks/fanout_eos.yml +++ b/ansible/roles/fanout/tasks/fanout_eos.yml @@ -1,5 +1,14 @@ +- name: set login to tacacs if tacacs is defined + set_fact: ansible_ssh_user={{ fanout_tacacs_eos_user }} ansible_ssh_pass={{ fanout_tacacs_eos_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_eos_user is defined and fanout_tacacs_eos_password is defined + - name: prepare fanout switch admin login info set_fact: ansible_ssh_user={{ fanout_admin_user }} ansible_ssh_pass={{ fanout_admin_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_eos_user is not defined 
and fanout_tacacs_eos_password is not defined - name: create persistent shell login file: path=/mnt/flash/rc.eos state=touch diff --git a/ansible/roles/fanout/tasks/fanout_mlnx.yml b/ansible/roles/fanout/tasks/fanout_mlnx.yml index 663b1f76f8f..b0c95b952ff 100644 --- a/ansible/roles/fanout/tasks/fanout_mlnx.yml +++ b/ansible/roles/fanout/tasks/fanout_mlnx.yml @@ -6,8 +6,22 @@ ### specified in this playbook, you would need to come up with your own fanout switch deployment ### playbook ################################################################################################ +- name: set login to tacacs if tacacs is defined + set_fact: ansible_ssh_user={{ fanout_tacacs_mlnx_user }} ansible_ssh_pass={{ fanout_tacacs_mlnx_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_mlnx_user is defined and fanout_tacacs_mlnx_password is defined + tags: always + - name: prepare fanout switch admin login info - set_fact: ansible_ssh_user={{ fanout_mlnx_user }} ansible_ssh_pass={{ fanout_mlnx_password }} peer_hwsku={{device_info[inventory_hostname]['HwSku']}} + set_fact: ansible_ssh_user={{ fanout_mlnx_user }} ansible_ssh_pass={{ fanout_mlnx_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_mlnx_user is not defined and fanout_tacacs_mlnx_password is not defined + tags: always + +- name: prepare peer hwsku + set_fact: peer_hwsku={{ device_info[inventory_hostname]['HwSku'] }} tags: always ########################################## diff --git a/ansible/roles/fanout/tasks/fanout_sonic.yml b/ansible/roles/fanout/tasks/fanout_sonic.yml index 640a488bcc9..1e975443f1e 100644 --- a/ansible/roles/fanout/tasks/fanout_sonic.yml +++ b/ansible/roles/fanout/tasks/fanout_sonic.yml @@ -1,7 +1,16 @@ - debug: msg="{{ device_info[inventory_hostname] }}" +- name: set login to tacacs if tacacs is defined + set_fact: ansible_ssh_user={{ fanout_tacacs_sonic_user }} 
ansible_ssh_pass={{ fanout_tacacs_sonic_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_sonic_user is defined and fanout_tacacs_sonic_password is defined + - name: prepare fanout switch admin login info - set_fact: ansible_ssh_user={{ fanout_sonic_user }} ansible_ssh_password={{ fanout_sonic_password }} + set_fact: ansible_ssh_user={{ fanout_sonic_user }} ansible_ssh_pass={{ fanout_sonic_password }} + when: > + fanout_tacacs_user is not defined and fanout_tacacs_user is not defined and + fanout_tacacs_sonic_user is not defined and fanout_tacacs_sonic_password is not defined - name: retrieve SONiC version shell: cat /etc/sonic/sonic_version.yml diff --git a/ansible/roles/fanout/tasks/main.yml b/ansible/roles/fanout/tasks/main.yml index 4847930838b..9543ee3cbf3 100644 --- a/ansible/roles/fanout/tasks/main.yml +++ b/ansible/roles/fanout/tasks/main.yml @@ -13,6 +13,13 @@ - set_fact: sw_type="{{ device_info[inventory_hostname]['Type'] }}" +# fanout_tacacs_user can override fanout_tacacs_sonic_user, +# fanout_tacacs_sonic_user can override fanout_sonic_user +- name: set login info if fanout_tacacs_user and fanout_tacacs_password is defined + set_fact: ansible_ssh_user={{ fanout_tacacs_user }} ansible_ssh_pass={{ fanout_tacacs_password }} + when: fanout_tacacs_user is defined and fanout_tacacs_password is defined + tags: always + - set_fact: os='eos' when: os is not defined tags: always diff --git a/ansible/roles/fanout/tasks/rootfanout_connect.yml b/ansible/roles/fanout/tasks/rootfanout_connect.yml index 131a74e2b54..7498b1f284a 100644 --- a/ansible/roles/fanout/tasks/rootfanout_connect.yml +++ b/ansible/roles/fanout/tasks/rootfanout_connect.yml @@ -30,7 +30,7 @@ - name: Collect DUTs vlans set_fact: dev_vlans: "{{ dev_vlans|default([]) + item.value }}" - loop: "{{ devinfo['ansible_facts']['device_vlan_range'] | dict2items }}" + loop: "{{ devinfo['ansible_facts']['device_vlan_range'] | default ({}) | 
dict2items }}" - name: Find the root fanout switch set_fact: diff --git a/ansible/roles/fanout/templates/arista_7260_connect.j2 b/ansible/roles/fanout/templates/arista_7260_connect.j2 index 5717322c87e..94d6003194c 100644 --- a/ansible/roles/fanout/templates/arista_7260_connect.j2 +++ b/ansible/roles/fanout/templates/arista_7260_connect.j2 @@ -3,13 +3,13 @@ config {% if deploy_leaf %} vlan {{ dev_vlans | list | join(',') }} {% endif %} -{% for i in range(1,65) %} -{% set intf = 'Ethernet' + i|string %} -{% if intf in root_conn %} + +{% for intf in root_conn %} {% set peer_dev = root_conn[intf]['peerdevice'] %} {% set peer_port = root_conn[intf]['peerport'] %} +{% set peer_speed = root_conn[intf]['speed'] %} {% if peer_dev in lab_devices and 'Fanout' not in lab_devices[peer_dev]['Type'] and not deploy_leaf %} - interface {{ intf }} +interface {{ intf }} switchport switchport trunk allowed vlan remove {{ dev_vlans | list | join(',') }} {% if peer_dev == server and peer_port == server_port %} @@ -19,15 +19,20 @@ vlan {{ dev_vlans | list | join(',') }} {% endif %} {% endif %} {% if peer_dev in lab_devices and 'Fanout' in lab_devices[peer_dev]['Type'] and deploy_leaf %} - interface {{ intf }} +interface {{ intf }} switchport switchport trunk allowed vlan remove {{ dev_vlans | list | join(',') }} {% if peer_dev == leaf_name %} description {{ peer_dev }}-{{ peer_port }} + speed forced {{ peer_speed }}full switchport mode trunk - switchport trunk allowed vlan add {{ dev_vlans | list | join(',') }} + switchport trunk allowed vlan {{ dev_vlans | list | join(',') }} no shutdown -{% endif %} +{% if peer_speed == "100000" %} + error-correction encoding reed-solomon +{% else %} + no error-correction encoding +{% endif %} {% endif %} {% endif %} {% endfor %} diff --git a/ansible/roles/test/files/acstests/router_utils.py b/ansible/roles/test/files/acstests/router_utils.py index 222a598db32..806ec6b2931 100644 --- a/ansible/roles/test/files/acstests/router_utils.py +++ 
b/ansible/roles/test/files/acstests/router_utils.py @@ -4,7 +4,6 @@ from ptf.testutils import * from ptf.mask import Mask -import ipaddress import pprint import ipaddress diff --git a/ansible/roles/test/files/helpers/bfd_responder.py b/ansible/roles/test/files/helpers/bfd_responder.py new file mode 100644 index 00000000000..df5d6ed203b --- /dev/null +++ b/ansible/roles/test/files/helpers/bfd_responder.py @@ -0,0 +1,184 @@ +import binascii +import socket +import struct +import select +import json +import argparse +import os.path +from fcntl import ioctl +import logging +import scapy.all as scapy2 +from scapy.contrib.bfd import BFD +logging.getLogger("scapy.runtime").setLevel(logging.ERROR) +scapy2.conf.use_pcap = True + + +def get_if(iff, cmd): + s = socket.socket() + ifreq = ioctl(s, cmd, struct.pack("16s16x", iff)) + s.close() + return ifreq + + +def get_mac(iff): + SIOCGIFHWADDR = 0x8927 # Get hardware address + return get_if(iff, SIOCGIFHWADDR)[18:24] + + +class Interface(object): + + def __init__(self, iface): + self.iface = iface + self.socket = None + self.mac_address = get_mac(iface) + + def __del__(self): + if self.socket: + self.socket.close() + + def bind(self): + self.socket = scapy2.conf.L2listen(iface=self.iface, filter="udp port 4784") + + def handler(self): + return self.socket + + def recv(self): + sniffed = self.socket.recv() + pkt = sniffed[0] + str_pkt = str(pkt).encode("HEX") + binpkt = binascii.unhexlify(str_pkt) + return binpkt + + def send(self, data): + scapy2.sendp(data, iface=self.iface) + + def mac(self): + return self.mac_address + + def name(self): + return self.iface + + +class Poller(object): + def __init__(self, interfaces, responder): + self.responder = responder + self.mapping = {} + for interface in interfaces: + self.mapping[interface.handler()] = interface + + def poll(self): + handlers = self.mapping.keys() + while True: + (rdlist, _, _) = select.select(handlers, [], []) + for handler in rdlist: + 
self.responder.action(self.mapping[handler]) + + +class BFDResponder(object): + def __init__(self, sessions): + self.sessions = sessions + return + + def action(self, interface): + data = interface.recv() + mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, bfd_state = self.extract_bfd_info(data) + if ip_dst not in self.sessions: + return + session = self.sessions[ip_dst] + if bfd_state == 3: + interface.send(session["pkt"]) + return + + if bfd_state == 2: + return + session["other_disc"] = bfd_remote_disc + bfd_pkt_init = self.craft_bfd_packet(session, data, mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, 2) + bfd_pkt_init.payload.payload.chksum = None + interface.send(bfd_pkt_init) + bfd_pkt_init.payload.payload.payload.load.sta = 3 + bfd_pkt_init.payload.payload.chksum = None + session["pkt"] = bfd_pkt_init + return + + def extract_bfd_info(self, data): + # remote_mac, remote_ip, request_ip, op_type + ether = scapy2.Ether(data) + mac_src = ether.src + mac_dst = ether.dst + ip_src = ether.payload.src + ip_dst = ether.payload.dst + bfdpkt = BFD(ether.payload.payload.payload.load) + bfd_remote_disc = bfdpkt.my_discriminator + bfd_state = bfdpkt.sta + return mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, bfd_state + + def craft_bfd_packet(self, session, data, mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, bfd_state): + ethpart = scapy2.Ether(data) + bfdpart = BFD(ethpart.payload.payload.payload.load) + bfdpart.my_discriminator = session["my_disc"] + bfdpart.your_discriminator = bfd_remote_disc + bfdpart.sta = bfd_state + + ethpart.payload.payload.payload.load = bfdpart + ethpart.src = mac_dst + ethpart.dst = mac_src + ethpart.payload.src = ip_dst + ethpart.payload.dst = ip_src + return ethpart + + +def parse_args(): + parser = argparse.ArgumentParser(description='ARP autoresponder') + parser.add_argument('--conf', '-c', type=str, dest='conf', default='/tmp/from_t1.json', + help='path to json file with configuration') + args = parser.parse_args() + return 
args + + +def main(): + args = parse_args() + + if not os.path.exists(args.conf): + print("Can't find file %s" % args.conf) + return + + with open(args.conf) as fp: + data = json.load(fp) + + # generate ip_sets. every ip address will have it's own uniq mac address + sessions = {} + local_disc_base = 0xcdba0000 + local_src_port = 14000 + ifaces = {} + for bfd in data: + curr_session = {} + curr_session["local"] = bfd["local_addr"] + curr_session["remote"] = bfd["neighbor_addr"] + curr_session["intf"] = bfd["ptf_intf"] + curr_session["multihop"] = bfd["multihop"] + curr_session["my_disc"] = local_disc_base + curr_session["other_disc"] = 0x00 + curr_session["mac"] = get_mac(str(bfd["ptf_intf"])) + curr_session["src_port"] = local_src_port + curr_session["pkt"] = "" + if bfd["ptf_intf"] not in ifaces: + ifaces[curr_session["intf"]] = curr_session["mac"] + + local_disc_base += 1 + local_src_port += 1 + sessions[curr_session["local"]] = curr_session + ifaceobjs = [] + for iface_name in ifaces.keys(): + iface = Interface(str(iface_name)) + iface.bind() + ifaceobjs.append(iface) + + resp = BFDResponder(sessions) + + p = Poller(ifaceobjs, resp) + p.poll() + return + + +if __name__ == '__main__': + main() diff --git a/ansible/roles/test/files/ptftests/IP_decap_test.py b/ansible/roles/test/files/ptftests/IP_decap_test.py index 1412c95425d..78e38cee08c 100644 --- a/ansible/roles/test/files/ptftests/IP_decap_test.py +++ b/ansible/roles/test/files/ptftests/IP_decap_test.py @@ -98,14 +98,15 @@ def setUp(self): self.ttl_mode = self.test_params.get('ttl_mode') self.ignore_ttl = self.test_params.get('ignore_ttl', False) self.single_fib = self.test_params.get('single_fib_for_duts', False) - + self.asic_type = self.test_params.get('asic_type') # multi asic platforms have internal routing hops # this param will be used to set the correct ttl values for inner packet # this value is zero for single asic platform self.max_internal_hops = self.test_params.get('max_internal_hops', 0) if 
self.max_internal_hops: self.TTL_RANGE = list(range(self.max_internal_hops + 1, 63)) - + if self.asic_type == "marvell": + fib.EXCLUDE_IPV4_PREFIXES.append("240.0.0.0/4") self.fibs = [] for fib_info_file in self.test_params.get('fib_info_files'): self.fibs.append(fib.Fib(fib_info_file)) @@ -314,7 +315,6 @@ def send_and_verify(self, dst_ip, expected_ports, src_port, dut_index, outer_pkt @outer_ttl: TTL for the outer layer @inner_ttl: TTL for the inner layer ''' - pkt, exp_pkt = self.create_encap_packet(dst_ip, src_port, dut_index, outer_pkt_type, triple_encap, outer_ttl, inner_ttl) masked_exp_pkt = Mask(exp_pkt) masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") @@ -470,9 +470,9 @@ def run_encap_combination_test(self, outer_pkt_type, inner_pkt_type): def check_range(self, ip_range, outer_pkt_type, inner_pkt_type, dut_index): dst_ips = [] dst_ips.append(ip_range.get_first_ip()) - if ip_range.length > 1: + if ip_range.length() > 1: dst_ips.append(ip_range.get_last_ip()) - if ip_range.length > 2: + if ip_range.length() > 2: dst_ips.append(ip_range.get_random_ip()) logging.info('Checking dst_ips={}'.format(dst_ips)) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 57eb6660463..7dc4af07faa 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -139,9 +139,10 @@ def __init__(self): self.check_param('vlan_ports_file', '', required=True) self.check_param('ports_file', '', required=True) self.check_param('dut_mac', '', required=True) + self.check_param('vlan_mac', '', required=True) self.check_param('default_ip_range', '', required=True) self.check_param('vlan_ip_range', '', required=True) - self.check_param('lo_prefix', '10.1.0.32/32', required=False) + self.check_param('lo_prefix', '', required=False) self.check_param('lo_v6_prefix', 'fc00:1::/64', required=False) self.check_param('arista_vms', [], required=True) 
self.check_param('min_bgp_gr_timeout', 15, required=False) @@ -246,6 +247,9 @@ def __init__(self): alt_password=self.test_params.get('alt_password') ) + self.sender_thr = threading.Thread(target=self.send_in_background) + self.sniff_thr = threading.Thread(target=self.sniff_in_background) + # Check if platform type is kvm stdout, stderr, return_code = self.dut_connection.execCommand("show platform summary | grep Platform | awk '{print $2}'") platform_type = str(stdout[0]).replace('\n', '') @@ -437,7 +441,7 @@ def build_vlan_if_port_mapping(self): portchannel_names = [pc['name'] for pc in portchannel_content.values()] vlan_content = self.read_json('vlan_ports_file') - + vlan_if_port = [] for vlan in self.vlan_ip_range: for ifname in vlan_content[vlan]['members']: @@ -556,6 +560,8 @@ def setUp(self): if self.reboot_type in ['soft-reboot', 'reboot']: raise ValueError('Not supported reboot_type %s' % self.reboot_type) self.dut_mac = self.test_params['dut_mac'] + self.vlan_mac = self.test_params['vlan_mac'] + self.lo_prefix = self.test_params['lo_prefix'] if self.kvm_test: self.log("This test is for KVM platform") @@ -590,6 +596,7 @@ def setUp(self): self.log("DUT ssh: %s@%s" % (self.test_params['dut_username'], self.test_params['dut_hostname'])) self.log("DUT reboot limit in seconds: %s" % self.limit) self.log("DUT mac address: %s" % self.dut_mac) + self.log("DUT vlan mac address: %s" % self.vlan_mac) self.log("From server src addr: %s" % self.from_server_src_addr) self.log("From server src port: %s" % self.from_server_src_port) @@ -715,8 +722,8 @@ def generate_from_t1(self): def generate_from_vlan(self): packet = simple_tcp_packet( - eth_dst=self.dut_mac, eth_src=self.from_server_src_mac, + eth_dst=self.vlan_mac, ip_src=self.from_server_src_addr, ip_dst=self.from_server_dst_addr, tcp_dport=5000 @@ -736,17 +743,17 @@ def generate_from_vlan(self): def generate_ping_dut_lo(self): self.ping_dut_packets = [] - dut_lo_ipv4 = self.test_params['lo_prefix'].split('/')[0] + 
dut_lo_ipv4 = self.lo_prefix.split('/')[0] for src_port in self.vlan_host_ping_map: src_addr = random.choice(self.vlan_host_ping_map[src_port].keys()) src_mac = self.hex_to_mac(self.vlan_host_ping_map[src_port][src_addr]) packet = simple_icmp_packet(eth_src=src_mac, - eth_dst=self.dut_mac, + eth_dst=self.vlan_mac, ip_src=src_addr, ip_dst=dut_lo_ipv4) self.ping_dut_packets.append((src_port, str(packet))) - exp_packet = simple_icmp_packet(eth_src=self.dut_mac, + exp_packet = simple_icmp_packet(eth_src=self.vlan_mac, ip_src=dut_lo_ipv4, icmp_type='echo-reply') @@ -754,7 +761,7 @@ def generate_ping_dut_lo(self): ip_src=self.from_server_src_addr, ip_dst=dut_lo_ipv4) - self.ping_dut_exp_packet = Mask(exp_packet) + self.ping_dut_exp_packet = Mask(exp_packet) self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.Ether, "dst") self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.IP, "dst") self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.IP, "id") @@ -955,6 +962,11 @@ def handle_fast_reboot_health_check(self): self.no_routing_stop = self.reboot_start def handle_warm_reboot_health_check(self): + # wait until sniffer and sender threads have started + while not (self.sniff_thr.isAlive() and self.sender_thr.isAlive()): + time.sleep(1) + + self.log("IO sender and sniffer threads have started, wait until completion") self.sniff_thr.join() self.sender_thr.join() @@ -1277,14 +1289,25 @@ def reboot_dut(self): if not self.kvm_test and\ (self.reboot_type == 'fast-reboot' or 'warm-reboot' in self.reboot_type): - self.sender_thr = threading.Thread(target = self.send_in_background) - self.sniff_thr = threading.Thread(target = self.sniff_in_background) self.sniffer_started = threading.Event() # Event for the sniff_in_background status. 
self.sniff_thr.start() self.sender_thr.start() self.log("Rebooting remote side") - stdout, stderr, return_code = self.dut_connection.execCommand("sudo " + self.reboot_type, timeout=30) + reboot_command = self.reboot_type + # create an empty log file to capture output of reboot command + reboot_log_file = "/host/{}.log".format(reboot_command.replace(' ', '')) + self.dut_connection.execCommand("sudo touch {}; sudo chmod 666 {}".format( + reboot_log_file, reboot_log_file)) + + # execute reboot command w/ nohup so that when the execCommand times-out: + # 1. there is a reader/writer for any bash commands using PIPE + # 2. the output and error of CLI still gets written to log file + stdout, stderr, return_code = self.dut_connection.execCommand( + "nohup sudo {} -v &> {}".format( + reboot_command, reboot_log_file), timeout=10) + + if stdout != []: self.log("stdout from %s: %s" % (self.reboot_type, str(stdout))) if stderr != []: @@ -1317,20 +1340,30 @@ def peer_state_check(self, ip, queue): self.log('SSH thread for VM {} finished'.format(ip)) lacp_pdu_times = self.lacp_pdu_times[ip] - lacp_pdu_down_times = lacp_pdu_times.get("lacp_down") - lacp_pdu_up_times = lacp_pdu_times.get("lacp_up") - lacp_pdu_before_reboot = float(lacp_pdu_down_times[-1]) if\ - lacp_pdu_down_times and len(lacp_pdu_down_times) > 0 else None - lacp_pdu_after_reboot = float(lacp_pdu_up_times[-1]) if\ - lacp_pdu_up_times and len(lacp_pdu_up_times) > 0 else None - if 'warm-reboot' in self.reboot_type and lacp_pdu_before_reboot and lacp_pdu_after_reboot: - lacp_time_diff = lacp_pdu_after_reboot - lacp_pdu_before_reboot - if lacp_time_diff >= 90 and not self.kvm_test: + lacp_pdu_all_times = lacp_pdu_times.get("lacp_all") + + self.log('lacp_pdu_all_times: IP:{}: {}'.format(ip, lacp_pdu_all_times)) + + # in the list of all LACPDUs received by T1, find the largest time gap between two consecutive LACPDUs + max_lacp_session_wait = None + if lacp_pdu_all_times and len(lacp_pdu_all_times) > 1: + 
lacp_pdu_all_times.sort() + max_lacp_session_wait = 0 + prev_time = lacp_pdu_all_times[0] + for new_time in lacp_pdu_all_times[1:]: + lacp_session_wait = new_time - prev_time + if lacp_session_wait > max_lacp_session_wait: + max_lacp_session_wait = lacp_session_wait + prev_time = new_time + + if 'warm-reboot' in self.reboot_type: + if max_lacp_session_wait and max_lacp_session_wait >= 90 and not self.kvm_test: self.fails['dut'].add("LACP session likely terminated by neighbor ({})".format(ip) +\ - " post-reboot lacpdu came after {}s of lacpdu pre-boot".format(lacp_time_diff)) - else: - lacp_time_diff = None - self.lacp_session_pause[ip] = lacp_time_diff + " post-reboot lacpdu came after {}s of lacpdu pre-boot".format(max_lacp_session_wait)) + elif not max_lacp_session_wait and not self.kvm_test: + self.fails['dut'].add("LACP session timing not captured") + + self.lacp_session_pause[ip] = max_lacp_session_wait def wait_until_cpu_port_down(self, signal): @@ -1436,19 +1469,6 @@ def scapy_sniff(self, wait=300, sniff_filter=''): self.packets = scapyall.rdpcap(capture_pcap) self.log("Number of all packets captured: {}".format(len(self.packets))) - def send_and_sniff(self): - """ - This method starts two background threads in parallel: - one for sending, another for collecting the sent packets. - """ - self.sender_thr = threading.Thread(target = self.send_in_background) - self.sniff_thr = threading.Thread(target = self.sniff_in_background) - self.sniffer_started = threading.Event() # Event for the sniff_in_background status. - self.sniff_thr.start() - self.sender_thr.start() - self.sniff_thr.join() - self.sender_thr.join() - def check_tcp_payload(self, packet): """ This method is used by examine_flow() method. @@ -1464,12 +1484,15 @@ def no_flood(self, packet): """ This method filters packets which are unique (i.e. no floods). 
""" - if (not int(str(packet[scapyall.TCP].payload)) in self.unique_id) and (packet[scapyall.Ether].src == self.dut_mac): + if (not int(str(packet[scapyall.TCP].payload)) in self.unique_id) and \ + (packet[scapyall.Ether].src == self.dut_mac or packet[scapyall.Ether].src == self.vlan_mac): # This is a unique (no flooded) received packet. + # for dualtor, t1->server rcvd pkt will have src MAC as vlan_mac, and server->t1 rcvd pkt will have src MAC as dut_mac self.unique_id.append(int(str(packet[scapyall.TCP].payload))) return True - elif packet[scapyall.Ether].dst == self.dut_mac: + elif packet[scapyall.Ether].dst == self.dut_mac or packet[scapyall.Ether].dst == self.vlan_mac: # This is a sent packet. + # for dualtor, t1->server sent pkt will have dst MAC as dut_mac, and server->t1 sent pkt will have dst MAC as vlan_mac return True else: return False @@ -1535,14 +1558,18 @@ def examine_flow(self, filename = None): missed_t1_to_vlan = 0 self.disruption_start, self.disruption_stop = None, None for packet in packets: - if packet[scapyall.Ether].dst == self.dut_mac: + if packet[scapyall.Ether].dst == self.dut_mac or packet[scapyall.Ether].dst == self.vlan_mac: # This is a sent packet - keep track of it as payload_id:timestamp. + # for dualtor both MACs are needed: + # t1->server sent pkt will have dst MAC as dut_mac, and server->t1 sent pkt will have dst MAC as vlan_mac sent_payload = int(str(packet[scapyall.TCP].payload)) sent_packets[sent_payload] = packet.time sent_counter += 1 continue - if packet[scapyall.Ether].src == self.dut_mac: + if packet[scapyall.Ether].src == self.dut_mac or packet[scapyall.Ether].src == self.vlan_mac: # This is a received packet. + # for dualtor both MACs are needed: + # t1->server rcvd pkt will have src MAC as vlan_mac, and server->t1 rcvd pkt will have src MAC as dut_mac received_time = packet.time received_payload = int(str(packet[scapyall.TCP].payload)) if (received_payload % 5) == 0 : # From vlan to T1. 
@@ -1557,6 +1584,8 @@ def examine_flow(self, filename = None): continue if received_payload - prev_payload > 1: # Packets in a row are missing, a disruption. + self.log("received_payload: {}, prev_payload: {}, sent_counter: {}, received_counter: {}".format( + received_payload, prev_payload, sent_counter, received_counter)) lost_id = (received_payload -1) - prev_payload # How many packets lost in a row. disrupt = (sent_packets[received_payload] - sent_packets[prev_payload + 1]) # How long disrupt lasted. # Add disrupt to the dict: @@ -1667,7 +1696,7 @@ def wait_dut_to_warm_up(self): up_time = None if elapsed > warm_up_timeout_secs: - raise Exception("Control plane didn't come up within warm up timeout") + raise Exception("IO didn't come up within warm up timeout. Control plane: {}, Data plane: {}".format(ctrlplane, dataplane)) time.sleep(1) # check until flooding is over. Flooding happens when FDB entry of diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index 0861c138d2b..6300e5ecc8a 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -54,8 +54,7 @@ def __init__(self, ip, queue, test_params, log_cb=None, login='admin', password= self.min_bgp_gr_timeout = int(test_params['min_bgp_gr_timeout']) self.reboot_type = test_params['reboot_type'] self.bgp_v4_v6_time_diff = test_params['bgp_v4_v6_time_diff'] - self.lacp_pdu_time_on_down = list() - self.lacp_pdu_time_on_up = list() + self.lacp_pdu_timings = list() def __del__(self): self.disconnect() @@ -161,7 +160,6 @@ def run(self): portchannel_output = "\n".join(portchannel_output.split("\r\n")[1:-1]) sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] samples[cur_time] = sample - collect_lacppdu_time = False while not (quit_enabled and v4_routing_ok and v6_routing_ok): cmd = None @@ -172,17 +170,11 @@ def run(self): if cmd == 'quit': 
quit_enabled = True continue - if cmd == 'cpu_down': + + if (cmd == 'cpu_down' or cmd == 'cpu_going_up' or cmd == 'cpu_up'): last_lacppdu_time_before_reboot = self.check_last_lacppdu_time() - self.lacp_pdu_time_on_down.append(last_lacppdu_time_before_reboot) - if cmd == 'cpu_up' or collect_lacppdu_time: - # control plane is back up, start polling for new lacp-pdu - last_lacppdu_time_after_reboot = self.check_last_lacppdu_time() - if last_lacppdu_time_after_reboot != last_lacppdu_time_before_reboot: - self.lacp_pdu_time_on_up.append(last_lacppdu_time_after_reboot) - collect_lacppdu_time = False # post-reboot lacp-pdu is received, stop the polling - else: # Until post-reboot lacp-pdu is not received, keep polling for it - collect_lacppdu_time = True + if last_lacppdu_time_before_reboot is not None: + self.lacp_pdu_timings.append(last_lacppdu_time_before_reboot) cur_time = time.time() info = {} @@ -286,7 +278,9 @@ def run(self): self.fails.add(msg) self.log('Finishing run()') - return self.fails, self.info, cli_data, log_data, {"lacp_down": self.lacp_pdu_time_on_down, "lacp_up": self.lacp_pdu_time_on_up} + return self.fails, self.info, cli_data, log_data, { + "lacp_all": list(set(self.lacp_pdu_timings)) + } def extract_from_logs(self, regexp, data): raw_data = [] diff --git a/ansible/roles/test/files/ptftests/bfd_responder.py b/ansible/roles/test/files/ptftests/bfd_responder.py new file mode 100644 index 00000000000..59aa8f303fc --- /dev/null +++ b/ansible/roles/test/files/ptftests/bfd_responder.py @@ -0,0 +1,169 @@ +# PTF bfd responder. Responds to any BFD packet that we received. +# Uses a monitor file as input. The monitor file has 2 lines: +# Line 1: list of port indices to monitor +# Line 2: list of ip addresses to respond to. 
+ +import ptf +import time +import ptf.packet as scapy +from ptf.base_tests import BaseTest +from scapy.contrib.bfd import BFD +from ptf.testutils import (send_packet, test_params_get) +from ipaddress import ip_address, IPv4Address, IPv6Address +session_timeout = 1 + + +class BFD_Responder(BaseTest): + def __init__(self): + BaseTest.__init__(self) + self.DEFAULT_PKT_LEN = 100 + self.sessions = {} + self.local_disc_base = 0xcdba0000 + self.local_src_port = 14000 + + def setUp(self): + self.dataplane = ptf.dataplane_instance + self.test_params = test_params_get() + self.dut_mac = self.test_params['dut_mac'] + self.dut_loop_ips = self.test_params['dut_loop_ips'] + for ipaddr in self.dut_loop_ips: + if isinstance(ip_address(ipaddr.decode()), IPv4Address): + self.dut_loop_ipv4 = ipaddr + if isinstance(ip_address(ipaddr.decode()), IPv6Address): + self.dut_loop_ipv6 = ipaddr + self.monitor_file = self.test_params['monitor_file'] + + def respond_to_packet(self, port_number, received_pkt): + received_pkt = scapy.Ether(received_pkt) + args = {} + args['dst_mac'] = received_pkt['Ether'].dst + args['version'] = received_pkt['BFD'].version + args['diag'] = received_pkt['BFD'].diag + args['sta'] = received_pkt['BFD'].sta + args['flags'] = received_pkt['BFD'].flags + args['detect_multi'] = received_pkt['BFD'].detect_multi + args['len'] = received_pkt['BFD'].len + args['my_discriminator'] = received_pkt['BFD'].my_discriminator + args['your_discriminator'] = received_pkt['BFD'].your_discriminator + args['min_tx_interval'] = received_pkt['BFD'].min_tx_interval + args['min_rx_interval'] = received_pkt['BFD'].min_rx_interval + args['echo_rx_interval'] = received_pkt['BFD'].echo_rx_interval + + pkt = BFD(args) + count = send_packet(self, port_number, str(pkt)) + if count == 0: + raise RuntimeError( + "send_packet failed args:port_number{}, " + "dp_tuple:{}".format(port_number, str(pkt))) + + def runTest(self): + while True: + valid_monit_file = True + with open(self.monitor_file) as 
fd: + full_strings = fd.readlines() + try: + ports_to_monitor = full_strings[0].strip() + all_monitored_addresses = full_strings[1].strip() + except IndexError: + valid_monit_file = False + if ports_to_monitor == "" or all_monitored_addresses == "": + valid_monit_file = False + + if not valid_monit_file: + time.sleep(1) + continue + ports_to_monitor = [int(x) for x in ports_to_monitor.split(',')] + all_monitored_addresses = all_monitored_addresses.split(',') + + result = self.dataplane.poll(device_number=0, timeout=0.1) + if not isinstance(result, self.dataplane.PollSuccess) or \ + result.port not in ports_to_monitor or \ + "UDP" not in scapy.Ether(result.packet): + continue + if scapy.Ether(result.packet)['UDP'].dport != 4784: + continue + received_pkt = result.packet + port_number = result.port + mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, bfd_state = \ + self.extract_bfd_info(received_pkt) + if ip_dst not in all_monitored_addresses: + continue + try: + session = self.sessions[ip_dst] + except KeyError: + self.sessions[ip_dst] = {} + + if bfd_state == 3: + count = send_packet(self, result.port, str(session["pkt"])) + if count == 0: + raise RuntimeError( + "send_packet failed args:port_number{}, " + "dp_tuple:{}".format(port_number, str(session['pkt']))) + + if bfd_state == 2: + continue + + session = {} + session['addr'] = ip_dst + session['remote_addr'] = ip_src + session['intf'] = result.port + session['multihop'] = True + session['mac'] = mac_dst + session['pkt'] = '' + session["src_port"] = self.local_src_port + self.local_disc_base += 1 + self.local_src_port += 1 + session['my_disc'] = self.local_disc_base + session["other_disc"] = bfd_remote_disc + + bfd_pkt_init = self.craft_bfd_packet( + session['my_disc'], + received_pkt, + mac_src, + mac_dst, + ip_src, + ip_dst, + bfd_remote_disc, + 2) + count = send_packet(self, session['intf'], str(bfd_pkt_init)) + if count == 0: + raise RuntimeError( + "send_packet failed args:port_number{}, " + 
"dp_tuple:{}".format(port_number, str(bfd_pkt_init))) + bfd_pkt_init.payload.payload.payload.load.sta = 3 + session["pkt"] = bfd_pkt_init + self.sessions[ip_dst] = session + + def extract_bfd_info(self, data): + # remote_mac, remote_ip, request_ip, op_type + ether = scapy.Ether(data) + mac_src = ether.src + mac_dst = ether.dst + ip_src = ether.payload.src + ip_dst = ether.payload.dst + bfdpkt = BFD(ether.payload.payload.payload.load) + bfd_remote_disc = bfdpkt.my_discriminator + bfd_state = bfdpkt.sta + return mac_src, mac_dst, ip_src, ip_dst, bfd_remote_disc, bfd_state + + def craft_bfd_packet(self, + my_discriminator, + data, + mac_src, + mac_dst, + ip_src, + ip_dst, + bfd_remote_disc, + bfd_state): + ethpart = scapy.Ether(data) + bfdpart = BFD(ethpart.payload.payload.payload.load) + bfdpart.my_discriminator = my_discriminator + bfdpart.your_discriminator = bfd_remote_disc + bfdpart.sta = bfd_state + + ethpart.payload.payload.payload.load = bfdpart + ethpart.src = mac_dst + ethpart.dst = mac_src + ethpart.payload.src = ip_dst + ethpart.payload.dst = ip_src + return ethpart diff --git a/ansible/roles/test/files/ptftests/copp_tests.py b/ansible/roles/test/files/ptftests/copp_tests.py index 499a7ed8570..e57665e1434 100644 --- a/ansible/roles/test/files/ptftests/copp_tests.py +++ b/ansible/roles/test/files/ptftests/copp_tests.py @@ -35,10 +35,10 @@ class ControlPlaneBaseTest(BaseTest): PPS_LIMIT_MAX = PPS_LIMIT * 1.3 NO_POLICER_LIMIT = PPS_LIMIT * 1.4 TARGET_PORT = "3" # Historically we have port 3 as a target port - TASK_TIMEOUT = 300 # Wait up to 5 minutes for tasks to complete + TASK_TIMEOUT = 600 # Wait up to 10 minutes for tasks to complete DEFAULT_PRE_SEND_INTERVAL_SEC = 1 - DEFAULT_SEND_INTERVAL_SEC = 10 + DEFAULT_SEND_INTERVAL_SEC = 30 DEFAULT_RECEIVE_WAIT_TIME = 3 def __init__(self): @@ -118,9 +118,9 @@ def copp_test(self, packet, send_intf, recv_intf): testutils.send_packet(self, send_intf, packet) pre_send_count += 1 - rcv_pkt_cnt = 
testutils.count_matched_packets(self, packet, recv_intf[1], recv_intf[0], timeout=0.01) + rcv_pkt_cnt = testutils.count_matched_packets(self, packet, recv_intf[1], recv_intf[0], timeout=5) self.log("Send %d and receive %d packets in the first second (PolicyTest)" % (pre_send_count, rcv_pkt_cnt)) - self.dataplane.flush() + pre_test_ptf_tx_counter = self.dataplane.get_counters(*send_intf) pre_test_ptf_rx_counter = self.dataplane.get_counters(*recv_intf) @@ -131,6 +131,7 @@ def copp_test(self, packet, send_intf, recv_intf): end_time = datetime.datetime.now() + datetime.timedelta(seconds=self.DEFAULT_SEND_INTERVAL_SEC) send_count = 0 + self.dataplane.flush() while datetime.datetime.now() < end_time: testutils.send_packet(self, send_intf, packet) send_count += 1 @@ -142,7 +143,7 @@ def copp_test(self, packet, send_intf, recv_intf): self.log("Sent out %d packets in %ds" % (send_count, self.DEFAULT_SEND_INTERVAL_SEC)) time.sleep(self.DEFAULT_RECEIVE_WAIT_TIME) # Wait a little bit for all the packets to make it through - recv_count = testutils.count_matched_packets(self, packet, recv_intf[1], recv_intf[0]) + recv_count = testutils.count_matched_packets(self, packet, recv_intf[1], recv_intf[0], timeout=10) post_test_ptf_tx_counter = self.dataplane.get_counters(*send_intf) post_test_ptf_rx_counter = self.dataplane.get_counters(*recv_intf) @@ -252,7 +253,7 @@ def check_constraints(self, send_count, recv_count, time_delta_ms, rx_pps): str(self.PPS_LIMIT_MIN <= rx_pps <= self.PPS_LIMIT_MAX)) ) - assert(self.PPS_LIMIT_MIN <= rx_pps <= self.PPS_LIMIT_MAX) + assert self.PPS_LIMIT_MIN <= rx_pps <= self.PPS_LIMIT_MAX, "rx_pps {}".format(rx_pps) # SONIC config contains policer CIR=600 for ARP @@ -353,6 +354,57 @@ def contruct_packet(self, port_number): return packet +# SONIC configuration has no policer limiting for DHCPv6 +class DHCP6Test(NoPolicyTest): + def __init__(self): + NoPolicyTest.__init__(self) + + def runTest(self): + self.log("DHCP6Test") + self.run_suite() + + def 
contruct_packet(self, port_number): + src_mac = self.my_mac[port_number] + + packet = testutils.simple_udpv6_packet( + pktlen=100, + eth_dst='33:33:00:01:00:02', + eth_src=src_mac, + ipv6_src='::1', + ipv6_dst='ff02::1:2', + udp_sport=546, + udp_dport=547 + ) + + return packet + + #SONIC configuration has no packets to CPU for DHCPv6-T1 Topo +class DHCP6TopoT1Test(PolicyTest): + def __init__(self): + PolicyTest.__init__(self) + # T1 DHCP6 no packet to packet to CPU so police rate is 0 + self.PPS_LIMIT_MIN = 0 + self.PPS_LIMIT_MAX = 0 + + def runTest(self): + self.log("DHCP6TopoT1Test") + self.run_suite() + + def contruct_packet(self, port_number): + src_mac = self.my_mac[port_number] + + packet = testutils.simple_udpv6_packet( + pktlen=100, + eth_dst='33:33:00:01:00:02', + eth_src=src_mac, + ipv6_src='::1', + ipv6_dst='ff02::1:2', + udp_sport=546, + udp_dport=547 + ) + + return packet + # SONIC configuration has no policer limiting for LLDP class LLDPTest(NoPolicyTest): def __init__(self): diff --git a/ansible/roles/test/files/ptftests/dhcp_relay_test.py b/ansible/roles/test/files/ptftests/dhcp_relay_test.py index 2febce4b5e1..86aa17e0764 100644 --- a/ansible/roles/test/files/ptftests/dhcp_relay_test.py +++ b/ansible/roles/test/files/ptftests/dhcp_relay_test.py @@ -9,6 +9,9 @@ from ptf import config from ptf.base_tests import BaseTest from ptf.mask import Mask +import scapy.all as scapy2 +from threading import Thread +import binascii # Helper function to increment an IP address @@ -87,6 +90,11 @@ class DHCPTest(DataplaneBaseTest): DHCP_LEASE_TIME_LEN = 6 LEASE_TIME = 86400 DHCP_PKT_BOOTP_MIN_LEN = 300 + DHCP_ETHER_TYPE_IP = 0x0800 + DHCP_BOOTP_OP_REPLY = 2 + DHCP_BOOTP_HTYPE_ETHERNET = 1 + DHCP_BOOTP_HLEN_ETHERNET = 6 + DHCP_BOOTP_FLAGS_BROADCAST_REPLY = 0x8000 def __init__(self): DataplaneBaseTest.__init__(self) @@ -98,6 +106,10 @@ def setUp(self): self.test_params = testutils.test_params_get() self.hostname = self.test_params['hostname'] + self.verified_option82 
= False + + if self.test_params.has_key('other_client_port'): + self.other_client_port = ast.literal_eval(self.test_params['other_client_port']) # These are the interfaces we are injected into that link to out leaf switches self.server_port_indices = ast.literal_eval(self.test_params['leaf_port_indices']) @@ -232,11 +244,81 @@ def create_dhcp_discover_relayed_packet(self): pkt = ether / ip / udp / bootp return pkt + def dhcp_offer_packet(self, + eth_server="00:01:02:03:04:05", + eth_dst="06:07:08:09:10:11", + eth_client="12:13:14:15:16:17", + ip_server="0.1.2.3", + ip_dst="255.255.255.255", + ip_offered="8.9.10.11", + port_dst=DHCP_CLIENT_PORT, + netmask_client="255.255.255.0", + ip_gateway=DEFAULT_ROUTE_IP, + dhcp_lease=256, + padding_bytes=0, + set_broadcast_bit=False, + ): + """ + Return a DHCPOFFER packet + Supports a few parameters: + @param eth_server MAC address of DHCP server + @param eth_dst MAC address of destination (DHCP Client, Relay agent) or broadcast (ff:ff:ff:ff:ff:ff) + @param eth_client MAC address of DHCP client + @param ip_server IP address of DHCP server + @param ip_dst IP address of destination (DHCP Client, Relay agent) or broadcast (255.255.255.255) + @param ip_offered IP address that server is assigning to client + @param ip_gateway Gateway IP Address, address of relay agent if encountered + @param port_dst Destination port of packet (default: DHCP_PORT_CLIENT) + @param netmask_client Subnet mask of client + @param dhcp_lease Time in seconds of DHCP lease + @param padding_bytes Number of '\x00' bytes to append to end of packet + Destination IP can be unicast or broadcast (255.255.255.255) + Source port is always 67 (DHCP server port) + Destination port by default is 68 (DHCP client port), but can be also be 67 (DHCP server port) if being sent to a DHCP relay agent + """ + my_chaddr = binascii.unhexlify(eth_client.replace(':', '')) + my_chaddr += b'\x00\x00\x00\x00\x00\x00' + + pkt = scapy.Ether(dst=eth_dst, src=eth_server, 
type=self.DHCP_ETHER_TYPE_IP) + pkt /= scapy.IP(src=ip_server, dst=ip_dst, ttl=128, id=0) + pkt /= scapy.UDP(sport=self.DHCP_SERVER_PORT, dport=port_dst) + pkt /= scapy.BOOTP( + op=self.DHCP_BOOTP_OP_REPLY, + htype=self.DHCP_BOOTP_HTYPE_ETHERNET, + hlen=self.DHCP_BOOTP_HLEN_ETHERNET, + hops=0, + xid=0, + secs=0, + flags=self.DHCP_BOOTP_FLAGS_BROADCAST_REPLY if set_broadcast_bit else 0, + ciaddr=self.DEFAULT_ROUTE_IP, + yiaddr=ip_offered, + siaddr=ip_server, + giaddr=ip_gateway, + chaddr=my_chaddr, + ) + # The length of option82 is 41 bytes, and dhcp relay will strip option82, + # when the length of next option is bigger than 42 bytes, + # it could introduce the overwritten issue. + pkt /= scapy.DHCP( + options=[ + ("message-type", "offer"), + ("server_id", ip_server), + ("lease_time", int(dhcp_lease)), + ("subnet_mask", netmask_client), + (82, self.option82), + ("vendor_class_id", "http://0.0.0.0/this_is_a_very_very_long_path/test.bin"), + ("end"), + ] + ) + if padding_bytes: + pkt /= scapy.PADDING("\x00" * padding_bytes) + return pkt + def create_dhcp_offer_packet(self): - return testutils.dhcp_offer_packet(eth_server=self.server_iface_mac, + return self.dhcp_offer_packet(eth_server=self.server_iface_mac, eth_dst=self.uplink_mac, eth_client=self.client_mac, - ip_server=self.server_ip, + ip_server=self.server_ip[0], ip_dst=self.relay_iface_ip if not self.dual_tor else self.switch_loopback_ip, ip_offered=self.client_ip, port_dst=self.DHCP_SERVER_PORT, @@ -257,8 +339,8 @@ def create_dhcp_offer_relayed_packet(self): # 4.) Replaces the destination IP with broadcast (255.255.255.255) # 5.) 
Replaces the destination port with the DHCP client port (68) ether = scapy.Ether(dst=self.BROADCAST_MAC, src=self.relay_iface_mac, type=0x0800) - ip = scapy.IP(src=self.relay_iface_ip, dst=self.BROADCAST_IP, len=290, ttl=64) - udp = scapy.UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT, len=262) + ip = scapy.IP(src=self.relay_iface_ip, dst=self.BROADCAST_IP, ttl=64) + udp = scapy.UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT) bootp = scapy.BOOTP(op=2, htype=1, hlen=6, @@ -268,20 +350,20 @@ def create_dhcp_offer_relayed_packet(self): flags=0x8000, ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.client_ip, - siaddr=self.server_ip, + siaddr=self.server_ip[0], giaddr=self.relay_iface_ip if not self.dual_tor else self.switch_loopback_ip, chaddr=my_chaddr) bootp /= scapy.DHCP(options=[('message-type', 'offer'), - ('server_id', self.server_ip), + ('server_id', self.server_ip[0]), ('lease_time', self.LEASE_TIME), ('subnet_mask', self.client_subnet), + ("vendor_class_id", "http://0.0.0.0/this_is_a_very_very_long_path/test.bin"), ('end')]) - # TODO: Need to add this to the packet creation functions in PTF code first! 
# If our bootp layer is too small, pad it - #pad_bytes = self.DHCP_PKT_BOOTP_MIN_LEN - len(bootp) - #if pad_bytes > 0: - # bootp /= scapy.PADDING('\x00' * pad_bytes) + pad_bytes = self.DHCP_PKT_BOOTP_MIN_LEN - len(bootp) + if pad_bytes > 0: + bootp /= scapy.PADDING('\x00' * pad_bytes) pkt = ether / ip / udp / bootp return pkt @@ -289,7 +371,7 @@ def create_dhcp_offer_relayed_packet(self): def create_dhcp_request_packet(self, dst_mac=BROADCAST_MAC, src_port=DHCP_CLIENT_PORT): request_packet = testutils.dhcp_request_packet( eth_client=self.client_mac, - ip_server=self.server_ip, + ip_server=self.server_ip[0], ip_requested=self.client_ip, set_broadcast_bit=True ) @@ -331,7 +413,7 @@ def create_dhcp_request_relayed_packet(self): chaddr=my_chaddr) bootp /= scapy.DHCP(options=[('message-type', 'request'), ('requested_addr', self.client_ip), - ('server_id', self.server_ip), + ('server_id', self.server_ip[0]), ('relay_agent_Information', self.option82), ('end')]) @@ -347,7 +429,7 @@ def create_dhcp_ack_packet(self): return testutils.dhcp_ack_packet(eth_server=self.server_iface_mac, eth_dst=self.uplink_mac, eth_client=self.client_mac, - ip_server=self.server_ip, + ip_server=self.server_ip[0], ip_dst=self.relay_iface_ip if not self.dual_tor else self.switch_loopback_ip, ip_offered=self.client_ip, port_dst=self.DHCP_SERVER_PORT, @@ -379,11 +461,11 @@ def create_dhcp_ack_relayed_packet(self): flags=0x8000, ciaddr=self.DEFAULT_ROUTE_IP, yiaddr=self.client_ip, - siaddr=self.server_ip, + siaddr=self.server_ip[0], giaddr=self.relay_iface_ip if not self.dual_tor else self.switch_loopback_ip, chaddr=my_chaddr) bootp /= scapy.DHCP(options=[('message-type', 'ack'), - ('server_id', self.server_ip), + ('server_id', self.server_ip[0]), ('lease_time', self.LEASE_TIME), ('subnet_mask', self.client_subnet), ('end')]) @@ -410,6 +492,26 @@ def client_send_discover(self, dst_mac=BROADCAST_MAC, src_port=DHCP_CLIENT_PORT) dhcp_discover = self.create_dhcp_discover_packet(dst_mac, src_port) 
testutils.send_packet(self, self.client_port_index, dhcp_discover) + #Verify the relayed packet has option82 info or not. Sniffing for the relayed packet on leaves and + #once the packet is recieved checking for the destination and looking into options and verifying + #the option82 info + + def pkt_callback(self, pkt): + if pkt.haslayer(scapy2.IP) and pkt.haslayer(scapy2.DHCP): + if pkt.getlayer(scapy2.IP).dst in self.server_ip and pkt.getlayer(scapy2.DHCP) is not None: + self.verified_option82 = False + pkt_options = '' + for option in pkt.getlayer(scapy2.DHCP).options: + if option[0] == 'relay_agent_Information': + pkt_options = option[1] + break + if self.option82 in pkt_options: + self.verified_option82 = True + + def Sniffer(self,iface): + scapy2.sniff(iface=iface, filter="udp and (port 67 or 68)",prn=self.pkt_callback, store=0, timeout=3) + + # Verify that the DHCP relay actually received and relayed the DHCPDISCOVER message to all of # its known DHCP servers. We also verify that the relay inserted Option 82 information in the # packet. 
@@ -561,7 +663,91 @@ def verify_ack_received(self): # NOTE: verify_packet() will fail for us via an assert, so no need to check a return value here testutils.verify_packet(self, masked_ack, self.client_port_index) + def verify_dhcp_relay_pkt_on_other_client_port_with_no_padding(self, dst_mac=BROADCAST_MAC, src_port=DHCP_CLIENT_PORT): + # Form and send DHCP Relay packet + dhcp_request = self.create_dhcp_request_packet(dst_mac, src_port) + testutils.send_packet(self, self.client_port_index, dhcp_request) + + # Mask off fields we don't care about matching + masked_request = Mask(dhcp_request) + masked_request.set_do_not_care_scapy(scapy.Ether, "src") + + masked_request.set_do_not_care_scapy(scapy.IP, "version") + masked_request.set_do_not_care_scapy(scapy.IP, "ihl") + masked_request.set_do_not_care_scapy(scapy.IP, "tos") + masked_request.set_do_not_care_scapy(scapy.IP, "len") + masked_request.set_do_not_care_scapy(scapy.IP, "id") + masked_request.set_do_not_care_scapy(scapy.IP, "flags") + masked_request.set_do_not_care_scapy(scapy.IP, "frag") + masked_request.set_do_not_care_scapy(scapy.IP, "ttl") + masked_request.set_do_not_care_scapy(scapy.IP, "proto") + masked_request.set_do_not_care_scapy(scapy.IP, "chksum") + masked_request.set_do_not_care_scapy(scapy.IP, "src") + masked_request.set_do_not_care_scapy(scapy.IP, "dst") + masked_request.set_do_not_care_scapy(scapy.IP, "options") + + masked_request.set_do_not_care_scapy(scapy.UDP, "chksum") + masked_request.set_do_not_care_scapy(scapy.UDP, "len") + masked_request.set_do_not_care_scapy(scapy.DHCP, "options") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "sname") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "file") + + masked_request.set_do_not_care_scapy(scapy.BOOTP, "yiaddr") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "ciaddr") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "siaddr") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "giaddr") + 
masked_request.set_do_not_care_scapy(scapy.BOOTP, "chaddr") + + try : + testutils.verify_packets_any(self, masked_request, self.other_client_port) + except Exception: + self.assertTrue(False,"DHCP Relay packet not matched or Padded extra on client side") + + def verify_dhcp_relay_pkt_on_server_port_with_no_padding(self, dst_mac=BROADCAST_MAC, src_port=DHCP_CLIENT_PORT): + # Form and send DHCP Relay packet + dhcp_request = self.create_dhcp_request_packet(dst_mac, src_port) + testutils.send_packet(self, self.client_port_index, dhcp_request) + + # Mask off fields we don't care about matching + # Create a packet resembling a relayed DCHPREQUEST packet + dhcp_request_relayed = self.create_dhcp_request_relayed_packet() + + # Mask off fields we don't care about matching + masked_request = Mask(dhcp_request_relayed) + masked_request.set_do_not_care_scapy(scapy.Ether, "dst") + + masked_request.set_do_not_care_scapy(scapy.IP, "version") + masked_request.set_do_not_care_scapy(scapy.IP, "ihl") + masked_request.set_do_not_care_scapy(scapy.IP, "tos") + masked_request.set_do_not_care_scapy(scapy.IP, "len") + masked_request.set_do_not_care_scapy(scapy.IP, "id") + masked_request.set_do_not_care_scapy(scapy.IP, "flags") + masked_request.set_do_not_care_scapy(scapy.IP, "frag") + masked_request.set_do_not_care_scapy(scapy.IP, "ttl") + masked_request.set_do_not_care_scapy(scapy.IP, "proto") + masked_request.set_do_not_care_scapy(scapy.IP, "chksum") + masked_request.set_do_not_care_scapy(scapy.IP, "src") + masked_request.set_do_not_care_scapy(scapy.IP, "dst") + masked_request.set_do_not_care_scapy(scapy.IP, "options") + + masked_request.set_do_not_care_scapy(scapy.UDP, "chksum") + masked_request.set_do_not_care_scapy(scapy.UDP, "len") + + masked_request.set_do_not_care_scapy(scapy.BOOTP, "sname") + masked_request.set_do_not_care_scapy(scapy.BOOTP, "file") + + try : + testutils.verify_packets_any(self, masked_request, self.server_port_indices) + except Exception: + 
self.assertTrue(False,"DHCP Relay packet not matched or Padded extra on server side") + def runTest(self): + # Start sniffer process for each server port to capture DHCP packet + # and then verify option 82 + for interface_index in self.server_port_indices: + t1 = Thread(target=self.Sniffer, args=("eth"+str(interface_index),)) + t1.start() + self.client_send_discover(self.dest_mac_address, self.client_udp_src_port) self.verify_relayed_discover() self.server_send_offer() @@ -570,4 +756,10 @@ def runTest(self): self.verify_relayed_request() self.server_send_ack() self.verify_ack_received() + self.assertTrue(self.verified_option82,"Failed: Verifying option 82") + ## Below verification will be done only when client port is set in ptf_runner + if self.test_params.has_key('other_client_port'): + self.verify_dhcp_relay_pkt_on_other_client_port_with_no_padding(self.dest_mac_address, self.client_udp_src_port) + self.verify_dhcp_relay_pkt_on_server_port_with_no_padding(self.dest_mac_address, self.client_udp_src_port) + \ No newline at end of file diff --git a/ansible/roles/test/files/ptftests/dhcpv6_counter_test.py b/ansible/roles/test/files/ptftests/dhcpv6_counter_test.py index 9f204997998..4e50eadaa8f 100644 --- a/ansible/roles/test/files/ptftests/dhcpv6_counter_test.py +++ b/ansible/roles/test/files/ptftests/dhcpv6_counter_test.py @@ -1,4 +1,6 @@ +import os import ast +import time import subprocess # Packet Test Framework imports @@ -56,8 +58,11 @@ def setUp(self): self.server_ip = self.test_params['server_ip'] self.relay_iface_ip = self.test_params['relay_iface_ip'] self.relay_iface_mac = self.test_params['relay_iface_mac'] + self.dut_mac = self.test_params['dut_mac'] self.vlan_ip = self.test_params['vlan_ip'] self.client_mac = self.dataplane.get_mac(0, self.client_port_index) + self.loopback_ipv6 = self.test_params['loopback_ipv6'] + self.is_dualtor = True if self.test_params['is_dualtor'] == 'True' else False def generate_client_interace_ipv6_link_local_address(self, 
client_port_index): # Shutdown and startup the client interface to generate a proper IPv6 link-local address @@ -93,8 +98,11 @@ def create_packet(self, message): return packet def create_server_packet(self, message): - packet = Ether(dst=self.relay_iface_mac) - packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) + packet = Ether(dst=self.dut_mac) + if self.is_dualtor: + packet /= IPv6(src=self.server_ip, dst=self.loopback_ipv6) + else: + packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) packet /= DHCP6_RelayReply(msgtype=13, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) packet /= DHCP6OptRelayMsg() @@ -112,6 +120,9 @@ def client_send(self): for message in client_messages: packet = self.create_packet(message) testutils.send_packet(self, self.client_port_index, packet) + # sleep a short time to low down packet sending rate in case multicast packets + # flooding cause packets drop on dhcpv6 relay filter raw socket + time.sleep(1) def server_send(self): server_messages = [DHCP6_Advertise, DHCP6_Reply] @@ -119,6 +130,7 @@ def server_send(self): packet = self.create_server_packet(message) packet.src = self.dataplane.get_mac(0, self.server_port_indices[0]) testutils.send_packet(self, self.server_port_indices[0], packet) + time.sleep(1) def runTest(self): self.client_send() diff --git a/ansible/roles/test/files/ptftests/dhcpv6_relay_test.py b/ansible/roles/test/files/ptftests/dhcpv6_relay_test.py index c7769a7422a..aa9b562e7e7 100644 --- a/ansible/roles/test/files/ptftests/dhcpv6_relay_test.py +++ b/ansible/roles/test/files/ptftests/dhcpv6_relay_test.py @@ -1,4 +1,5 @@ import ast +import socket import subprocess # Packet Test Framework imports @@ -114,10 +115,12 @@ def setUp(self): self.relay_iface_ip = self.test_params['relay_iface_ip'] self.relay_iface_mac = self.test_params['relay_iface_mac'] self.relay_link_local = self.test_params['relay_link_local'] - + 
self.relay_linkaddr = '::' self.vlan_ip = self.test_params['vlan_ip'] - self.client_mac = self.dataplane.get_mac(0, self.client_port_index) + self.uplink_mac = self.test_params['uplink_mac'] + self.loopback_ipv6 = self.test_params['loopback_ipv6'] + self.is_dualtor = True if self.test_params['is_dualtor'] == 'True' else False def generate_client_interace_ipv6_link_local_address(self, client_port_index): # Shutdown and startup the client interface to generate a proper IPv6 link-local address @@ -145,7 +148,6 @@ def tearDown(self): """ def create_dhcp_solicit_packet(self): - solicit_packet = Ether(src=self.client_mac, dst=self.BROADCAST_MAC) solicit_packet /= IPv6(src=self.client_link_local, dst=self.BROADCAST_IP) solicit_packet /= UDP(sport=self.DHCP_CLIENT_PORT, dport=self.DHCP_SERVER_PORT) @@ -154,19 +156,24 @@ def create_dhcp_solicit_packet(self): return solicit_packet def create_dhcp_solicit_relay_forward_packet(self): - - solicit_relay_forward_packet = Ether(src=self.relay_iface_mac) - solicit_relay_forward_packet /= IPv6() + solicit_relay_forward_packet = Ether(src=self.uplink_mac) + if self.is_dualtor: + solicit_relay_forward_packet /= IPv6(src=self.loopback_ipv6, dst=self.server_ip) + else: + solicit_relay_forward_packet /= IPv6(src=self.relay_iface_ip, dst=self.server_ip) + self.loopback_ipv6 = self.test_params['loopback_ipv6'] + self.is_dualtor = True if self.test_params['is_dualtor'] == 'True' else False solicit_relay_forward_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) solicit_relay_forward_packet /= DHCP6_RelayForward(msgtype=12, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) solicit_relay_forward_packet /= DHCP6OptClientLinkLayerAddr() + if self.is_dualtor: + solicit_relay_forward_packet /= DHCP6OptIfaceId(ifaceid=socket.inet_pton(socket.AF_INET6, self.vlan_ip)) solicit_relay_forward_packet /= DHCP6OptRelayMsg() solicit_relay_forward_packet /= DHCP6_Solicit(trid=12345) return solicit_relay_forward_packet def 
create_dhcp_advertise_packet(self): - advertise_packet = Ether(src=self.relay_iface_mac, dst=self.client_mac) advertise_packet /= IPv6(src=self.relay_link_local, dst=self.client_link_local) advertise_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT) @@ -174,10 +181,13 @@ def create_dhcp_advertise_packet(self): return advertise_packet - def create_dhcp_advertise_relay_reply_packet(self): - advertise_relay_reply_packet = Ether(dst=self.relay_iface_mac) - advertise_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) + def create_dhcp_advertise_relay_reply_packet(self): + advertise_relay_reply_packet = Ether(dst=self.uplink_mac) + if self.is_dualtor: + advertise_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.loopback_ipv6) + else: + advertise_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) advertise_relay_reply_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) advertise_relay_reply_packet /= DHCP6_RelayReply(msgtype=13, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) advertise_relay_reply_packet /= DHCP6OptRelayMsg() @@ -186,7 +196,6 @@ def create_dhcp_advertise_relay_reply_packet(self): return advertise_relay_reply_packet def create_dhcp_request_packet(self): - request_packet = Ether(src=self.client_mac, dst=self.BROADCAST_MAC) request_packet /= IPv6(src=self.client_link_local, dst=self.BROADCAST_IP) request_packet /= UDP(sport=self.DHCP_CLIENT_PORT, dport=self.DHCP_SERVER_PORT) @@ -195,19 +204,22 @@ def create_dhcp_request_packet(self): return request_packet def create_dhcp_request_relay_forward_packet(self): - - request_relay_forward_packet = Ether(src=self.relay_iface_mac) - request_relay_forward_packet /= IPv6() + request_relay_forward_packet = Ether(src=self.uplink_mac) + if self.is_dualtor: + request_relay_forward_packet /= IPv6(src=self.loopback_ipv6, dst=self.server_ip) + else: + request_relay_forward_packet /= IPv6(src=self.relay_iface_ip, 
dst=self.server_ip) request_relay_forward_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) request_relay_forward_packet /= DHCP6_RelayForward(msgtype=12, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) request_relay_forward_packet /= DHCP6OptClientLinkLayerAddr() + if self.is_dualtor: + request_relay_forward_packet /= DHCP6OptIfaceId(ifaceid=socket.inet_pton(socket.AF_INET6, self.vlan_ip)) request_relay_forward_packet /= DHCP6OptRelayMsg() request_relay_forward_packet /= DHCP6_Request(trid=12345) return request_relay_forward_packet def create_dhcp_reply_packet(self): - reply_packet = Ether(src=self.relay_iface_mac, dst=self.client_mac) reply_packet /= IPv6(src=self.relay_link_local, dst=self.client_link_local) reply_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT) @@ -216,9 +228,11 @@ def create_dhcp_reply_packet(self): return reply_packet def create_dhcp_reply_relay_reply_packet(self): - - reply_relay_reply_packet = Ether(dst=self.relay_iface_mac) - reply_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) + reply_relay_reply_packet = Ether(dst=self.uplink_mac) + if self.is_dualtor: + reply_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.loopback_ipv6) + else: + reply_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) reply_relay_reply_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) reply_relay_reply_packet /= DHCP6_RelayReply(msgtype=13, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) reply_relay_reply_packet /= DHCP6OptRelayMsg() @@ -226,6 +240,59 @@ def create_dhcp_reply_relay_reply_packet(self): return reply_relay_reply_packet + def create_dhcp_relay_forward_packet(self): + relay_forward_packet = Ether(src=self.client_mac, dst=self.BROADCAST_MAC) + relay_forward_packet /= IPv6(src=self.client_link_local, dst=self.BROADCAST_IP) + relay_forward_packet /= UDP(sport=self.DHCP_CLIENT_PORT, dport=self.DHCP_SERVER_PORT) + 
relay_forward_packet /= DHCP6_RelayForward(msgtype=12, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) + relay_forward_packet /= DHCP6OptRelayMsg() + relay_forward_packet /= DHCP6_Solicit(trid=12345) + + return relay_forward_packet + + def create_dhcp_relayed_relay_packet(self): + relayed_relay_packet = Ether(src=self.uplink_mac) + if self.is_dualtor: + relayed_relay_packet /= IPv6(src=self.loopback_ipv6, dst=self.server_ip) + else: + relayed_relay_packet /= IPv6(src=self.relay_iface_ip, dst=self.server_ip) + relayed_relay_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) + relayed_relay_packet /= DHCP6_RelayForward(msgtype=12, hopcount = 1, linkaddr=self.relay_linkaddr, peeraddr=self.client_link_local) + if self.is_dualtor: + relayed_relay_packet /= DHCP6OptIfaceId(ifaceid=socket.inet_pton(socket.AF_INET6, self.vlan_ip)) + relayed_relay_packet /= DHCP6OptRelayMsg() + relayed_relay_packet /= DHCP6_RelayForward(msgtype=12, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) + relayed_relay_packet /= DHCP6OptRelayMsg() + relayed_relay_packet /= DHCP6_Solicit(trid=12345) + + return relayed_relay_packet + + def create_dhcp_relay_relay_reply_packet(self): + relay_relay_reply_packet = Ether(dst=self.uplink_mac) + if self.is_dualtor: + relay_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.loopback_ipv6) + else: + relay_relay_reply_packet /= IPv6(src=self.server_ip, dst=self.relay_iface_ip) + relay_relay_reply_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_SERVER_PORT) + relay_relay_reply_packet /= DHCP6_RelayReply(msgtype=13, hopcount = 1, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) + if self.is_dualtor: + relay_relay_reply_packet /= DHCP6OptIfaceId(ifaceid=socket.inet_pton(socket.AF_INET6, self.vlan_ip)) + relay_relay_reply_packet /= DHCP6OptRelayMsg() + relay_relay_reply_packet /= DHCP6_RelayReply(msgtype=13, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) + relay_relay_reply_packet /= 
DHCP6OptRelayMsg() + relay_relay_reply_packet /= DHCP6_Reply(trid=12345) + + return relay_relay_reply_packet + + def create_dhcp_relay_reply_packet(self): + relay_reply_packet = Ether(src=self.relay_iface_mac, dst=self.client_mac) + relay_reply_packet /= IPv6(src=self.relay_link_local, dst=self.client_link_local) + relay_reply_packet /= UDP(sport=self.DHCP_SERVER_PORT, dport=self.DHCP_CLIENT_PORT) + relay_reply_packet /= DHCP6_RelayReply(msgtype=13, linkaddr=self.vlan_ip, peeraddr=self.client_link_local) + relay_reply_packet /= DHCP6OptRelayMsg() + relay_reply_packet /= DHCP6_Reply(trid=12345) + + return relay_reply_packet """ Send/receive functions @@ -247,7 +314,6 @@ def verify_relayed_solicit_relay_forward(self): # Mask off fields we don't care about matching masked_packet = Mask(solicit_relay_forward_packet) masked_packet.set_do_not_care_scapy(packet.Ether, "dst") - masked_packet.set_do_not_care_scapy(IPv6, "src") masked_packet.set_do_not_care_scapy(IPv6, "dst") masked_packet.set_do_not_care_scapy(IPv6, "fl") masked_packet.set_do_not_care_scapy(IPv6, "tc") @@ -258,10 +324,8 @@ def verify_relayed_solicit_relay_forward(self): masked_packet.set_do_not_care_scapy(DHCP6OptClientLinkLayerAddr, "clladdr") masked_packet.set_do_not_care_scapy(scapy.layers.dhcp6.DHCP6_RelayForward, "linkaddr") - # Count the number of these packets received on the ports connected to our leaves - solicit_count = testutils.count_matched_packets_all_ports(self, masked_packet, self.server_port_indices, timeout=4.0) - self.assertTrue(solicit_count >= 1, - "Failed: Solicit count of %d" % solicit_count) + # verify packets received on the ports connected to our leaves + testutils.verify_packet_any_port(self, masked_packet, self.server_port_indices) # Simulate a DHCP server sending a DHCPv6 RELAY-REPLY encapsulating ADVERTISE packet message to client. 
# We do this by injecting a RELAY-REPLY encapsulating ADVERTISE message on the link connected to one @@ -280,6 +344,7 @@ def verify_relayed_advertise(self): # Mask off fields we don't care about matching masked_packet = Mask(advertise_packet) masked_packet.set_do_not_care_scapy(IPv6, "fl") + # dual tor uses loopback0 ipv6 address as source masked_packet.set_do_not_care_scapy(packet.UDP, "chksum") masked_packet.set_do_not_care_scapy(packet.UDP, "len") @@ -301,7 +366,6 @@ def verify_relayed_request_relay_forward(self): # Mask off fields we don't care about matching masked_packet = Mask(request_relay_forward_packet) masked_packet.set_do_not_care_scapy(packet.Ether, "dst") - masked_packet.set_do_not_care_scapy(IPv6, "src") masked_packet.set_do_not_care_scapy(IPv6, "dst") masked_packet.set_do_not_care_scapy(IPv6, "fl") masked_packet.set_do_not_care_scapy(IPv6, "tc") @@ -312,10 +376,8 @@ def verify_relayed_request_relay_forward(self): masked_packet.set_do_not_care_scapy(DHCP6OptClientLinkLayerAddr, "clladdr") masked_packet.set_do_not_care_scapy(scapy.layers.dhcp6.DHCP6_RelayForward, "linkaddr") - # Count the number of these packets received on the ports connected to our leaves - request_count = testutils.count_matched_packets_all_ports(self, masked_packet, self.server_port_indices, timeout=4.0) - self.assertTrue(request_count >= 1, - "Failed: Request count of %d" % request_count) + # verify packets received on the ports connected to our leaves + testutils.verify_packet_any_port(self, masked_packet, self.server_port_indices) # Simulate a DHCP server sending a DHCPv6 RELAY-REPLY encapsulating REPLY packet message to client. 
def server_send_reply_relay_reply(self): @@ -332,6 +394,53 @@ def verify_relayed_reply(self): # Mask off fields we don't care about matching masked_packet = Mask(reply_packet) masked_packet.set_do_not_care_scapy(IPv6, "fl") + # dual tor uses loopback0 ipv6 address as source + masked_packet.set_do_not_care_scapy(packet.UDP, "chksum") + masked_packet.set_do_not_care_scapy(packet.UDP, "len") + + # NOTE: verify_packet() will fail for us via an assert, so no need to check a return value here + testutils.verify_packet(self, masked_packet, self.client_port_index) + + # Simulate a DHCP server sending a DHCPv6 RELAY-FORWARD encapsulating SOLICIT packet message to client. + def client_send_relayed_relay_forward(self): + # Form and send DHCPv6 RELAY-FORWARD encapsulating REPLY packet + relay_forward_packet = self.create_dhcp_relay_forward_packet() + testutils.send_packet(self, self.client_port_index, relay_forward_packet) + + # Verify that the DHCPv6 RELAYED RELAY would be received by our simulated server + def verify_relayed_relay_forward(self): + # Create a packet resembling a DHCPv6 RELAYED RELAY FORWARD packet + relayed_relay_forward_count = self.create_dhcp_relayed_relay_packet() + + # Mask off fields we don't care about matching + masked_packet = Mask(relayed_relay_forward_count) + masked_packet.set_do_not_care_scapy(packet.Ether, "dst") + masked_packet.set_do_not_care_scapy(IPv6, "dst") + masked_packet.set_do_not_care_scapy(IPv6, "fl") + masked_packet.set_do_not_care_scapy(IPv6, "tc") + masked_packet.set_do_not_care_scapy(IPv6, "plen") + masked_packet.set_do_not_care_scapy(IPv6, "nh") + masked_packet.set_do_not_care_scapy(packet.UDP, "chksum") + masked_packet.set_do_not_care_scapy(packet.UDP, "len") + + testutils.verify_packet_any_port(self, masked_packet, self.server_port_indices) + + # Simulate a DHCP server sending a DHCPv6 RELAY-REPLY encapsulating RELAY-REPLY packet message to next relay agent + def server_send_relay_relay_reply(self): + # Form and send DHCPv6 
RELAY-REPLY encapsulating REPLY packet + relay_relay_reply_packet = self.create_dhcp_relay_relay_reply_packet() + relay_relay_reply_packet.src = self.dataplane.get_mac(0, self.server_port_indices[0]) + testutils.send_packet(self, self.server_port_indices[0], relay_relay_reply_packet) + + # Verify that the DHCPv6 RELAY REPLY would be uncapsulated and forwarded to the next relay agent + def verify_relay_relay_reply(self): + # Create a packet resembling a DHCPv6 RELAY REPLY packet + relay_reply_packet = self.create_dhcp_relay_reply_packet() + + # Mask off fields we don't care about matching + masked_packet = Mask(relay_reply_packet) + masked_packet.set_do_not_care_scapy(IPv6, "fl") + # dual tor uses relay_iface_ip as ip src masked_packet.set_do_not_care_scapy(packet.UDP, "chksum") masked_packet.set_do_not_care_scapy(packet.UDP, "len") @@ -347,3 +456,7 @@ def runTest(self): self.verify_relayed_request_relay_forward() self.server_send_reply_relay_reply() self.verify_relayed_reply() + self.client_send_relayed_relay_forward() + self.verify_relayed_relay_forward() + self.server_send_relay_relay_reply() + self.verify_relay_relay_reply() \ No newline at end of file diff --git a/ansible/roles/test/files/ptftests/dir_bcast_test.py b/ansible/roles/test/files/ptftests/dir_bcast_test.py index cd460d4a3f0..60cc5b58e1b 100644 --- a/ansible/roles/test/files/ptftests/dir_bcast_test.py +++ b/ansible/roles/test/files/ptftests/dir_bcast_test.py @@ -2,24 +2,35 @@ Description: This file contains the Directed Broadcast test for SONIC Usage: Examples of how to use log analyzer - ptf --test-dir ptftests dir_bcast_test.BcastTest --platform remote -t "testbed_type='t0';router_mac='00:01:02:03:04:05';vlan_info='/root/vlan_info.txt'" --relax --debug info --log-file /tmp/dir_bcast_test.log --disable-vxlan --disable-geneve --disable-erspan --disable-mpls --disable-nvgre + ptf --test-dir ptftests dir_bcast_test.BcastTest \ + --platform remote \ + -t 
"testbed_type='t0';router_mac='00:01:02:03:04:05';vlan_info='/root/vlan_info.txt'" + --relax \ + --debug info \ + --log-file /tmp/dir_bcast_test.log \ + --disable-vxlan + --disable-geneve \ + --disable-erspan \ + --disable-mpls \ + --disable-nvgre ''' -#--------------------------------------------------------------------- +# --------------------------------------------------------------------- # Global imports -#--------------------------------------------------------------------- +# --------------------------------------------------------------------- import logging import random +import json import ptf import ptf.packet as scapy -import ptf.dataplane as dataplane -from ptf import config from ptf.base_tests import BaseTest from ptf.mask import Mask -from ptf.testutils import * -from ipaddress import ip_address, ip_network +from ptf.testutils import test_params_get, simple_ip_packet, simple_udp_packet,\ + send_packet, count_matched_packets_all_ports +from ipaddress import ip_network + class BcastTest(BaseTest): ''' @@ -34,9 +45,9 @@ class BcastTest(BaseTest): - IP frame, Dst Mac = Router MAC, Dst IP = Directed Broadcast IP ''' - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- # Class variables - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- BROADCAST_MAC = 'ff:ff:ff:ff:ff:ff' DHCP_SERVER_PORT = 67 TEST_SRC_IP = "1.1.1.1" # Some src IP @@ -48,55 +59,34 @@ def __init__(self): BaseTest.__init__(self) self.test_params = test_params_get() - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- def setUp(self): self.dataplane = ptf.dataplane_instance self.router_mac = self.test_params['router_mac'] - self.setUpVlan(self.test_params['vlan_info']) - if self.test_params['testbed_type'] == 
't0': - self.src_ports = range(1, 25) + range(28, 32) - if self.test_params['testbed_type'] == 't0-52': - self.src_ports = range(0, 52) - if self.test_params['testbed_type'] == 't0-56': - self.src_ports = range(0, 32) - if self.test_params['testbed_type'] == 't0-64': - self.src_ports = range(0, 2) + range(4, 18) + range(20, 33) + range(36, 43) + range(48, 49) + range(52, 59) - if self.test_params['testbed_type'] == 't0-116': - self.src_ports = range(24, 32) - if self.test_params['testbed_type'] == 't0-120': - self.src_ports = [48, 49, 54, 55, 60, 61, 66, 67] - - #--------------------------------------------------------------------- - - def setUpVlan(self, file_path): - ''' - @summary: Populate the VLAN dictionary with IP/Prefix and member port list - ''' - self._vlan_dict = {} - with open(file_path, 'r') as f: - for line in f.readlines(): - entry = line.split(' ', 1) - prefix = ip_network(unicode(entry[0])) - if prefix.version != 4: - continue - self._vlan_dict[prefix] = [int(i) for i in entry[1].split()] + ptf_test_port_map = self.test_params['ptf_test_port_map'] + with open(ptf_test_port_map) as f: + self.ptf_test_port_map = json.load(f) + self.src_ports = self.ptf_test_port_map['ptf_src_ports'] + self._vlan_dict = self.ptf_test_port_map['vlan_ip_port_pair'] - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- def check_all_dir_bcast(self): ''' @summary: Loop through all the VLANs and send directed broadcast packets ''' - for vlan_pfx in self._vlan_dict: - bcast_ip = str(ip_network(vlan_pfx).broadcast_address) - dst_port_list = self._vlan_dict[vlan_pfx] - self.check_ip_dir_bcast(bcast_ip, dst_port_list) - self.check_bootp_dir_bcast(bcast_ip, dst_port_list) + for vlan_pfx, dst_ports in self._vlan_dict.items(): + if ip_network(vlan_pfx).version == 4: + bcast_ip = str(ip_network(vlan_pfx).broadcast_address) + logging.info("bcast_ip: {}, vlan_pfx: {}, dst_ports: {}".format( 
+ bcast_ip, vlan_pfx, dst_ports)) + self.check_ip_dir_bcast(bcast_ip, dst_ports) + self.check_bootp_dir_bcast(bcast_ip, dst_ports) - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- - def check_ip_dir_bcast(self, dst_bcast_ip, dst_port_list): + def check_ip_dir_bcast(self, dst_bcast_ip, dst_ports): ''' @summary: Check directed broadcast IP forwarding and receiving on all member ports. ''' @@ -111,30 +101,35 @@ def check_ip_dir_bcast(self, dst_bcast_ip, dst_port_list): ip_dst=ip_dst) exp_pkt = simple_ip_packet(eth_dst=bcast_mac, - eth_src=self.router_mac, - ip_src=ip_src, - ip_dst=ip_dst) + eth_src=self.router_mac, + ip_src=ip_src, + ip_dst=ip_dst) masked_exp_pkt = Mask(exp_pkt) masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") - src_port = random.choice([port for port in self.src_ports if port not in dst_port_list]) + src_port = random.choice( + [port for port in self.src_ports if port not in dst_ports]) send_packet(self, src_port, pkt) - logging.info("Sending packet from port " + str(src_port) + " to " + ip_dst) + logging.info("Sending packet from port " + + str(src_port) + " to " + ip_dst) - pkt_count = count_matched_packets_all_ports(self, masked_exp_pkt, dst_port_list) + pkt_count = count_matched_packets_all_ports( + self, masked_exp_pkt, dst_ports) ''' Check if broadcast packet is received on all member ports of vlan ''' - logging.info("Received " + str(pkt_count) + " broadcast packets, expecting " + str(len(dst_port_list))) - assert (pkt_count == len(dst_port_list)), "received {} expected {}".format(pkt_count, len(dst_port_list)) + logging.info("Received " + str(pkt_count) + + " broadcast packets, expecting " + str(len(dst_ports))) + assert (pkt_count == len(dst_ports)), "received {} expected {}".format( + pkt_count, len(dst_ports)) return - 
#--------------------------------------------------------------------- + # --------------------------------------------------------------------- - def check_bootp_dir_bcast(self, dst_bcast_ip, dst_port_list): + def check_bootp_dir_bcast(self, dst_bcast_ip, dst_ports): ''' @summary: Check directed broadcast BOOTP packet forwarding and receiving on all member ports. ''' @@ -162,20 +157,25 @@ def check_bootp_dir_bcast(self, dst_bcast_ip, dst_port_list): masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") - src_port = random.choice([port for port in self.src_ports if port not in dst_port_list]) + src_port = random.choice( + [port for port in self.src_ports if port not in dst_ports]) send_packet(self, src_port, pkt) - logging.info("Sending BOOTP packet from port " + str(src_port) + " to " + ip_dst) + logging.info("Sending BOOTP packet from port " + + str(src_port) + " to " + ip_dst) - pkt_count = count_matched_packets_all_ports(self, masked_exp_pkt, dst_port_list) + pkt_count = count_matched_packets_all_ports( + self, masked_exp_pkt, dst_ports) ''' Check if broadcast BOOTP packet is received on all member ports of vlan ''' - logging.info("Received " + str(pkt_count) + " broadcast BOOTP packets, expecting " + str(len(dst_port_list))) - assert (pkt_count == len(dst_port_list)), "received {} expected {}".format(pkt_count, len(dst_port_list)) + logging.info("Received " + str(pkt_count) + + " broadcast BOOTP packets, expecting " + str(len(dst_ports))) + assert (pkt_count == len(dst_ports)), "received {} expected {}".format( + pkt_count, len(dst_ports)) return - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- def runTest(self): """ diff --git a/ansible/roles/test/files/ptftests/fg_ecmp_test.py b/ansible/roles/test/files/ptftests/fg_ecmp_test.py index 5de842177a3..4667b2e3276 100644 --- 
a/ansible/roles/test/files/ptftests/fg_ecmp_test.py +++ b/ansible/roles/test/files/ptftests/fg_ecmp_test.py @@ -17,7 +17,6 @@ import time import os import json -import ipaddress import ptf import ptf.packet as scapy @@ -122,7 +121,7 @@ def setUp(self): ''' self.dataplane = ptf.dataplane_instance self.test_params = testutils.test_params_get() - self.max_deviation = 0.25 + self.max_deviation = 0.35 if 'test_case' in self.test_params: self.test_case = self.test_params['test_case'] else: @@ -197,6 +196,18 @@ def test_balancing(self, hit_count_map): " num_flows " + str(num_flows) + " deviation " + str(deviation)) assert deviation <= self.max_deviation + def test_balancing_no_assert(self, hit_count_map): + deviation_max = 0 + for port, exp_flows in self.exp_flow_count.items(): + assert port in hit_count_map + num_flows = hit_count_map[port] + deviation = float(num_flows)/float(exp_flows) + deviation = abs(1-deviation) + self.log("port "+ str(port) + " exp_flows " + str(exp_flows) + + " num_flows " + str(num_flows) + " deviation " + str(deviation)) + if deviation_max < deviation: + deviation_max = deviation + return deviation_max def fg_ecmp(self): ipv4 = isinstance(ipaddress.ip_address(self.dst_ip.decode('utf8')), @@ -226,23 +237,29 @@ def fg_ecmp(self): tuple_to_port_map[self.dst_ip] = {} if self.test_case == 'create_flows': - # Send packets with varying src_ips to create NUM_FLOWS unique flows - # and generate a flow to port map - self.log("Creating flow to port map ...") - for i in range(0, self.num_flows): - if ipv4 or self.inner_hashing: - src_ip = self.src_ipv4_interval.get_random_ip() - else: - src_ip = self.src_ipv6_interval.get_random_ip() - - if self.inner_hashing: - in_port = random.choice(self.net_ports) - else: - in_port = self.net_ports[0] - (port_idx, _) = self.send_rcv_ip_pkt( - in_port, src_port, dst_port, src_ip, dst_ip, self.serv_ports, ipv4) - hit_count_map[port_idx] = hit_count_map.get(port_idx, 0) + 1 - tuple_to_port_map[self.dst_ip][src_ip] = 
port_idx + # try 3 times until test_balancing_no_assert returns true + for retry_time in range(0, 3): + # Send packets with varying src_ips to create NUM_FLOWS unique flows + # and generate a flow to port map + self.log("Creating flow to port map ...") + for i in range(0, self.num_flows): + if ipv4 or self.inner_hashing: + src_ip = self.src_ipv4_interval.get_random_ip() + else: + src_ip = self.src_ipv6_interval.get_random_ip() + + if self.inner_hashing: + in_port = random.choice(self.net_ports) + else: + in_port = self.net_ports[0] + (port_idx, _) = self.send_rcv_ip_pkt( + in_port, src_port, dst_port, src_ip, dst_ip, self.serv_ports, ipv4) + hit_count_map[port_idx] = hit_count_map.get(port_idx, 0) + 1 + tuple_to_port_map[self.dst_ip][src_ip] = port_idx + deviation = self.test_balancing_no_assert(hit_count_map) + if deviation <= self.max_deviation: + break + assert deviation <= self.max_deviation elif self.test_case == 'initial_hash_check': self.log("Ensure that flow to port map is maintained when the same flow is re-sent...") @@ -312,6 +329,7 @@ def fg_ecmp(self): assert port_idx in self.exp_port_set_two hit_count_map[port_idx] = hit_count_map.get(port_idx, 0) + 1 tuple_to_port_map[self.dst_ip][src_ip] = port_idx + self.test_balancing(hit_count_map) elif self.test_case == 'withdraw_nh': self.log("Withdraw next-hop " + str(self.withdraw_nh_port) + " and ensure hash redistribution within correct bank") @@ -374,6 +392,7 @@ def fg_ecmp(self): tuple_to_port_map[self.dst_ip][src_ip] = port_idx else: assert port_idx == port + self.test_balancing(hit_count_map) elif self.test_case == 'add_first_nh': self.log("Add 1st next-hop " + str(self.first_nh) + " and ensure hash redistribution is as expected") @@ -418,7 +437,6 @@ def fg_ecmp(self): self.log("Unsupported testcase " + self.test_case) return - self.test_balancing(hit_count_map) json.dump(tuple_to_port_map, open(PERSIST_MAP,"w")) return diff --git a/ansible/roles/test/files/ptftests/fib_test.py 
b/ansible/roles/test/files/ptftests/fib_test.py index fce7d04ca28..dbcc3df8f03 100644 --- a/ansible/roles/test/files/ptftests/fib_test.py +++ b/ansible/roles/test/files/ptftests/fib_test.py @@ -108,6 +108,9 @@ def setUp(self): - single_fib_for_duts: have a single fib file for all DUTs in multi-dut case. Default: False ''' self.dataplane = ptf.dataplane_instance + self.asic_type = self.test_params.get('asic_type') + if self.asic_type == "marvell": + fib.EXCLUDE_IPV4_PREFIXES.append("240.0.0.0/4") self.fibs = [] for fib_info_file in self.test_params.get('fib_info_files'): @@ -309,7 +312,8 @@ def check_ipv4_route(self, src_port, dst_ip_addr, dst_port_list): format(ip_src, ip_dst, src_port, dst_port_list[rcvd_port], exp_src_mac, actual_src_mac)) return (rcvd_port, rcvd_pkt) elif self.pkt_action == self.ACTION_DROP: - return verify_no_packet_any(self, masked_exp_pkt, dst_port_list) + verify_no_packet_any(self, masked_exp_pkt, dst_ports) + return (None, None) #--------------------------------------------------------------------- def check_ipv6_route(self, src_port, dst_ip_addr, dst_port_list): @@ -388,7 +392,8 @@ def check_ipv6_route(self, src_port, dst_ip_addr, dst_port_list): format(ip_src, ip_dst, src_port, dst_port_list[rcvd_port], exp_src_mac, actual_src_mac)) return (rcvd_port, rcvd_pkt) elif self.pkt_action == self.ACTION_DROP: - return verify_no_packet_any(self, masked_exp_pkt, dst_port_list) + verify_no_packet_any(self, masked_exp_pkt, dst_ports) + return (None, None) def check_within_expected_range(self, actual, expected): ''' diff --git a/ansible/roles/test/files/ptftests/hash_test.py b/ansible/roles/test/files/ptftests/hash_test.py index f42c41e99a0..6c37ca9d096 100644 --- a/ansible/roles/test/files/ptftests/hash_test.py +++ b/ansible/roles/test/files/ptftests/hash_test.py @@ -22,6 +22,7 @@ from ptf.testutils import simple_tcpv6_packet from ptf.testutils import send_packet from ptf.testutils import verify_packet_any_port +from ptf.testutils import 
simple_ipv4ip_packet import fib import lpm @@ -82,6 +83,8 @@ def setUp(self): self.ignore_ttl = self.test_params.get('ignore_ttl', False) self.single_fib = self.test_params.get('single_fib_for_duts', False) + self.ipver = self.test_params.get('ipver', 'ipv4') + # set the base mac here to make it persistent across calls of check_ip_route self.base_mac = self.dataplane.get_mac(*random.choice(self.dataplane.ports.keys())) @@ -172,9 +175,11 @@ def check_ip_route(self, hash_key, src_port, dst_ip, dst_port_list): def _get_ip_proto(self, ipv6=False): # ip_proto 2 is IGMP, should not be forwarded by router + # ip_proto 4 and 41 are encapsulation protocol, ip payload will be malformat + # ip_proto 60 is redirected to 4 as encapsulation protocol, ip payload will be malformat # ip_proto 254 is experimental # MLNX ASIC can't forward ip_proto 254, BRCM is OK, skip for all for simplicity - skip_protos = [2, 253, 254] + skip_protos = [2, 4, 41, 60, 253, 254] if ipv6: # Skip ip_proto 0 for IPv6 skip_protos.append(0) @@ -235,19 +240,21 @@ def check_ipv4_route(self, hash_key, src_port, dst_port_list): masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") send_packet(self, src_port, pkt) - logging.info('Sent Ether(src={}, dst={})/IP(src={}, dst={})/TCP(sport={}, dport={} on port {})'\ + logging.info('Sent Ether(src={}, dst={})/IP(src={}, dst={}, proto={})/TCP(sport={}, dport={} on port {})'\ .format(pkt.src, pkt.dst, pkt['IP'].src, pkt['IP'].dst, + pkt['IP'].proto, sport, dport, src_port)) - logging.info('Expect Ether(src={}, dst={})/IP(src={}, dst={})/TCP(sport={}, dport={})'\ + logging.info('Expect Ether(src={}, dst={})/IP(src={}, dst={}, proto={})/TCP(sport={}, dport={})'\ .format('any', 'any', ip_src, ip_dst, + ip_proto, sport, dport)) @@ -313,19 +320,21 @@ def check_ipv6_route(self, hash_key, src_port, dst_port_list): masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") send_packet(self, src_port, pkt) - logging.info('Sent Ether(src={}, dst={})/IPv6(src={}, 
dst={})/TCP(sport={}, dport={} on port {})'\ + logging.info('Sent Ether(src={}, dst={})/IPv6(src={}, dst={}, proto={})/TCP(sport={}, dport={} on port {})'\ .format(pkt.src, pkt.dst, pkt['IPv6'].src, pkt['IPv6'].dst, + pkt['IPv6'].nh, sport, dport, src_port)) - logging.info('Expect Ether(src={}, dst={})/IPv6(src={}, dst={})/TCP(sport={}, dport={})'\ + logging.info('Expect Ether(src={}, dst={})/IPv6(src={}, dst={}, proto={})/TCP(sport={}, dport={})'\ .format('any', 'any', ip_src, ip_dst, + ip_proto, sport, dport)) @@ -383,7 +392,238 @@ def runTest(self): @summary: Send packet for each range of both IPv4 and IPv6 spaces and expect the packet to be received from one of the expected ports """ + for hash_key in self.hash_keys: + logging.info("hash test hash_key: {}".format(hash_key)) + self.check_hash(hash_key) + +class IPinIPHashTest(HashTest): + ''' + This test is to verify the hash key for IPinIP packet. + The src_ip, dst_ip, src_port and dst_port of inner frame are expected to be hash keys + for IPinIP packet. + ''' + + def check_ipv4_route(self, hash_key, src_port, dst_port_list, outer_src_ip, outer_dst_ip): + ''' + @summary: Check IPv4 route works. + @param hash_key: hash key to build packet with. 
+ @param src_port: index of port to use for sending packet to switch + @param dst_port_list: list of ports on which to expect packet to come back from the switch + ''' + ip_src = self.src_ip_interval.get_random_ip() if hash_key == 'src-ip' else self.src_ip_interval.get_first_ip() + ip_dst = self.dst_ip_interval.get_random_ip() if hash_key == 'dst-ip' else self.dst_ip_interval.get_first_ip() + sport = random.randint(0, 65535) if hash_key == 'src-port' else 1234 + dport = random.randint(0, 65535) if hash_key == 'dst-port' else 80 + + src_mac = (self.base_mac[:-5] + "%02x" % random.randint(0, 255) + ":" + "%02x" % random.randint(0, 255)) \ + if hash_key == 'src-mac' else self.base_mac + + router_mac = self.ptf_test_port_map[str(src_port)]['target_mac'] + + vlan_id = random.choice(self.vlan_ids) if hash_key == 'vlan-id' else 0 + ip_proto = self._get_ip_proto() if hash_key == 'ip-proto' else None + + inner_pkt_len = random.randrange(100, 1024) if hash_key == 'inner_length' else 100 + + pkt = simple_tcp_packet(pktlen=inner_pkt_len if vlan_id == 0 else inner_pkt_len + 4, + dl_vlan_enable=False if vlan_id == 0 else True, + vlan_vid=vlan_id, + vlan_pcp=0, + ip_src=ip_src, + ip_dst=ip_dst, + tcp_sport=sport, + tcp_dport=dport, + ip_ttl=64) + + ipinip_pkt = simple_ipv4ip_packet( + eth_dst=router_mac, + eth_src=src_mac, + ip_src=outer_src_ip, + ip_dst=outer_dst_ip, + inner_frame=pkt['IP']) + + exp_pkt = ipinip_pkt.copy() + exp_pkt['IP'].ttl -= 1 + + if hash_key == 'ip-proto': + ipinip_pkt['IP'].payload.proto = ip_proto + exp_pkt['IP'].payload.proto = ip_proto + masked_exp_pkt = Mask(exp_pkt) + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") + # mask the chksum also if masking the ttl + if self.ignore_ttl: + masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") + masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + masked_exp_pkt.set_do_not_care_scapy(scapy.TCP, "chksum") + + send_packet(self, src_port, 
ipinip_pkt) + logging.info('Sent Ether(src={}, dst={})/IP(src={}, dst={}, proto={})/IP(src={}, dst={}, proto={})/TCP(sport={}, dport={} on port {})'\ + .format(ipinip_pkt.src, + ipinip_pkt.dst, + ipinip_pkt['IP'].src, + ipinip_pkt['IP'].dst, + ipinip_pkt['IP'].proto, + pkt['IP'].src, + pkt['IP'].dst, + pkt['IP'].proto, + sport, + dport, + src_port)) + + rcvd_port, rcvd_pkt = verify_packet_any_port(self, masked_exp_pkt, dst_port_list) + exp_src_mac = self.router_macs[self.ptf_test_port_map[str(dst_port_list[rcvd_port])]['target_dut']] + actual_src_mac = Ether(rcvd_pkt).src + if exp_src_mac != actual_src_mac: + raise Exception("Pkt sent from {} to {} on port {} was rcvd pkt on {} which is one of the expected ports, " + "but the src mac doesn't match, expected {}, got {}". + format(ip_src, ip_dst, src_port, dst_port_list[rcvd_port], exp_src_mac, actual_src_mac)) + return (rcvd_port, rcvd_pkt) + + def check_ipv6_route(self, hash_key, src_port, dst_port_list, outer_src_ip, outer_dst_ip): + ''' + @summary: Check IPv6 route works. + @param hash_key: hash key to build packet with. 
+ @param in_port: index of port to use for sending packet to switch + @param dst_port_list: list of ports on which to expect packet to come back from the switch + @return Boolean + ''' + ip_src = self.src_ip_interval.get_random_ip() if hash_key == 'src-ip' else self.src_ip_interval.get_first_ip() + ip_dst = self.dst_ip_interval.get_random_ip() if hash_key == 'dst-ip' else self.dst_ip_interval.get_first_ip() + + sport = random.randint(0, 65535) if hash_key == 'src-port' else 1234 + dport = random.randint(0, 65535) if hash_key == 'dst-port' else 80 + + src_mac = (self.base_mac[:-5] + "%02x" % random.randint(0, 255) + ":" + "%02x" % random.randint(0, 255)) \ + if hash_key == 'src-mac' else self.base_mac + router_mac = self.ptf_test_port_map[str(src_port)]['target_mac'] + vlan_id = random.choice(self.vlan_ids) if hash_key == 'vlan-id' else 0 + ip_proto = self._get_ip_proto(ipv6=True) if hash_key == "ip-proto" else None + + inner_pkt_len = random.randrange(100, 1024) if hash_key == 'inner_length' else 100 + + pkt = simple_tcpv6_packet(pktlen=inner_pkt_len if vlan_id == 0 else inner_pkt_len + 4, + dl_vlan_enable=False if vlan_id == 0 else True, + vlan_vid=vlan_id, + vlan_pcp=0, + ipv6_dst=ip_dst, + ipv6_src=ip_src, + tcp_sport=sport, + tcp_dport=dport, + ipv6_hlim=64) + + ipinip_pkt = simple_ipv4ip_packet( + eth_dst=router_mac, + eth_src=src_mac, + ip_src=outer_src_ip, + ip_dst=outer_dst_ip, + inner_frame=pkt['IPv6']) + + exp_pkt = ipinip_pkt.copy() + exp_pkt['IP'].ttl -= 1 + + if hash_key == 'ip-proto': + ipinip_pkt['IP'].payload['IPv6'].nh = ip_proto + exp_pkt['IP'].payload['IPv6'].nh = ip_proto + + masked_exp_pkt = Mask(exp_pkt) + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") + masked_exp_pkt.set_do_not_care_scapy(scapy.Ether,"dst") + # mask the chksum also if masking the ttl + if self.ignore_ttl: + masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "hlim") + masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "chksum") + 
masked_exp_pkt.set_do_not_care_scapy(scapy.TCP, "chksum") + + send_packet(self, src_port, ipinip_pkt) + logging.info('Sent Ether(src={}, dst={})/IP(src={}, dst={}, proto={})/IPv6(src={}, dst={}, proto={})/TCP(sport={}, dport={} on port {})'\ + .format(ipinip_pkt.src, + ipinip_pkt.dst, + ipinip_pkt['IP'].src, + ipinip_pkt['IP'].dst, + ipinip_pkt['IP'].proto, + pkt['IPv6'].src, + pkt['IPv6'].dst, + pkt['IPv6'].nh, + sport, + dport, + src_port)) + + rcvd_port, rcvd_pkt = verify_packet_any_port(self, masked_exp_pkt, dst_port_list) + exp_src_mac = self.router_macs[self.ptf_test_port_map[str(dst_port_list[rcvd_port])]['target_dut']] + actual_src_mac = Ether(rcvd_pkt).src + if exp_src_mac != actual_src_mac: + raise Exception("Pkt sent from {} to {} on port {} was rcvd pkt on {} which is one of the expected ports, " + "but the src mac doesn't match, expected {}, got {}". + format(ip_src, ip_dst, src_port, dst_port_list[rcvd_port], exp_src_mac, actual_src_mac)) + return (rcvd_port, rcvd_pkt) + + def check_ip_route(self, hash_key, src_port, dst_port_list, outer_src_ip, outer_dst_ip): + if self.ipver == 'ipv4': + (matched_index, received) = self.check_ipv4_route(hash_key, src_port, dst_port_list, outer_src_ip, outer_dst_ip) + else: + (matched_index, received) = self.check_ipv6_route(hash_key, src_port, dst_port_list, outer_src_ip, outer_dst_ip) + + assert received + + matched_port = dst_port_list[matched_index] + logging.info("Received packet at " + str(matched_port)) + time.sleep(0.02) + + return (matched_port, received) + + def check_hash(self, hash_key): + # Use dummy IPv4 address for outer_src_ip and outer_dst_ip + # We don't care the actually value as long as the outer_dst_ip is routed by default routed + # The outer_src_ip and outer_dst_ip are fixed + outer_src_ip = '80.1.0.31' + outer_dst_ip = '80.1.0.32' + src_port, exp_port_list, next_hop = self.get_src_and_exp_ports(outer_dst_ip) + + logging.info("outer_src_ip={}, outer_dst_ip={}, src_port={}, 
exp_port_list={}".format(outer_src_ip, outer_dst_ip, src_port, exp_port_list)) + if len(exp_port_list) <= 1: + logging.warning("{} has only {} nexthop".format(outer_dst_ip, exp_port_list)) + assert False + + hit_count_map = {} + if hash_key == 'ingress-port': + # The 'ingress-port' key is not used in hash by design. We are doing negative test for 'ingress-port'. + # When 'ingress-port' is included in HASH_KEYS, the PTF test will try to inject same packet to different + # ingress ports and expect that they are forwarded from same egress port. + for ingress_port in self.get_ingress_ports(exp_port_list, outer_dst_ip): + logging.info('Checking hash key {}, src_port={}, exp_ports={}, outer_src_ip={}, outer_dst_ip={}'\ + .format(hash_key, ingress_port, exp_port_list, outer_src_ip, outer_dst_ip)) + (matched_index, _) = self.check_ip_route(hash_key, ingress_port, exp_port_list, outer_src_ip, outer_dst_ip) + hit_count_map[matched_index] = hit_count_map.get(matched_index, 0) + 1 + logging.info("hit count map: {}".format(hit_count_map)) + assert True if len(hit_count_map.keys()) == 1 else False + elif hash_key == 'inner_length': + # The length of inner_frame is not used as hash key for IPinIP packet. + # The test generates IPinIP packets with random inner_frame_length, and then verify the egress path. 
+ # The egress port should never change + for _ in range(0, self.balancing_test_times*len(exp_port_list)): + logging.info('Checking hash key {}, exp_ports={}, outer_src_ip={}, outer_dst_ip={}'\ + .format(hash_key, exp_port_list, outer_src_ip, outer_dst_ip)) + (matched_index, _) = self.check_ip_route(hash_key, src_port, exp_port_list, outer_src_ip, outer_dst_ip) + hit_count_map[matched_index] = hit_count_map.get(matched_index, 0) + 1 + logging.info("hit count map: {}".format(hit_count_map)) + assert True if len(hit_count_map.keys()) == 1 else False + else: + for _ in range(0, self.balancing_test_times*len(exp_port_list)): + logging.info('Checking hash key {}, src_port={}, exp_ports={}, outer_src_ip={}, outer_dst_ip={}'\ + .format(hash_key, src_port, exp_port_list, outer_src_ip, outer_dst_ip)) + (matched_index, _) = self.check_ip_route(hash_key, src_port, exp_port_list, outer_src_ip, outer_dst_ip) + hit_count_map[matched_index] = hit_count_map.get(matched_index, 0) + 1 + logging.info("hash_key={}, hit count map: {}".format(hash_key, hit_count_map)) + + self.check_balancing(next_hop.get_next_hop(), hit_count_map) + + def runTest(self): + """ + @summary: Send IPinIP packet for each range of both IPv4 and IPv6 spaces and + expect the packet to be received from one of the expected ports + """ for hash_key in self.hash_keys: logging.info("hash test hash_key: {}".format(hash_key)) self.check_hash(hash_key) diff --git a/ansible/roles/test/files/ptftests/ip_in_ip_tunnel_test.py b/ansible/roles/test/files/ptftests/ip_in_ip_tunnel_test.py index 94c0a04a268..15eb166b5f0 100644 --- a/ansible/roles/test/files/ptftests/ip_in_ip_tunnel_test.py +++ b/ansible/roles/test/files/ptftests/ip_in_ip_tunnel_test.py @@ -22,6 +22,12 @@ PACKET_NUM = 10000 # packet count for verifying traffic is not forwarded from standby tor to server directly PACKET_NUM_FOR_NEGATIVE_CHECK = 100 +# max times we can try for verifying balanced traffic +MAX_TIMES_CHECK = 100 +# basic packet count for verifying 
traffic is forwarded via IPinIP tunnel +BASIC_PACKET_NUM = 100 +# basic packet count for verifying traffic is not forwarded from standby tor to server directly +BASIC_PACKET_NUM_FOR_NEGATIVE_CHECK = 10 DIFF = 0.25 # The valid range for balance check SRC_IP_RANGE = [unicode('8.0.0.0'), unicode('8.255.255.255')] @@ -58,6 +64,7 @@ def setUp(self): self.hash_key_list = self.test_params['hash_key_list'] self.dataplane = ptf.dataplane_instance self.is_ipv4 = isinstance(ip_address(self.server_ip), IPv4Address) + self.completeness_level = self.test_params['completeness_level'] def runTest(self): """ @@ -149,6 +156,7 @@ def generate_expected_packet(self, inner_packet): exp_tunnel_pkt.set_do_not_care_scapy(scapy.IP, "id") # since src and dst changed, ID would change too exp_tunnel_pkt.set_do_not_care_scapy(scapy.IP, "ttl") # ttl in outer packet is set to 255 exp_tunnel_pkt.set_do_not_care_scapy(scapy.IP, "chksum") # checksum would differ as the IP header is not the same + exp_tunnel_pkt.set_do_not_care_scapy(scapy.IP, "flags") # DF bit may be set return exp_tunnel_pkt @@ -177,12 +185,14 @@ def generate_unexpected_packet(self, inner_pkt): return unexpected_packet - def check_balance(self, pkt_distribution, hash_key): + def check_balance(self, pkt_distribution, hash_key, run_times): + self.logger.info("For {} times: pkt_distribution={}".format(run_times, pkt_distribution)) portchannel_num = len(self.ptf_portchannel_indices) - expect_packet_num = PACKET_NUM / portchannel_num + expect_packet_num = ((PACKET_NUM//MAX_TIMES_CHECK)*run_times) / portchannel_num pkt_num_lo = expect_packet_num * (1.0 - DIFF) pkt_num_hi = expect_packet_num * (1.0 + DIFF) self.logger.info("hash key = {}".format(hash_key)) + self.logger.info("low threshold {} high threshold {}".format(pkt_num_lo, pkt_num_hi)) self.logger.info("%-10s \t %10s \t %10s \t" % ("port(s)", "exp_cnt", "act_cnt")) balance = True for portchannel, count in pkt_distribution.items(): @@ -190,8 +200,18 @@ def check_balance(self, 
pkt_distribution, hash_key): if count < pkt_num_lo or count > pkt_num_hi: balance = False if not balance: - print("Check balance failed for {}".format(hash_key)) - assert(balance) + self.logger.info("Check balance failed for {} in the {} times run".format(hash_key, run_times)) + return balance + + def check_received(self, pkt_distribution, hash_key): + not_received = True + for portchannel, count in pkt_distribution.items(): + self.logger.info("%-10s \t %10s \t %10s \t" % (portchannel, ">0", str(count))) + if count <= 0: + not_received = False + if not not_received: + self.logger.info("Check balance failed for {}".format(hash_key)) + assert(not_received) def send_and_verify_packets(self): """ @@ -201,7 +221,13 @@ def send_and_verify_packets(self): # Select the first ptf indice as src port src_port = dst_ports[0] # Step 1. verify no packet is received from standby_tor to server - for i in range(0, PACKET_NUM_FOR_NEGATIVE_CHECK): + + if self.completeness_level == "thorough": + negative_packet_num = PACKET_NUM_FOR_NEGATIVE_CHECK + else: + negative_packet_num = BASIC_PACKET_NUM_FOR_NEGATIVE_CHECK + self.logger.info("Verify {} negative packets.".format(negative_packet_num)) + for i in range(0, negative_packet_num): inner_pkt = self.generate_packet_to_server('src-ip') unexpected_packet = self.generate_unexpected_packet(inner_pkt) self.dataplane.flush() @@ -215,19 +241,49 @@ def send_and_verify_packets(self): for hash_key in self.hash_key_list: self.logger.info("Verifying traffic balance for hash key {}".format(hash_key)) pkt_distribution = {} - for i in range(0, PACKET_NUM): - inner_pkt = self.generate_packet_to_server(hash_key) - tunnel_pkt = self.generate_expected_packet(inner_pkt) - l3packet = inner_pkt.getlayer(IP) or inner_pkt.getlayer(IPv6) - self.logger.info("Sending packet dst_mac = {} src_mac = {} dst_ip = {} src_ip = {} from port {}" \ - .format(inner_pkt[Ether].dst, inner_pkt[Ether].src, l3packet.dst, l3packet.src, src_port)) - self.dataplane.flush() - 
send_packet(self, src_port, inner_pkt) - # Verify packet is received from IPinIP tunnel - idx, count = verify_packet_any_port(test=self, - pkt=tunnel_pkt, - ports=dst_ports, - device_number=0, - timeout=TIMEOUT) - pkt_distribution[self.indice_to_portchannel[dst_ports[idx]]] = pkt_distribution.get(self.indice_to_portchannel[dst_ports[idx]], 0) + 1 - self.check_balance(pkt_distribution, hash_key) + for port in self.ptf_portchannel_indices.keys(): + pkt_distribution[port] = 0 + # For thorough completeness level, verify PACKET_NUM packets + if self.completeness_level == "thorough": + self.logger.info("Verifying traffic balance on {} completeness level, send {} packets every time.".format(self.completeness_level, PACKET_NUM//MAX_TIMES_CHECK)) + for k in range(MAX_TIMES_CHECK): + for i in range(0, PACKET_NUM//MAX_TIMES_CHECK): + inner_pkt = self.generate_packet_to_server(hash_key) + tunnel_pkt = self.generate_expected_packet(inner_pkt) + l3packet = inner_pkt.getlayer(IP) or inner_pkt.getlayer(IPv6) + self.logger.info("Sending packet dst_mac = {} src_mac = {} dst_ip = {} src_ip = {} from port {}" \ + .format(inner_pkt[Ether].dst, inner_pkt[Ether].src, l3packet.dst, l3packet.src, src_port)) + self.dataplane.flush() + send_packet(self, src_port, inner_pkt) + # Verify packet is received from IPinIP tunnel + idx, count = verify_packet_any_port(test=self, + pkt=tunnel_pkt, + ports=dst_ports, + device_number=0, + timeout=TIMEOUT) + pkt_distribution[self.indice_to_portchannel[dst_ports[idx]]] = pkt_distribution.get(self.indice_to_portchannel[dst_ports[idx]], 0) + 1 + is_balance = self.check_balance(pkt_distribution, hash_key, k+1) + if is_balance: + self.logger.info("After verification for {} times, the traffic is balanced.".format(k+1)) + return + assert(is_balance) + # For other completeness level, just do basic check + # if receive any expected packet on every portchannel then pass + else: + self.logger.info("Verifying traffic on {} completeness level, send {} 
packets.".format(self.completeness_level, BASIC_PACKET_NUM)) + for i in range(0, BASIC_PACKET_NUM): + inner_pkt = self.generate_packet_to_server(hash_key) + tunnel_pkt = self.generate_expected_packet(inner_pkt) + l3packet = inner_pkt.getlayer(IP) or inner_pkt.getlayer(IPv6) + self.logger.info("Sending packet dst_mac = {} src_mac = {} dst_ip = {} src_ip = {} from port {}" \ + .format(inner_pkt[Ether].dst, inner_pkt[Ether].src, l3packet.dst, l3packet.src, src_port)) + self.dataplane.flush() + send_packet(self, src_port, inner_pkt) + # Verify packet is received from IPinIP tunnel + idx, count = verify_packet_any_port(test=self, + pkt=tunnel_pkt, + ports=dst_ports, + device_number=0, + timeout=TIMEOUT) + pkt_distribution[self.indice_to_portchannel[dst_ports[idx]]] = pkt_distribution.get(self.indice_to_portchannel[dst_ports[idx]], 0) + 1 + self.check_received(pkt_distribution, hash_key) diff --git a/ansible/roles/test/files/ptftests/pfc_pause_test.py b/ansible/roles/test/files/ptftests/pfc_pause_test.py index e4431558886..b35d048ce66 100755 --- a/ansible/roles/test/files/ptftests/pfc_pause_test.py +++ b/ansible/roles/test/files/ptftests/pfc_pause_test.py @@ -7,7 +7,6 @@ import socket import sys import struct -import ipaddress import re import ptf @@ -180,6 +179,16 @@ def runTest(self): send_packet(self, self.port_src, pkt, 1) send_packet(self, self.port_src, pkt_bg, 1) + if self.debug: + dump_msg = "Iteration {}:\n port_src: {} sport: {} dport: {} dscp: {} dscp_bg: {} vlan_id: {} \n".format(x, self.port_src, sport, dport, self.dscp, self.dscp_bg, self.vlan_id) + log_file.write(dump_msg) + + dump_msg = "Pkt:\n Hex dump: {}\n\n".format(sc.utils.hexstr(bytes(pkt))) + log_file.write(dump_msg) + + dump_msg = "pkt_bg:\n Hex dump: {}\n\n".format(sc.utils.hexstr(bytes(pkt_bg))) + log_file.write(dump_msg) + pkts = capture_matched_packets(self, masked_exp_pkt, self.port_dst) if self.debug: diff --git a/ansible/roles/test/files/ptftests/pfc_wd.py 
b/ansible/roles/test/files/ptftests/pfc_wd.py index 42e7234cfa1..1fa57b6b48b 100644 --- a/ansible/roles/test/files/ptftests/pfc_wd.py +++ b/ansible/roles/test/files/ptftests/pfc_wd.py @@ -4,7 +4,6 @@ import socket import sys import struct -import ipaddress import re import ptf diff --git a/ansible/roles/test/files/ptftests/vnet_vxlan.py b/ansible/roles/test/files/ptftests/vnet_vxlan.py index d519b0bb2e9..591e58077a6 100644 --- a/ansible/roles/test/files/ptftests/vnet_vxlan.py +++ b/ansible/roles/test/files/ptftests/vnet_vxlan.py @@ -23,9 +23,8 @@ from ptf.mask import Mask import datetime import subprocess -import ipaddress from pprint import pprint -from ipaddress import ip_address, ip_network +from ipaddress import ip_address, ip_network, IPv4Address, IPv6Address class VNET(BaseTest): def __init__(self): @@ -361,7 +360,7 @@ def FromVM(self, test): tcp_dport=5000) udp_sport = 1234 # Use entropy_hash(pkt) udp_dport = self.vxlan_port - if isinstance(ip_address(test['host']), ipaddress.IPv4Address): + if isinstance(ip_address(test['host']), IPv4Address): vxlan_pkt = simple_vxlan_packet( eth_dst=self.dut_mac, eth_src=self.random_mac, @@ -374,7 +373,7 @@ def FromVM(self, test): vxlan_vni=int(test['vni']), with_udp_chksum=False, inner_frame=pkt) - elif isinstance(ip_address(test['host']), ipaddress.IPv6Address): + elif isinstance(ip_address(test['host']), IPv6Address): vxlan_pkt = simple_vxlanv6_packet( eth_dst=self.dut_mac, eth_src=self.random_mac, @@ -452,7 +451,7 @@ def FromServer(self, test): tcp_dport=5000) udp_sport = 1234 # Use entropy_hash(pkt) udp_dport = self.vxlan_port - if isinstance(ip_address(test['host']), ipaddress.IPv4Address): + if isinstance(ip_address(test['host']), IPv4Address): encap_pkt = simple_vxlan_packet( eth_src=self.dut_mac, eth_dst=self.random_mac, @@ -466,7 +465,7 @@ def FromServer(self, test): vxlan_vni=vni, inner_frame=exp_pkt) encap_pkt[IP].flags = 0x2 - elif isinstance(ip_address(test['host']), ipaddress.IPv6Address): + elif 
isinstance(ip_address(test['host']), IPv6Address): encap_pkt = simple_vxlanv6_packet( eth_src=self.dut_mac, eth_dst=self.random_mac, @@ -484,7 +483,7 @@ def FromServer(self, test): masked_exp_pkt = Mask(encap_pkt) masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") - if isinstance(ip_address(test['host']), ipaddress.IPv4Address): + if isinstance(ip_address(test['host']), IPv4Address): masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") else: masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "hlim") diff --git a/ansible/roles/test/files/ptftests/vxlan_traffic.py b/ansible/roles/test/files/ptftests/vxlan_traffic.py index fdefc99a3a8..c6d4bfa4e9a 100644 --- a/ansible/roles/test/files/ptftests/vxlan_traffic.py +++ b/ansible/roles/test/files/ptftests/vxlan_traffic.py @@ -1,45 +1,116 @@ -# ptf --test-dir ptftests vxlan_traffic.VXLAN --platform-dir ptftests --qlen=1000 --platform remote \ -# -t 't2_ports=[16, 17, 0, 1, 4, 5, 21, 20];dut_mac=u"64:3a:ea:c1:73:f8";expect_encap_success=True; \ -# vxlan_port=4789;topo_file="/tmp/vxlan_topo_file.json";config_file="/tmp/vxlan-config-TC1-v6_in_v4.json";t0_ports=[u"Ethernet42"]' --relax --debug info \ -# --log-file /tmp/vxlan-tests.TC1.v6_in_v4.log - -# The test checks vxlan encapsulation: -# The test runs three tests for each vlan on the DUT: -# 'test_encap' : Sends regular packets to T0-facing interface and expects to see the encapsulated packets on the T2-facing interfaces. -# -# The test has the following parameters: -# 1. 'config_file' is a filename of a file which contains all necessary information to run the test. The file is populated by ansible. This parameter is mandatory. - -import sys +# VxLAN Traffic Script, to be run in PTF container. 
Usage: +# ptf --test-dir ptftests vxlan_traffic.VXLAN --platform-dir ptftests +# --qlen=1000 --platform remote -t +# 't2_ports=[16, 17, 0, 1, 4, 5, 21, 20];dut_mac=u"64:3a:ea:c1:73:f8";\ +# expect_encap_success=True;packet_count=10;downed_endpoints=["100.0.1.10"]\ +# vxlan_port=4789;topo_file="/tmp/vxlan_topo_file.json";config_file=\ +# "/tmp/vxlan-config-TC1-v6_in_v4.json";t0_ports=[u"Ethernet42"];\ +# random_src_ip=False;random_dport=True;random_dport=False' --relax +# --debug info --log-file /tmp/vxlan-tests.TC1.v6_in_v4.log + +''' + The test checks vxlan encapsulation: + 'test_encap' : Sends regular packets to T0-facing interface and expects to + see the encapsulated packets on the T2-facing interfaces. + + The test has the following parameters: + config_file : is a filename of a file which contains all + necessary information to run the test. The file is + populated by ansible. This parameter is mandatory. + t2_ports : The list of PTF port indices facing T2 Neighbors, + AKA ports to expect the encapsulated packets to + come in. + dut_mac : The MAC address of the dut, given by "show + platform summary". + expect_encap_success : Is the encapsulation expected to succeed ? + True/False. + packet_count : Number of packets per endpoint to try. Default 10 + downned_endpoints : The list of IP addresses which are down, due to BFD + being disabled. + vxlan_port : The global VxLAN port setup in the DUT. + Default: 4789 + topo_file : The file that contains the topology information, + like minigraph data, connections, and so on. + t0_ports : The DUT intf into which we will inject payload + packets. + random_src_ip : Should we use random src IP addresses for the + payload packets? Default:False + random_dport : Should we use random dest port for the payload + packets? Default:True + random_sport : Should we use random src port for the payload + packets? 
Default:False +''' + import os.path import json +from datetime import datetime +import logging +import random +from ipaddress import ip_address, IPv4Address, IPv6Address import ptf import ptf.packet as scapy from ptf.base_tests import BaseTest -from ptf import config -from ptf.testutils import * -from ptf.dataplane import match_exp_pkt +from ptf.testutils import ( + simple_tcp_packet, + simple_tcpv6_packet, + simple_vxlan_packet, + simple_vxlanv6_packet, + verify_no_packet_any, + send_packet, + test_params_get, + dp_poll) from ptf.mask import Mask -import datetime -import subprocess -import ipaddress -from pprint import pprint -from ipaddress import ip_address -import random + VARS = {} VARS['tcp_sport'] = 1234 VARS['tcp_dport'] = 5000 +Logger = logging.getLogger(__name__) + # Some constants used in this code -TEST_ECN = False +MIN_PACKET_COUNT = 4 +MINIMUM_PACKETS_FOR_ECMP_VALIDATION = 300 +TEST_ECN = True -def get_incremental_value(key): +Address_Count = 0 + + +def get_ip_address(af, hostid=1, netid=100): + ''' + Get a new IP address to use based on the arguments. + hostid : The last octet in the Address. + netid : The first octet in the Address. + ''' + global Address_Count + third_octet = Address_Count % 255 + second_octet = (Address_Count / 255) % 255 + first_octet = netid + (Address_Count / 65025) + Address_Count = Address_Count + 1 + if af == 'v4': + return "{}.{}.{}.{}".format( + first_octet, second_octet, third_octet, hostid).decode() + if af == 'v6': + # :0: gets removed in the IPv6 addresses. + # Adding a to octets, to avoid it. + return "fddd:a{}:a{}::a{}:{}".format( + first_octet, second_octet, third_octet, hostid).decode() + +def get_incremental_value(key): + ''' + Global function to keep track of the tcp/udp port numbers used in + payload. 
+ ''' global VARS - VARS[key] = VARS[key] + 1 + # We would like to use the ports from 1234 to 65535 + VARS[key] = max(1234, (VARS[key] + 1) % 65535) return VARS[key] + def read_ptf_macs(): + ''' + Get the list of mac addresses of all interfaces in the PTF. + ''' addrs = {} for intf in os.listdir('/sys/class/net'): if os.path.isdir('/sys/class/net/%s' % intf): @@ -48,17 +119,41 @@ def read_ptf_macs(): return addrs + class VXLAN(BaseTest): + ''' + Testcase for VxLAN. Currently implements encap testcase. + decap is TBD. + ''' def __init__(self): BaseTest.__init__(self) - self.DEFAULT_PKT_LEN = 100 def setUp(self): + ''' + Setup the internal structures for running the test. + 1. Parse the command line arguments. + 2. Load the configs from the input files. + 3. Ready the mapping of destination->nexthops. + ''' self.dataplane = ptf.dataplane_instance self.test_params = test_params_get() + self.random_src_ip = self.test_params['random_src_ip'] + self.random_dport = self.test_params['random_dport'] + self.random_sport = self.test_params['random_sport'] + self.tolerance = self.test_params['tolerance'] self.dut_mac = self.test_params['dut_mac'] self.vxlan_port = self.test_params['vxlan_port'] self.expect_encap_success = self.test_params['expect_encap_success'] + self.packet_count = self.test_params['packet_count'] + self.downed_endpoints = self.test_params['downed_endpoints'] + self.t2_ports = self.test_params['t2_ports'] + # The ECMP check fails occasionally if there is not enough packets. + # We should keep the packet count atleast MIN_PACKET_COUNT. 
+ if self.packet_count < MIN_PACKET_COUNT: + Logger.warning( + "Packet_count is below minimum, resetting to %s", + MIN_PACKET_COUNT) + self.packet_count = MIN_PACKET_COUNT self.random_mac = "00:aa:bb:cc:dd:ee" self.ptf_mac_addrs = read_ptf_macs() @@ -68,7 +163,6 @@ def setUp(self): self.topo_data = json.load(fp) self.fill_loopback_ip() - self.t2_ports = self.test_params['t2_ports'] self.nbr_info = self.config_data['neighbors'] self.packets = [] self.dataplane.flush() @@ -76,121 +170,228 @@ def setUp(self): return def tearDown(self): + ''' + Close the packet capture file. + ''' if self.vxlan_enabled: json.dump(self.packets, open("/tmp/vnet_pkts.json", 'w')) return def fill_loopback_ip(self): - loop_config_data = self.topo_data['minigraph_facts']['minigraph_lo_interfaces'] + ''' + Get the DUT's Loopback ipv4 ipv6 addresses from minigraph. + ''' + loop_config_data = \ + self.topo_data['minigraph_facts']['minigraph_lo_interfaces'] for entry in loop_config_data: - if isinstance(ipaddress.ip_address(entry['addr']), ipaddress.IPv4Address): + if isinstance(ip_address(entry['addr']), IPv4Address): self.loopback_ipv4 = entry['addr'] - if isinstance(ipaddress.ip_address(entry['addr']), ipaddress.IPv6Address): + if isinstance(ip_address(entry['addr']), IPv6Address): self.loopback_ipv6 = entry['addr'] def runTest(self): + ''' + Main code of this script. + Run the encap test for every destination, and its nexthops. + ''' + mg_facts = self.topo_data['minigraph_facts'] for t0_intf in self.test_params['t0_ports']: - # find the list of neigh addresses for the t0_ports. For each neigh address(Addr1): - # for each destination address(Addr2) in the same Vnet as t0_intf, + # find the list of neigh addresses for the t0_ports. + # For each neigh address(Addr1): + # For each destination address(Addr2) in the same Vnet as t0_intf, # send traffic from Add1 to it. If there - # are multiple nexthops for the Addr2, then send that many different - # streams(different tcp ports). 
+ # are multiple nexthops for the Addr2, then send that + # many different streams(different tcp ports). neighbors = [self.config_data['neighbors'][t0_intf]] - ptf_port = self.topo_data['minigraph_facts']['minigraph_ptf_indices'][t0_intf] + ptf_port = mg_facts['minigraph_ptf_indices'][t0_intf] vnet = self.config_data['vnet_intf_map'][t0_intf] - vni = self.config_data['vnet_vni_map'][vnet] + vni = self.config_data['vnet_vni_map'][vnet] for addr in neighbors: - for destination,nh in self.config_data['dest_to_nh_map'][vnet].iteritems(): - self.test_encap(ptf_port, vni, addr, destination, nh, test_ecn=TEST_ECN) - - def cmd(self, cmds): - process = subprocess.Popen(cmds, - shell=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - return_code = process.returncode - - return stdout, stderr, return_code - - def read_ptf_macs(self): - addrs = {} - for intf in os.listdir('/sys/class/net'): - if os.path.isdir('/sys/class/net/%s' % intf): - with open('/sys/class/net/%s/address' % intf) as fp: - addrs[intf] = fp.read().strip() - - return addrs - - def test_encap(self, ptf_port, vni, ptf_addr, destination, nhs, test_ecn=False, vlan=0): - rv = True + for destination, nexthops in \ + self.config_data['dest_to_nh_map'][vnet].iteritems(): + self.test_encap( + ptf_port, + vni, + addr, + destination, + nexthops, + test_ecn=TEST_ECN, + random_dport=self.random_dport, + random_sport=self.random_sport, + random_src_ip=self.random_src_ip) + + def verify_all_addresses_used_equally(self, + nhs, + returned_ip_addresses, + packet_count, + downed_endpoints=[]): + ''' + Verify the ECMP functionality using 2 checks. + Check 1 verifies every nexthop address has been used. + Check 2 verifies the distribution of number of packets among the + nexthops. + Params: + nhs : the nexthops that are configured. + returned_ip_addresses : The dict containing the nh addresses + and corresponding packet counts. 
+ ''' + + if downed_endpoints: + for down_endpoint in downed_endpoints: + if down_endpoint in nhs: + nhs.remove(down_endpoint) + if down_endpoint in returned_ip_addresses: + raise RuntimeError( + "We received traffic with a downed endpoint({}), " + "unexpected.".format(down_endpoint)) + + # Check #1 : All addresses have been used, except the downed ones. + if set(nhs) - set(returned_ip_addresses.keys()) == set([]): + Logger.info(" Each valid endpoint address has been used") + Logger.info("Packets sent:%s distribution:", packet_count) + for nh_address in returned_ip_addresses.keys(): + Logger.info(" %s : %s", + nh_address, + returned_ip_addresses[nh_address]) + # Check #2 : The packets are almost equally distributed. + # Every next-hop should have received within {tolerance}% of the + # packets that we sent per nexthop(which is packet_count). This + # check is valid only if there are large enough number of + # packets(300). Any lower number will need higher + # tolerance(more than 2%). + if packet_count > MINIMUM_PACKETS_FOR_ECMP_VALIDATION: + for nh_address in returned_ip_addresses.keys(): + if (1.0-self.tolerance) * packet_count <= \ + returned_ip_addresses[nh_address] <= \ + (1.0+self.tolerance) * packet_count: + pass + else: + raise RuntimeError( + "ECMP nexthop address: {} received too less or too" + " many of the packets expected. Expected:{}, " + "received on that address:{}".format( + nh_address, + packet_count, + returned_ip_addresses[nh_address])) + else: + raise RuntimeError( + "Not all addresses were used. Here are the unused ones:{}," + "expected:{}, got:{}".format( + set(nhs) - set(returned_ip_addresses.keys()), + nhs, + returned_ip_addresses)) + + def test_encap( + self, + ptf_port, + vni, + ptf_addr, + destination, + nhs, + test_ecn=False, + random_dport=True, + random_sport=False, + random_src_ip=False): + ''' + Test the encapsulation of packets works correctly. + 1. Send a TCP packet to the DUT port. + 2. 
Verify that the DUT returns an encapsulated packet correctly. + 3. Optionally: Perform if the ECMP is working(all nexthops are used + equally). + ''' try: - pkt_len = self.DEFAULT_PKT_LEN - if 'vlan' != 0: - tagged = True - pkt_len += 4 - else: - tagged = False - - - options = {'ip_tos' : 0} - options_v6 = {'ipv6_tc' : 0} + pkt_len = 100 + + options = {'ip_ecn': 0} + options_v6 = {'ipv6_ecn': 0} if test_ecn: - options = {'ip_tos' : random.randint(0, 3)} - options_v6 = {'ipv6_tos' : random.randint(0, 3)} + ecn = random.randint(0, 3) + options = {'ip_ecn': ecn} + options_v6 = {'ipv6_ecn': ecn} - # ECMP support, assume it is a string of comma seperated list of addresses. - returned_ip_addresses = {} + # ECMP support, assume it is a string of comma seperated list of + # addresses. check_ecmp = False - for host_address in nhs: + working_nhs = list(set(nhs) - set(self.downed_endpoints)) + expect_success = self.expect_encap_success + test_nhs = working_nhs + packet_count = self.packet_count + if not working_nhs: + # Since there is no NH that is up for this destination, + # we can't expect success here. + expect_success = False + test_nhs = nhs + # Also reduce the packet count, since this script has to wait + # 1 second per packet(1000 packets is 20 minutes). + packet_count = 4 + returned_ip_addresses = {} + for host_address in test_nhs: check_ecmp = True # This will ensure that every nh is used atleast once. 
- for i in range(4): - tcp_sport = get_incremental_value('tcp_sport') - tcp_dport = 5000 - valid_combination = True - if isinstance(ip_address(destination), ipaddress.IPv4Address) and isinstance(ip_address(ptf_addr), ipaddress.IPv4Address): + Logger.info( + "Sending %s packets from port %s to %s", + packet_count, + str(ptf_port), + destination) + for _ in range(packet_count): + if random_sport: + tcp_sport = get_incremental_value('tcp_sport') + else: + tcp_sport = VARS['tcp_sport'] + if random_dport: + tcp_dport = get_incremental_value('tcp_dport') + else: + tcp_dport = VARS['tcp_dport'] + if isinstance(ip_address(destination), IPv4Address) and \ + isinstance(ip_address(ptf_addr), IPv4Address): + if random_src_ip: + ptf_addr = get_ip_address( + "v4", hostid=3, netid=170) pkt_opts = { "pktlen": pkt_len, "eth_dst": self.dut_mac, "eth_src": self.ptf_mac_addrs['eth%d' % ptf_port], - "ip_dst":destination, - "ip_src":ptf_addr, - "ip_id":105, - "ip_ttl":64, - "tcp_sport":tcp_sport, - "tcp_dport":tcp_dport} + "ip_dst": destination, + "ip_src": ptf_addr, + "ip_id": 105, + "ip_ttl": 64, + "tcp_sport": tcp_sport, + "tcp_dport": tcp_dport} pkt_opts.update(options) pkt = simple_tcp_packet(**pkt_opts) pkt_opts['ip_ttl'] = 63 pkt_opts['eth_src'] = self.dut_mac exp_pkt = simple_tcp_packet(**pkt_opts) - elif isinstance(ip_address(destination), ipaddress.IPv6Address) and isinstance(ip_address(ptf_addr), ipaddress.IPv6Address): + elif isinstance(ip_address(destination), IPv6Address) and \ + isinstance(ip_address(ptf_addr), IPv6Address): + if random_src_ip: + ptf_addr = get_ip_address( + "v6", hostid=4, netid=170) pkt_opts = { - "pktlen":pkt_len, - "eth_dst":self.dut_mac, - "eth_src":self.ptf_mac_addrs['eth%d' % ptf_port], - "ipv6_dst":destination, - "ipv6_src":ptf_addr, - "ipv6_hlim":64, - "tcp_sport":tcp_sport, - "tcp_dport":tcp_dport} + "pktlen": pkt_len, + "eth_dst": self.dut_mac, + "eth_src": self.ptf_mac_addrs['eth%d' % ptf_port], + "ipv6_dst": destination, + "ipv6_src": 
ptf_addr, + "ipv6_hlim": 64, + "tcp_sport": tcp_sport, + "tcp_dport": VARS['tcp_dport']} pkt_opts.update(options_v6) pkt = simple_tcpv6_packet(**pkt_opts) pkt_opts['ipv6_hlim'] = 63 - pkt_opts['eth_dst'] = self.dut_mac pkt_opts['eth_src'] = self.dut_mac exp_pkt = simple_tcpv6_packet(**pkt_opts) else: - valid_combination = False - print("Unusable combination:src:{} and dst:{}".format(src, destination)) - udp_sport = 1234 # Use entropy_hash(pkt), it will be ignored in the test later. + raise RuntimeError( + "Invalid mapping of destination and PTF address.") + udp_sport = 1234 # it will be ignored in the test later. udp_dport = self.vxlan_port - if isinstance(ip_address(host_address), ipaddress.IPv4Address): + if isinstance(ip_address(host_address), IPv4Address): encap_pkt = simple_vxlan_packet( eth_src=self.dut_mac, eth_dst=self.random_mac, ip_id=0, + ip_ihl=5, ip_src=self.loopback_ipv4, ip_dst=host_address, ip_ttl=128, @@ -198,9 +399,10 @@ def test_encap(self, ptf_port, vni, ptf_addr, destination, nhs, test_ecn=False, udp_dport=udp_dport, with_udp_chksum=False, vxlan_vni=vni, - inner_frame=exp_pkt) - encap_pkt[IP].flags = 0x2 - elif isinstance(ip_address(host_address), ipaddress.IPv6Address): + inner_frame=exp_pkt, + **options) + encap_pkt[scapy.IP].flags = 0x2 + elif isinstance(ip_address(host_address), IPv6Address): encap_pkt = simple_vxlanv6_packet( eth_src=self.dut_mac, eth_dst=self.random_mac, @@ -210,54 +412,106 @@ def test_encap(self, ptf_port, vni, ptf_addr, destination, nhs, test_ecn=False, udp_dport=udp_dport, with_udp_chksum=False, vxlan_vni=vni, - inner_frame=exp_pkt) - send_packet(self, ptf_port, str(pkt), count=2) + inner_frame=exp_pkt, + **options_v6) + send_packet(self, ptf_port, str(pkt)) + + # After we sent all packets, wait for the responses. 
+ if expect_success: + wait_timeout = 2 + loop_timeout = max(packet_count * 5, 1000) # milliseconds + start_time = datetime.now() + vxlan_count = 0 + Logger.info("Loop time:out %s milliseconds", loop_timeout) + while (datetime.now() - start_time).total_seconds() *\ + 1000 < loop_timeout and vxlan_count < packet_count: + result = dp_poll( + self, timeout=wait_timeout + ) + if isinstance(result, self.dataplane.PollSuccess): + if not isinstance( + result, self.dataplane.PollSuccess) or \ + result.port not in self.t2_ports or \ + "VXLAN" not in scapy.Ether(result.packet): + continue + else: + vxlan_count += 1 + scapy_pkt = scapy.Ether(result.packet) + # Store every destination that was received. + if isinstance( + ip_address(host_address), IPv6Address): + dest_ip = scapy_pkt['IPv6'].dst + else: + dest_ip = scapy_pkt['IP'].dst + try: + returned_ip_addresses[dest_ip] = \ + returned_ip_addresses[dest_ip] + 1 + except KeyError: + returned_ip_addresses[dest_ip] = 1 + else: + Logger.info("No packet came in %s seconds", + wait_timeout) + break + if not vxlan_count or not returned_ip_addresses: + raise RuntimeError( + "Didnot get any reply for this destination:{}" + " Its active endpoints:{}".format( + destination, test_nhs)) + Logger.info( + "Vxlan packets received:%s, loop time:%s " + "seconds", vxlan_count, + (datetime.now() - start_time).total_seconds()) + Logger.info("received = {}".format(returned_ip_addresses)) + + else: + check_ecmp = False + Logger.info("Verifying no packet") masked_exp_pkt = Mask(encap_pkt) + masked_exp_pkt.set_ignore_extra_bytes() masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") - if isinstance(ip_address(host_address), ipaddress.IPv4Address): + if isinstance(ip_address(host_address), IPv4Address): masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") - masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + masked_exp_pkt.set_do_not_care_scapy(scapy.IP, + "chksum") 
masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "dst") else: - masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "hlim") - masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "chksum") - masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, "dst") - masked_exp_pkt.set_do_not_care_scapy(scapy.UDP, "sport") - masked_exp_pkt.set_do_not_care_scapy(scapy.UDP, "chksum") - - logging.info("Sending packet from port " + str(ptf_port) + " to " + destination) - - if self.expect_encap_success: - status, received_pkt = verify_packet_any_port(self, masked_exp_pkt, self.t2_ports) - scapy_pkt = Ether(received_pkt) - # Store every destination that was received. - if isinstance(ip_address(host_address), ipaddress.IPv6Address): - dest_ip = scapy_pkt['IPv6'].dst - else: - dest_ip = scapy_pkt['IP'].dst - try: - returned_ip_addresses[dest_ip] = returned_ip_addresses[dest_ip] + 1 - except KeyError: - returned_ip_addresses[dest_ip] = 1 + masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, + "hlim") + masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, + "chksum") + masked_exp_pkt.set_do_not_care_scapy(scapy.IPv6, + "dst") + masked_exp_pkt.set_do_not_care_scapy(scapy.UDP, + "sport") + masked_exp_pkt.set_do_not_care_scapy(scapy.UDP, + "chksum") - else: - check_ecmp = False - print ("Verifying no packet") - verify_no_packet_any(self, masked_exp_pkt, self.t2_ports) + try: + verify_no_packet_any( + self, + masked_exp_pkt, + self.t2_ports) + except BaseException: + raise RuntimeError( + "Verify_no_packet failed. 
Args:ports:{} sent:{}\n," + "expected:{}\n, encap_pkt:{}\n".format( + self.t2_ports, + repr(pkt), + repr(exp_pkt), + repr(encap_pkt))) # Verify ECMP: if check_ecmp: - if set(nhs) - set(returned_ip_addresses.keys()) == set([]): - print ("Each address has been used") - else: - raise RuntimeError('''ECMP might have failed for:{}, we expected every ip address in the nexthop group({} of them) - to be used, but only {} are used:\nUsed addresses:{}\nUnused Addresses:{}'''.format(destination, - len(nhs), len(returned_ip_addresses.keys()), - returned_ip_addresses.keys(), set(nhs)-set(returned_ip_addresses.keys()))) + self.verify_all_addresses_used_equally( + nhs, + returned_ip_addresses, + packet_count, + self.downed_endpoints) + pkt.load = '0' * 60 + str(len(self.packets)) self.packets.append((ptf_port, str(pkt).encode("base64"))) finally: - print + Logger.info("") diff --git a/ansible/roles/test/files/ptftests/wr_arp.py b/ansible/roles/test/files/ptftests/wr_arp.py index 1eb3b6c0065..ab061b3447a 100644 --- a/ansible/roles/test/files/ptftests/wr_arp.py +++ b/ansible/roles/test/files/ptftests/wr_arp.py @@ -21,10 +21,11 @@ import ptf from ptf.base_tests import BaseTest from ptf import config +from ptf.mask import Mask import ptf.dataplane as dataplane import ptf.testutils as testutils from device_connection import DeviceConnection - +import ipaddress class ArpTest(BaseTest): def __init__(self): @@ -102,11 +103,20 @@ def dut_thr(self, q_from, q_to): def test_port_thr(self): self.log("test_port_thr started") - while time.time() < self.stop_at: + while True: for test in self.tests: + self.log("Looping through tests: {}".format(test)) for port in test['acc_ports']: + if time.time() > self.stop_at: + break nr_rcvd = self.testPort(port) self.records[port][time.time()] = nr_rcvd + else: + continue + break + else: + continue + break self.log("Quiting from test_port_thr") return @@ -145,13 +155,15 @@ def generate_VlanPrefixes(self, gw, prefixlen, acc_ports): return res - def 
generatePkts(self, gw, port_ip, port_mac): + def generatePkts(self, gw, port_ip, port_mac, vlan_id): pkt = testutils.simple_arp_packet( ip_snd=port_ip, ip_tgt=gw, eth_src=port_mac, hw_snd=port_mac, - ) + vlan_vid=vlan_id + ) + exp_pkt = testutils.simple_arp_packet( ip_snd=gw, ip_tgt=port_ip, @@ -160,9 +172,12 @@ def generatePkts(self, gw, port_ip, port_mac): hw_snd=self.dut_mac, hw_tgt=port_mac, arp_op=2, + vlan_vid=vlan_id ) - - return str(pkt), str(exp_pkt) + masked_exp_pkt = Mask(exp_pkt) + # Ignore the Ethernet padding zeros + masked_exp_pkt.set_ignore_extra_bytes() + return pkt, masked_exp_pkt def generatePackets(self): self.gen_pkts = {} @@ -171,7 +186,12 @@ def generatePackets(self): gw = test['vlan_gw'] port_ip = test['vlan_ip_prefixes'][port] port_mac = self.ptf_mac_addrs['eth%d' % port] - self.gen_pkts[port] = self.generatePkts(gw, port_ip, port_mac) + tagging_mode = test['tagging_mode'][port] + if tagging_mode == 'tagged': + vlan_id = test['vlan_id'] + else: + vlan_id = 0 + self.gen_pkts[port] = self.generatePkts(gw, port_ip, port_mac, vlan_id) return @@ -208,24 +228,34 @@ def setUp(self): self.tests = [] vni_base = 0 - for name, data in graph['minigraph_vlans'].items(): + for vlan, config in graph['vlan_facts'].items(): test = {} - test['acc_ports'] = [graph['minigraph_port_indices'][member] for member in data['members']] - vlan_id = int(name.replace('Vlan', '')) - test['vni'] = vni_base + vlan_id + test['acc_ports'] = [] + test['tagging_mode'] = {} + for member, mode in config['members'].items(): + ptf_port_idx = graph['minigraph_port_indices'][member] + test['acc_ports'].append(ptf_port_idx) + test['tagging_mode'].update( + { + ptf_port_idx: mode['tagging_mode'] + } + ) + test['vlan_id'] = int(config['vlanid']) + test['vni'] = vni_base + test['vlan_id'] - gw = None prefixlen = None - for d in graph['minigraph_vlan_interfaces']: - if d['attachto'] == name: - gw = d['addr'] + for d in config['interfaces']: + if sys.version_info < (3, 0): + ip = 
ipaddress.ip_address(d['addr'].decode('utf8')) + else: + ip = ipaddress.ip_address(d['addr']) + if ip.version == 4: + test['vlan_gw'] = d['addr'] prefixlen = int(d['prefixlen']) + test['vlan_ip_prefixes'] = self.generate_VlanPrefixes(d['addr'], prefixlen, test['acc_ports']) break else: - raise Exception("Vlan '%s' is not found" % name) - - test['vlan_gw'] = gw - test['vlan_ip_prefixes'] = self.generate_VlanPrefixes(gw, prefixlen, test['acc_ports']) + raise Exception("No invalid IPv4 address found for Vlan '%s'" % vlan) self.tests.append(test) @@ -277,9 +307,9 @@ def runTest(self): test_port_thr.join(timeout=self.how_long) if test_port_thr.isAlive(): - self.log("Timed out waiting for warm reboot") + self.log("Timed out waiting for traffic-sender (test_port_thr thread)") self.req_dut('quit') - self.assertTrue(False, "Timed out waiting for warm reboot") + self.assertTrue(False, "Timed out waiting for traffic-sender (test_port_thr thread)") uptime_after = self.req_dut('uptime') if uptime_after.startswith('error'): diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py index f6e7e9d4631..d111da0e973 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py @@ -45,6 +45,8 @@ err_no_start_marker = -4 err_invalid_string_format = -5 err_invalid_input = -6 +err_end_ignore_marker = -7 +err_start_ignore_marker = -8 class AnsibleLogAnalyzer: ''' @@ -76,6 +78,8 @@ class AnsibleLogAnalyzer: start_marker_prefix = "start-LogAnalyzer" end_marker_prefix = "end-LogAnalyzer" + start_ignore_marker_prefix = "start-ignore-LogAnalyzer" + end_ignore_marker_prefix = "end-ignore-LogAnalyzer" def init_sys_logger(self): logger = logging.getLogger('LogAnalyzer') @@ -129,6 +133,14 @@ def create_end_marker(self): return self.end_marker_prefix + "-" + self.run_id #--------------------------------------------------------------------- + def 
create_start_ignore_marker(self): + return self.start_ignore_marker_prefix + "-" + self.run_id + #--------------------------------------------------------------------- + + def create_end_ignore_marker(self): + return self.end_ignore_marker_prefix + "-" + self.run_id + #--------------------------------------------------------------------- + def flush_rsyslogd(self): ''' @summary: flush all remaining buffer in rsyslogd to disk @@ -194,30 +206,28 @@ def wait_for_marker(self, marker, timeout=60, polling_interval=10): last_check_pos = 0 syslog_file = "/var/log/syslog" prev_syslog_file = "/var/log/syslog.1" - last_dt = os.path.getctime(syslog_file) while wait_time <= timeout: - with open(syslog_file, 'r') as fp: - dt = os.path.getctime(syslog_file) - if last_dt != dt: - try: - with open(prev_syslog_file, 'r') as pfp: - pfp.seek(last_check_pos) - for l in fp: - if marker in l: - return True - except FileNotFoundError: - print("cannot find file {}".format(prev_syslog_file)) - last_check_pos = 0 - last_dt = dt - # resume from last search position - if last_check_pos: - fp.seek(last_check_pos) - # check if marker in the file - for l in fp: - if marker in l: - return True - # record last search position - last_check_pos = fp.tell() + # look for marker in syslog file + if os.path.exists(syslog_file): + with open(syslog_file, 'r') as fp: + # resume from last search position + if last_check_pos: + fp.seek(last_check_pos) + # check if marker in the file + for l in fp: + if marker in l: + return True + # record last search position + last_check_pos = fp.tell() + + # logs might get rotated while waiting for marker + # look for marker in syslog.1 file + if os.path.exists(prev_syslog_file): + with open(prev_syslog_file, 'r') as pfp: + # check if marker in the file + for l in pfp: + if marker in l: + return True time.sleep(polling_interval) wait_time += polling_interval @@ -421,11 +431,12 @@ def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_rege start_marker 
= self.create_start_marker() end_marker = self.create_end_marker() + ignore_marker_run_ids = [] for rev_line in reversed(log_file.readlines()): if stdin_as_input: in_analysis_range = True else: - if rev_line.find(end_marker) != -1: + if end_marker in rev_line: self.print_diagnostic_message('found end marker: %s' % end_marker) if (found_end_marker): print('ERROR: duplicate end marker found') @@ -433,6 +444,26 @@ def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_rege found_end_marker = True in_analysis_range = True continue + elif self.end_ignore_marker_prefix in rev_line: + marker_run_id = rev_line.split(self.end_ignore_marker_prefix)[1] + ignore_marker_run_ids.append(marker_run_id) + self.print_diagnostic_message('found end ignore marker: %s' + % rev_line[rev_line.index(self.end_ignore_marker_prefix):]) + if not in_analysis_range: + print('ERROR: duplicate end ignore marker found') + sys.exit(err_end_ignore_marker) + in_analysis_range = False + continue + + elif self.start_ignore_marker_prefix in rev_line: + marker_run_id = ignore_marker_run_ids.pop() + self.print_diagnostic_message('found start ignore marker: %s' + % rev_line[rev_line.index(self.start_ignore_marker_prefix):]) + if in_analysis_range or marker_run_id not in rev_line: + print('ERROR: unexpected start ignore marker found') + sys.exit(err_start_ignore_marker) + in_analysis_range = True + continue if not stdin_as_input: if rev_line.find(start_marker) != -1 and 'extract_log' not in rev_line: @@ -544,11 +575,9 @@ def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, ret_code = True - if (action == 'init'): - ret_code = True - elif (action == 'add_end_marker'): + if action in ['init', 'add_end_marker', 'add_start_ignore_mark', 'add_end_ignore_mark']: ret_code = True - elif (action == 'analyze'): + elif action == 'analyze': if out_dir is None or len(out_dir) == 0: print('ERROR: missing required out_dir for analyze action') ret_code = False @@ -735,10 
+764,10 @@ def main(argv): log_file_list = list(filter(None, log_files_in.split(tokenizer))) result = {} - if (action == "init"): + if action == "init": analyzer.place_marker(log_file_list, analyzer.create_start_marker()) return 0 - elif (action == "analyze"): + elif action == "analyze": match_file_list = match_files_in.split(tokenizer) ignore_file_list = ignore_files_in.split(tokenizer) expect_file_list = expect_files_in.split(tokenizer) @@ -758,9 +787,16 @@ def main(argv): unused_regex_messages = [] write_result_file(run_id, out_dir, result, messages_regex_e, unused_regex_messages) write_summary_file(run_id, out_dir, result, unused_regex_messages) - elif (action == "add_end_marker"): + elif action == "add_end_marker": analyzer.place_marker(log_file_list, analyzer.create_end_marker(), wait_for_marker=True) return 0 + elif action == "add_start_ignore_mark": + analyzer.place_marker(log_file_list, analyzer.create_start_ignore_marker(), wait_for_marker=True) + return 0 + elif action == "add_end_ignore_mark": + analyzer.place_marker(log_file_list, analyzer.create_end_ignore_marker(), wait_for_marker=True) + return 0 + else: print('Unknown action:%s specified' % action) diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt index ca0e47ee512..45927ef5e76 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt @@ -18,3 +18,137 @@ r, ".* WARNING syncd#SDK:.* check_attribs_metadata: Not implemented attribute SA r, ".* WARNING syncd#SDK:.* check_attribs_metadata: Not supported attribute SAI_SWITCH_ATTR_AVAILABLE_IPMC_ENTRY.*" r, ".* WARNING syncd#SDK:.* sai_get_attributes: Failed attribs check.*" r, ".* ERR swss#buffermgrd:.* doSpeedUpdateTask: Unable to create/update PG profile for port Ethernet(128|132|256|260).*" +r, ".* WARNING kernel.*probe of intel-spi 
failed with error.*" +r, ".* WARNING syncd#syncd.*discover.*skipping since it causes crash.*" +r, ".* ERR swss#buffermgrd:.* doTask: Failed to process invalid entry, drop it.*" +r, ".* ERR syncd#SDK: :- setQueueCounterList: Queue RID oid:.* can't provide the statistic.*" +r, ".* INFO kernel:.*" +r, ".* INFO systemd.*" +r, ".* ERR kernel:.* Module gpio_ich is blacklisted.*" + +# White list below messages found on KVM for now. Need to address them later. +r, ".* ERR macsec#wpa_supplicant.*l2_packet_send.*Network is down.*" +r, ".* ERR systemd.*Failed to start dhcp_relay container.*" +r, ".* ERR monit.* 'rsyslog' failed to get service data.*" +r, ".* ERR monit.* 'rsyslog' process is not running.*" +r, ".* ERR ntpd.*syntax error.*" +r, ".* ERR teamd#tlm_teamd.*Can't get dump for LAG.*" +r, ".* ERR bgp#root: Error: zebra is not ready to accept connections.*" +r, ".* ERR bgp#/supervisor-proc-exit-listener: Process .* is not running in namespace.*" +r, ".* ERR syncd#syncd.*collectMACsecSACounters: Failed to get stats of MACsec SA.*" +r, ".* ERR syncd#syncd.*meta_sai_validate_oid.*" +r, ".* ERR syncd#syncd.*meta_validate_stats: object.*don't exists.*" +r, ".* ERR syncd#syncd.*translateVidToRid: unable to get RID for VID.*" +r, ".* ERR dhcp_relay.*setsockopt.*No such device.*" + +##### White list below messages found on physical devices for now. Need to address them later. 
+ +# https://dev.azure.com/msazure/One/_workitems/edit/14233578 +r, ".* ERR bgp#bgpcfgd: .*BGPSLBPassive.*attribute is supported.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233579 +r, ".* ERR bgp#bgpcfgd: .*BGPVac.*attribute is supported.*" + +# https://msazure.visualstudio.com/One/_workitems/edit/14233938 +r, ".* ERR swss#fdbsyncd: :- readData: netlink reports an error=-25 on reading a netlink socket.*" +r, ".* ERR swss#fdbsyncd: :- readData: netlink reports an error=-33 on reading a netlink socket.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14213168 +r, ".* ERR /hostcfgd: sonic-kdump-config --disable - failed.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233968 +r, ".* ERR lldp#lldp-syncd.*Could not infer system information from.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233547 +r, ".* ERR mux#linkmgrd: link_manager/LinkManagerStateMachine.*LinkProberWaitMuxUnknownLinkUpTransitionFunction.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233548 +r, ".* ERR mux#linkmgrd: link_manager/LinkManagerStateMachine.*Received unsolicited MUX state.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233594 +r, ".* ERR monit.*Unix socket /var/run/monit.sock connection error.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233936 +r, ".* ERR pmon#thermalctld.*Caught exception while running thermal policy - AttributeError.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233549 +r, ".* ERR pmon#ycable.*executing the cli for prbs thread.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233550 +r, ".* ERR pmon#ycable.*URLError.*No route to host.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233937 +r, ".* ERR rsyslogd: omfwd.*Network is unreachable.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233551 +r, ".* ERR swss#buffermgrd.*doSpeedUpdateTask: Unable to create/update PG profile for port.*No PG profile configured for speed.*" + +# 
https://dev.azure.com/msazure/One/_workitems/edit/14233939 +r, ".* ERR swss#orchagent.*applyDscpToTcMapToSwitch: Switch level DSCP to TC QoS map configuration is not supported.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233552 +r, ".* ERR swss#orchagent.*getPort.*Failed to get bridge port ID for FDB entry.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233940 +r, ".* ERR swss#orchagent.*getResAvailability: Failed to get availability counter for.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233553 +r, ".* ERR swss#orchagent.*handleSaiGetStatus: Encountered failure in get operation.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233554 +r, ".* ERR swss#orchagent.*handleSaiSetStatus: Encountered failure in set operation.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233555 +r, ".* ERR swss#orchagent.*meta_generic_validation_get.*object key.*doesn't exist.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233556 +r, ".* ERR swss#orchagent.*removeLag.*Failed to remove ref count.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233557 +r, ".* ERR swss#orchagent.*set status.*SAI_STATUS_.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233558 +r, ".* ERR swss#orchagent.*setState: State transition from active to active is not-handled.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233559 +r, ".* ERR swss#orchagent.*setState: State transition from standby to standby is not-handled.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233560 +r, ".* ERR syncd#syncd.*check_fdb_event_notification_data.*is not present on local ASIC DB.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233562 +r, ".* ERR syncd#syncd.*getSaiAttrFromDefaultValue.*serialization type.*is not supported yet.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233941 +r, ".* ERR syncd#syncd.*getSupportedBufferPoolCounters.*is not supported on buffer pool oid.*" + +# 
https://dev.azure.com/msazure/One/_workitems/edit/14233570 +r, ".* ERR syncd#syncd.*SAI_API_PORT:brcm_sai_get_port_stats_ext.*Ext Stat Get failed.*" +r, ".* ERR syncd#syncd.*SAI_API_PORT:brcm_sai_get_port_stats.*Multi stats get failed with error.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233568 +r, ".* ERR syncd#syncd.*SAI_API_BUFFER.*Unknown or unsupported stat type.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233942 +r, ".* ERR syncd#syncd.*SAI_API_SWITCH.*Error in capability query for obj type.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233574 +r, ".* INFO systemd.*Finished Kernel crash dump capture service.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233575 +r, ".* INFO systemd.*Starting Kernel crash dump capture service.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14233609 +r, ".*ERR syncd[0-9]*#syncd.*updateSupportedBufferPoolCounters.*BUFFER_POOL_WATERMARK_STAT_COUNTER.*counter SAI_BUFFER_POOL_STAT_XOFF_ROOM_WATERMARK_BYTES is not supported on buffer pool.*SAI_STATUS_INVALID_PARAMETER.*" + +# https://dev.azure.com/msazure/One/_workitems/edit/14482841 +r, ".* ERR dhcp_relay#dhcpmon.*Invalid number of interfaces, downlink/south 1, uplink/north 0.*" + +# https://github.com/sonic-net/sonic-swss/pull/2401 +r, ".* ERR swss#orchagent: :- update: Unsupported FDB Flush: .*" + +# https://msazure.visualstudio.com/One/_workitems/edit/16110065 +r, ".* ERR kernel:.* Set it down before adding it as a team port.*" + +# https://msazure.visualstudio.com/One/_workitems/edit/25018599 +r, ".* ERROR: Failed to parse lldp age.*" diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt index 1ad44be455f..31f10688770 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt @@ -1,4 +1,4 
@@ -r, "\.ERR", "\.WARN", "crash" +r, "\.ERR", "crash" r, "kernel:.*Oops", "kernel:.*hung", "kernel.*oom\s" r, "kernel:.*scheduling", "kernel:.*atomic", "kernel:.*panic" r, "kernel:.*\serr", "kernel:.*allocation", "kernel:.*kill" diff --git a/ansible/roles/test/tasks/snmp/memory.yml b/ansible/roles/test/tasks/snmp/memory.yml index 81d97f44b4d..95d2ed61a2d 100644 --- a/ansible/roles/test/tasks/snmp/memory.yml +++ b/ansible/roles/test/tasks/snmp/memory.yml @@ -46,7 +46,7 @@ - name: 'Validating SNMP total memory matches shell "/proc/meminfo" result' assert: - that: "{{ (ansible_sysTotalMemery|int - shell_total_memory.stdout|int)|abs }} == 0" + that: "{{ (ansible_sysTotalMemory|int - shell_total_memory.stdout|int)|abs }} == 0" - name: Check total free memory via shell shell: grep MemFree /proc/meminfo | awk '{print $2}' @@ -55,11 +55,11 @@ - name: Validating SNMP total free memory matches shell result before stress test assert: - that: "{{ '%.6f'|format((ansible_sysTotalFreeMemery|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemery|int) }} <= {{ tolerance|float }}" + that: "{{ '%.6f'|format((ansible_sysTotalFreeMemory|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemory|int) }} <= {{ tolerance|float }}" - - set_fact: test_momory="{{ ((ansible_sysTotalFreeMemery - min_memory_size|int) / 1024)|int }}" + - set_fact: test_momory="{{ ((ansible_sysTotalFreeMemory - min_memory_size|int) / 1024)|int }}" - - debug: msg="Memory stress test will skip since system total free momory({{ ansible_sysTotalFreeMemery }}) is less than min_memory_size({{ min_memory_size }})" + - debug: msg="Memory stress test will skip since system total free momory({{ ansible_sysTotalFreeMemory }}) is less than min_memory_size({{ min_memory_size }})" when: test_momory|int <= 0 - name: Start memory stress generation @@ -84,7 +84,7 @@ - name: Validating SNMP total free memory matches shell result in stress test assert: - that: "{{ 
'%.6f'|format((ansible_sysTotalFreeMemery|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemery|int) }} <= {{ tolerance|float }}" + that: "{{ '%.6f'|format((ansible_sysTotalFreeMemory|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemory|int) }} <= {{ tolerance|float }}" when: test_momory|int > 0 - name: Stop memory stress generation @@ -110,7 +110,7 @@ - name: Validating SNMP total free memory matches shell result after stress test assert: - that: "{{ '%.6f'|format((ansible_sysTotalFreeMemery|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemery|int) }} <= {{ tolerance|float }}" + that: "{{ '%.6f'|format((ansible_sysTotalFreeMemory|int - shell_total_free_memory.stdout|int)|abs / ansible_sysTotalFreeMemory|int) }} <= {{ tolerance|float }}" when: test_momory|int > 0 always: diff --git a/ansible/roles/test/vars/testcases.yml b/ansible/roles/test/vars/testcases.yml index 1385901213d..de6b7132626 100644 --- a/ansible/roles/test/vars/testcases.yml +++ b/ansible/roles/test/vars/testcases.yml @@ -15,7 +15,7 @@ testcases: ecmp: filename: ecmp.yml topologies: [t1] - + bgp_bounce: filename: bgp_bounce.yml topologies: [t1] @@ -259,7 +259,7 @@ testcases: reboot: filename: reboot.yml - topologies: [t0, t0-52, t0-56, t0-64, t0-64-32, t0-116, t0-120, t1, t1-lag, t1-64-lag, t1-64-lag-clet, ptf32, ptf64] + topologies: [t0, t0-52, t0-56, t0-56-po2vlan, t0-64, t0-64-32, t0-116, t0-120, t1, t1-lag, t1-64-lag, t1-64-lag-clet, t1-56-lag, ptf32, ptf64] repeat_harness: filename: repeat_harness.yml diff --git a/ansible/roles/vm_set/files/mux_simulator.py b/ansible/roles/vm_set/files/mux_simulator.py index 3e86015b084..2d095a95eda 100644 --- a/ansible/roles/vm_set/files/mux_simulator.py +++ b/ansible/roles/vm_set/files/mux_simulator.py @@ -11,6 +11,7 @@ import sys import threading import traceback +import time from collections import defaultdict from logging.handlers import RotatingFileHandler @@ -42,6 +43,8 @@ app = 
Flask(__name__) g_muxes = None # Global variable holding instance of the class Muxes +g_get_mux_counter = 0 +g_start_time = time.time() ################################################## Error Handlers #################################################### @@ -664,12 +667,23 @@ def mux_status(vm_set, port_index): Returns: object: Return a flask response object. """ + global g_get_mux_counter + global g_start_time _validate_vm_set(vm_set) if not g_muxes.has_mux(port_index): abort(404, 'Unknown bridge, vm_set={}, port_index={}'.format(vm_set, port_index)) if request.method == 'GET': - return g_muxes.get_mux_status(port_index) + response = g_muxes.get_mux_status(port_index) + g_get_mux_counter += 1 + elapsed_time = time.time() - g_start_time + if elapsed_time > 60 or g_get_mux_counter % 100 == 0: + app.logger.info('===== No.{} GET method since last log ====='.format(g_get_mux_counter)) + app.logger.info('===== {} GET {} with port {} ====='.format(request.remote_addr, request.url, port_index)) + app.logger.info('===== GET port {} with response {} ====='.format(port_index, response)) + g_get_mux_counter = 0 + g_start_time = time.time() + return response elif request.method == 'POST': # Set the active side of mux data = _validate_posted_data(request) @@ -694,7 +708,10 @@ def all_mux_status(vm_set): """ _validate_vm_set(vm_set) if request.method == 'GET': - return g_muxes.get_mux_status() + response = g_muxes.get_mux_status() + app.logger.info('===== {} GET {} with all ports ====='.format(request.remote_addr, request.url)) + app.logger.info('===== GET all ports with response {} ====='.format(response)) + return response elif request.method == 'POST': # Set the active side for all mux bridges data = _validate_posted_data(request) diff --git a/ansible/roles/vm_set/library/vm_topology.py b/ansible/roles/vm_set/library/vm_topology.py index 433510fe78c..7b9a292ef6b 100644 --- a/ansible/roles/vm_set/library/vm_topology.py +++ b/ansible/roles/vm_set/library/vm_topology.py @@ 
-235,7 +235,7 @@ def __init__(self, vm_names, vm_properties, fp_mtu, max_fp_num, topo): self.topo = topo return - def init(self, vm_set_name, vm_base, duts_fp_ports, duts_name, ptf_exists=True): + def init(self, vm_set_name, vm_base, duts_fp_ports, duts_name, ptf_exists=True, check_bridge=True): self.vm_set_name = vm_set_name self.duts_name = duts_name @@ -254,12 +254,13 @@ def init(self, vm_set_name, vm_base, duts_fp_ports, duts_name, ptf_exists=True): for k, v in self.topo['VMs'].items(): if self.vm_base_index + v['vm_offset'] < len(self.vm_names): self.VMs[k] = v - - for hostname, attrs in self.VMs.items(): - vmname = self.vm_names[self.vm_base_index + attrs['vm_offset']] - vm_bridges = self.get_vm_bridges(vmname) - if len(attrs['vlans']) > len(vm_bridges): - raise Exception("Wrong vlans parameter for hostname %s, vm %s. Too many vlans. Maximum is %d" % (hostname, vmname, len(vm_bridges))) + if check_bridge: + for hostname, attrs in self.VMs.items(): + vmname = self.vm_names[self.vm_base_index + attrs['vm_offset']] + vm_bridges = self.get_vm_bridges(vmname) + if len(attrs['vlans']) > len(vm_bridges): + raise Exception("Wrong vlans parameter for hostname %s, vm %s. Too many vlans. 
Maximum is %d" \ + % (hostname, vmname, len(vm_bridges))) self._is_multi_duts = True if len(self.duts_name) > 1 else False # For now distinguish a cable topology since it does not contain any vms and there are two ToR's @@ -645,12 +646,13 @@ def bind_vs_dut_ports(self, br_name, dut_ports): def unbind_vs_dut_ports(self, br_name, dut_ports): """unbind all ports except the vm port from an ovs bridge""" - ports = VMTopology.get_ovs_br_ports(br_name) - for dut_index, a_port in enumerate(dut_ports): - dut_name = self.duts_name[dut_index] - port_name = "{}-{}".format(dut_name, (a_port + 1)) - if port_name in ports: - VMTopology.cmd('ovs-vsctl del-port %s %s' % (br_name, port_name)) + if VMTopology.intf_exists(br_name): + ports = VMTopology.get_ovs_br_ports(br_name) + for dut_index, a_port in enumerate(dut_ports): + dut_name = self.duts_name[dut_index] + port_name = "{}-{}".format(dut_name, (a_port + 1)) + if port_name in ports: + VMTopology.cmd('ovs-vsctl del-port %s %s' % (br_name, port_name)) def bind_ovs_ports(self, br_name, dut_iface, injected_iface, vm_iface, disconnect_vm=False): """ @@ -697,18 +699,35 @@ def bind_ovs_ports(self, br_name, dut_iface, injected_iface, vm_iface, disconnec VMTopology.cmd("ovs-ofctl add-flow %s table=0,in_port=%s,action=output:%s" % (br_name, dut_iface_id, injected_iface_id)) else: # Add flow from external iface to a VM and a ptf container - VMTopology.cmd("ovs-ofctl add-flow %s table=0,in_port=%s,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + # Allow BGP, IPinIP, fragmented packets, ICMP, SNMP packets and layer2 packets from DUT to neighbors + # Block other traffic from DUT to EOS for EOS's stability, + # Allow all traffic from DUT to PTF. 
+ VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=10,tcp,in_port=%s,tp_src=179,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=10,tcp,in_port=%s,tp_dst=179,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=10,tcp6,in_port=%s,tp_src=179,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=10,tcp6,in_port=%s,tp_dst=179,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=10,ip,in_port=%s,nw_proto=4,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,ip,in_port=%s,nw_frag=yes,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,ipv6,in_port=%s,nw_frag=yes,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,icmp,in_port=%s,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,icmp6,in_port=%s,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,udp,in_port=%s,udp_src=161,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=8,udp6,in_port=%s,udp_src=161,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=5,ip,in_port=%s,action=output:%s" % (br_name, dut_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s 
table=0,priority=5,ipv6,in_port=%s,action=output:%s" % (br_name, dut_iface_id, injected_iface_id)) + VMTopology.cmd("ovs-ofctl add-flow %s table=0,priority=3,in_port=%s,action=output:%s,%s" % (br_name, dut_iface_id, vm_iface_id, injected_iface_id)) # Add flow from a ptf container to an external iface VMTopology.cmd("ovs-ofctl add-flow %s table=0,in_port=%s,action=output:%s" % (br_name, injected_iface_id, dut_iface_id)) def unbind_ovs_ports(self, br_name, vm_port): """unbind all ports except the vm port from an ovs bridge""" - ports = VMTopology.get_ovs_br_ports(br_name) + if VMTopology.intf_exists(br_name): + ports = VMTopology.get_ovs_br_ports(br_name) - for port in ports: - if port != vm_port: - VMTopology.cmd('ovs-vsctl del-port %s %s' % (br_name, port)) + for port in ports: + if port != vm_port: + VMTopology.cmd('ovs-vsctl del-port %s %s' % (br_name, port)) def unbind_ovs_port(self, br_name, port): """unbind a port from an ovs bridge""" @@ -999,7 +1018,9 @@ def cmd(cmdline, grep_cmd=None, retry=1, negative=False): continue # Reached max retry, fail with exception - raise Exception('ret_code=%d, error message="%s". cmd="%s"' % (ret_code, err, cmdline)) + err_msg = 'ret_code=%d, error message="%s". 
cmd="%s%s"' \ + % (ret_code, err, cmdline, ' | ' + grep_cmd if grep_cmd else '') + raise Exception(err_msg) @staticmethod def get_ovs_br_ports(bridge): @@ -1331,7 +1352,7 @@ def main(): vm_base = None vm_type = module.params['vm_type'] - net.init(vm_set_name, vm_base, duts_fp_ports, duts_name) + net.init(vm_set_name, vm_base, duts_fp_ports, duts_name, check_bridge=False) if module.params['duts_mgmt_port']: for dut_mgmt_port in module.params['duts_mgmt_port']: diff --git a/ansible/roles/vm_set/tasks/add_topo.yml b/ansible/roles/vm_set/tasks/add_topo.yml index 8178aa8693d..99483ddf1be 100644 --- a/ansible/roles/vm_set/tasks/add_topo.yml +++ b/ansible/roles/vm_set/tasks/add_topo.yml @@ -164,8 +164,8 @@ capabilities: - net_admin privileged: yes - memory: 8G - memory_swap: 8G + memory: 16G + memory_swap: 32G become: yes - name: Enable ipv6 for docker container ptf_{{ vm_set_name }} diff --git a/ansible/roles/vm_set/tasks/announce_routes.yml b/ansible/roles/vm_set/tasks/announce_routes.yml index 23626f421c8..fa341c93bd4 100644 --- a/ansible/roles/vm_set/tasks/announce_routes.yml +++ b/ansible/roles/vm_set/tasks/announce_routes.yml @@ -18,73 +18,196 @@ groups: - ptf_host -- name: Set facts +- name: Set default exabgp_action set_fact: - ptf_local_ipv4: "{{ configuration_properties.common.nhipv4|default('10.10.246.254') }}" - ptf_local_ipv6: "{{ configuration_properties.common.nhipv6|default('fc0a::ff') }}" - -- name: (Re)start exabgp processes for IPv4 on PTF - exabgp: - name: "{{ vm_item.key }}" - state: "restarted" - router_id: "{{ ptf_local_ipv4 }}" - local_ip: "{{ ptf_local_ipv4 }}" - peer_ip: "{{ configuration[vm_item.key].bp_interface.ipv4.split('/')[0] }}" - local_asn: "{{ configuration[vm_item.key].bgp.asn }}" - peer_asn: "{{ configuration[vm_item.key].bgp.asn }}" - port: "{{ 5000 + vm_item.value.vm_offset|int }}" - async: 300 - poll: 0 - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - loop_var: vm_item - delegate_to: "{{ ptf_host }}" - -- name: 
(Re)start exabgp processes for IPv6 on PTF - exabgp: - name: "{{ vm_item.key }}-v6" - state: "restarted" - router_id: "{{ ptf_local_ipv4 }}" - local_ip: "{{ ptf_local_ipv6 }}" - peer_ip: "{{ configuration[vm_item.key].bp_interface.ipv6.split('/')[0] }}" - local_asn: "{{ configuration[vm_item.key].bgp.asn }}" - peer_asn: "{{ configuration[vm_item.key].bgp.asn }}" - port: "{{ 6000 + vm_item.value.vm_offset|int }}" - async: 300 - poll: 0 - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - loop_var: vm_item - delegate_to: "{{ ptf_host }}" - -- name: Wait for exabgp to (re)start - pause: - seconds: "{{ 2 * topology['VMs']|dict2items|length }}" - -- name: Verify that exabgp processes for IPv4 are started - wait_for: - host: "{{ ptf_host_ip }}" - port: "{{ 5000 + topology.VMs[vm_item.key].vm_offset|int }}" - state: "started" - timeout: 180 - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - loop_var: vm_item - delegate_to: localhost + exabgp_action: start + when: exabgp_action is not defined + +- block: + - name: Set facts + set_fact: + ptf_local_ipv4: "{{ configuration_properties.common.nhipv4|default('10.10.246.254') }}" + ptf_local_ipv6: "{{ configuration_properties.common.nhipv6|default('fc0a::ff') }}" + + - name: Configure exabgp processes for IPv4 on PTF + exabgp: + name: "{{ vm_item.key }}" + state: "configure" + router_id: "{{ ptf_local_ipv4 }}" + local_ip: "{{ ptf_local_ipv4 }}" + peer_ip: "{{ configuration[vm_item.key].bp_interface.ipv4.split('/')[0] }}" + local_asn: "{{ configuration[vm_item.key].bgp.asn }}" + peer_asn: "{{ configuration[vm_item.key].bgp.asn }}" + port: "{{ 5000 + vm_item.value.vm_offset|int }}" + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: "{{ ptf_host }}" + + - name: Gather exabgp v4 programs + set_fact: + program_group_name: "exabgpv4" + program_group_programs: "{{ topology['VMs'].keys() | map('regex_replace', '(.*)', 'exabgp-\\1') | join(',')}}" + + - name: Configure exabgpv4 
group + template: + src: "roles/vm_set/templates/exabgp.conf.j2" + dest: "/etc/supervisor/conf.d/exabgpv4.conf" + delegate_to: "{{ ptf_host }}" + + - name: configure exabgp processes for IPv6 on PTF + exabgp: + name: "{{ vm_item.key }}-v6" + state: "configure" + router_id: "{{ ptf_local_ipv4 }}" + local_ip: "{{ ptf_local_ipv6 }}" + peer_ip: "{{ configuration[vm_item.key].bp_interface.ipv6.split('/')[0] }}" + local_asn: "{{ configuration[vm_item.key].bgp.asn }}" + peer_asn: "{{ configuration[vm_item.key].bgp.asn }}" + port: "{{ 6000 + vm_item.value.vm_offset|int }}" + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: "{{ ptf_host }}" + + - name: Gather exabgp v6 programs + set_fact: + program_group_name: "exabgpv6" + program_group_programs: "{{ topology['VMs'].keys() | map('regex_replace', '(.*)', 'exabgp-\\1-v6') | join(',')}}" + + - name: Configure exabgpv6 group + template: + src: "roles/vm_set/templates/exabgp.conf.j2" + dest: "/etc/supervisor/conf.d/exabgpv6.conf" + delegate_to: "{{ ptf_host }}" + + - name: Add exabgpv4 supervisor config and start related processes + supervisorctl: + name: "exabgpv4:" + state: present # present contains `supervisorctl reread` and `supervisorctl add` + delegate_to: "{{ ptf_host }}" + + - name: Add exabgpv6 supervisor config and start related processes + supervisorctl: + name: "exabgpv6:" + state: present # present contains `supervisorctl reread` and `supervisorctl add` + delegate_to: "{{ ptf_host }}" + + - name: Verify that exabgp processes for IPv4 are started + wait_for: + host: "{{ ptf_host_ip }}" + port: "{{ 5000 + topology.VMs[vm_item.key].vm_offset|int }}" + state: "started" + timeout: 180 + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: localhost + + - name: Verify that exabgp processes for IPv6 are started + wait_for: + host: "{{ ptf_host_ip }}" + port: "{{ 6000 + topology.VMs[vm_item.key].vm_offset|int }}" + state: "started" + 
timeout: 180 + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: localhost -- name: Verify that exabgp processes for IPv6 are started + - name: Announce routes + announce_routes: + topo_name: "{{ topo }}" + ptf_ip: "{{ ptf_host_ip }}" + delegate_to: localhost + when: exabgp_action == 'start' + +- name: Check if ptf is accessible wait_for: host: "{{ ptf_host_ip }}" - port: "{{ 6000 + topology.VMs[vm_item.key].vm_offset|int }}" - state: "started" - timeout: 180 - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - loop_var: vm_item + port: 22 + timeout: 3 + register: ptf_accessible + ignore_errors: true delegate_to: localhost -- name: Announce routes - announce_routes: - topo_name: "{{ topo }}" - ptf_ip: "{{ ptf_host_ip }}" - delegate_to: localhost +- name: Check and stop exabgp processes on PTF + block: + - name: Check exabgp processes for IPv4 running on PTF + shell: "supervisorctl status exabgpv4:* | grep RUNNING | wc -l" + register: exabgpv4_running + delegate_to: "{{ ptf_host }}" + + - name: Stop exabgp processes for IPv4 on PTF + supervisorctl: + name: "exabgpv4:" + state: stopped + delegate_to: "{{ ptf_host }}" + when: exabgpv4_running.stdout|int > 0 + + - name: Check exabgp processes for IPv6 running on PTF + shell: "supervisorctl status exabgpv6:* | grep RUNNING | wc -l" + register: exabgpv6_running + delegate_to: "{{ ptf_host }}" + + - name: Stop exabgp processes for IPv6 on PTF + supervisorctl: + name: "exabgpv6:" + state: stopped + delegate_to: "{{ ptf_host }}" + when: exabgpv6_running.stdout|int > 0 + + - name: Check and stop exabgp processes on PTF (for old naming convention) + block: + - name: Check if exabgp processes running on PTF (for old naming convention) + shell: "supervisorctl status | grep RUNNING | grep ^exabgp-.* | wc -l" + register: exabgp_running_old + delegate_to: "{{ ptf_host }}" + + - name: Stop exabgp processes for IPv4 on PTF (for old naming convention) + exabgp: + name: "{{ vm_item.key 
}}" + state: "stopped" + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: "{{ ptf_host }}" + when: exabgp_running_old.stdout|int > 0 + + - name: Stop exabgp processes for IPv6 on PTF (for old naming convention) + exabgp: + name: "{{ vm_item.key }}-v6" + state: "stopped" + loop: "{{ topology['VMs']|dict2items }}" + loop_control: + loop_var: vm_item + delegate_to: "{{ ptf_host }}" + when: exabgp_running_old.stdout|int > 0 + + - name: Get count of exabgp processes running on PTF + shell: "supervisorctl status | grep RUNNING | grep ^exabgp.* | wc -l" + register: exabgp_running + delegate_to: "{{ ptf_host }}" + + - name: Verify no exabgp processes running on PTF + assert: + that: exabgp_running.stdout|int == 0 + fail_msg: "exabgp processes are still running on PTF, please check manually" + + - name: Stop all processes managed by supervisor on PTF + block: + - name: Stop all processes managed by supervisor on PTF + shell: "supervisorctl stop all" + delegate_to: "{{ ptf_host }}" + + - name: Get count of running processes managed by supervisor + shell: "supervisorctl status | grep RUNNING | wc -l" + register: supervisor_proc_running + delegate_to: "{{ ptf_host }}" + + - name: Verify all the processes managed by supervisor are not running + assert: + that: supervisor_proc_running.stdout|int == 0 + fail_msg: "There are still processes managed by supervisor running on PTF, please check manually" + + when: exabgp_action == 'stop' and ptf_accessible is defined and not ptf_accessible.failed diff --git a/ansible/roles/vm_set/tasks/control_mux_simulator.yml b/ansible/roles/vm_set/tasks/control_mux_simulator.yml index a8c3614e86b..b9ec5d8431e 100644 --- a/ansible/roles/vm_set/tasks/control_mux_simulator.yml +++ b/ansible/roles/vm_set/tasks/control_mux_simulator.yml @@ -11,8 +11,19 @@ - name: Start mux simulator block: + - name: Set default Flask version + set_fact: + flask_version: "1.1.2" + python_command: "python" + + - name: Use newer 
Flask version for pip3 + set_fact: + flask_version: "2.0.3" + python_command: "python3" + when: pip_executable == "pip3" + - name: Install flask - pip: name=flask version=2.0.3 state=forcereinstall executable={{ pip_executable }} + pip: name=flask version={{ flask_version }} state=forcereinstall executable={{ pip_executable }} become: yes environment: "{{ proxy_env | default({}) }}" diff --git a/ansible/roles/vm_set/tasks/docker.yml b/ansible/roles/vm_set/tasks/docker.yml index e9158eba253..e80a59d38f7 100644 --- a/ansible/roles/vm_set/tasks/docker.yml +++ b/ansible/roles/vm_set/tasks/docker.yml @@ -47,17 +47,51 @@ become: yes when: host_distribution_version.stdout == "20.04" and docker_repo.matched == 0 -- name: Install docker-ce - apt: pkg=docker-ce update_cache=yes +- name: Add docker repository for 22.04 + apt_repository: + repo: deb [arch=amd64] https://download.docker.com/linux/ubuntu jammy stable + state: present become: yes - environment: "{{ proxy_env | default({}) }}" + when: host_distribution_version.stdout == "22.04" and docker_repo.matched == 0 -- name: remove old python packages - pip: name=docker-py state=absent executable={{ pip_executable }} +# In ansible 2.8, there isn't update_cache_retries option in apt module, we can manually run update as a seperate and retryable step +- name: Run the "apt-get update" as a separate and retryable step + apt: + update_cache: yes become: yes environment: "{{ proxy_env | default({}) }}" + register: apt_update_res + until: apt_update_res.cache_updated is defined and apt_update_res.cache_updated + retries: 5 + delay: 10 -- name: Install python packages - pip: name=docker version=4.1.0 state=forcereinstall executable={{ pip_executable }} +- name: Install docker-ce + apt: pkg=docker-ce become: yes environment: "{{ proxy_env | default({}) }}" + +- name: Update python2 packages + block: + - name: remove old python packages + pip: name=docker-py state=absent executable={{ pip_executable }} + become: yes + environment: 
"{{ proxy_env | default({}) }}" + ignore_errors: yes + - name: Install python packages + pip: name=docker version=4.1.0 state=forcereinstall executable={{ pip_executable }} + become: yes + environment: "{{ proxy_env | default({}) }}" + when: pip_executable=="pip" + +- name: Update python3 packages + block: + - name: remove old python packages + pip: name=docker-py state=absent executable={{ pip_executable }} + become: yes + environment: "{{ proxy_env | default({}) }}" + ignore_errors: yes + - name: Install python packages + pip: name=docker version=6.1.0 state=forcereinstall executable={{ pip_executable }} + become: yes + environment: "{{ proxy_env | default({}) }}" + when: pip_executable=="pip3" diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 7d5b4475dac..a0cebb89459 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ b/ansible/roles/vm_set/tasks/main.yml @@ -111,6 +111,11 @@ pip_executable: pip3 when: pip_executable is not defined and host_distribution_version.stdout == "20.04" +- name: remove old python packages + pip: name=docker-py state=absent executable={{ pip_executable }} + environment: "{{ proxy_env | default({}) }}" + ignore_errors: yes + - include_tasks: docker.yml when: package_installation|bool diff --git a/ansible/roles/vm_set/tasks/remove_topo.yml b/ansible/roles/vm_set/tasks/remove_topo.yml index b336a48cc6d..a02aaed6d25 100644 --- a/ansible/roles/vm_set/tasks/remove_topo.yml +++ b/ansible/roles/vm_set/tasks/remove_topo.yml @@ -20,6 +20,14 @@ mux_simulator_action: stop when: "'dualtor' in topo" + - name: Stop exabgp processes + include_tasks: announce_routes.yml + vars: + exabgp_action: stop + when: + - topo != 'fullmesh' + - not 'ptf' in topo + - name: Stop PTF portchannel service include_tasks: ptf_portchannel.yml vars: @@ -55,6 +63,13 @@ loop_control: loop_var: dut_name + - name: Stop ptf container ptf_{{ vm_set_name }} + docker_container: + name: ptf_{{ vm_set_name }} + state: stopped + become: yes 
+ ignore_errors: yes + - name: Remove ptf docker container ptf_{{ vm_set_name }} docker_container: name: "ptf_{{ vm_set_name }}" diff --git a/ansible/roles/vm_set/tasks/renumber_topo.yml b/ansible/roles/vm_set/tasks/renumber_topo.yml index 75828b45cdd..b2f71a6939e 100644 --- a/ansible/roles/vm_set/tasks/renumber_topo.yml +++ b/ansible/roles/vm_set/tasks/renumber_topo.yml @@ -21,6 +21,14 @@ mux_simulator_action: stop when: "'dualtor' in topo" + - name: Stop exabgp processes + include_tasks: announce_routes.yml + vars: + exabgp_action: stop + when: + - topo != 'fullmesh' + - not 'ptf' in topo + - name: Stop PTF portchannel service include_tasks: ptf_portchannel.yml vars: @@ -44,6 +52,13 @@ echo "-----------------------------" >> /tmp/ptf_network_{{ vm_set_name }}.log when: ptf_docker_info.exists + - name: Stop ptf container ptf_{{ vm_set_name }} + docker_container: + name: ptf_{{ vm_set_name }} + state: stopped + become: yes + ignore_errors: yes + - name: Remove ptf container ptf_{{ vm_set_name }} docker_container: name: ptf_{{ vm_set_name }} diff --git a/ansible/roles/vm_set/templates/cleanup.sh.j2 b/ansible/roles/vm_set/templates/cleanup.sh.j2 index 1073d59abb6..155ba66e07b 100644 --- a/ansible/roles/vm_set/templates/cleanup.sh.j2 +++ b/ansible/roles/vm_set/templates/cleanup.sh.j2 @@ -1,6 +1,6 @@ #!/bin/bash -set -e +set -ex # stop rynning VMs test -z "$(virsh list --state-running --name)" || virsh list --state-running --name | xargs -I % virsh destroy % @@ -17,6 +17,20 @@ rm -f {{ root_path }}/disks/* # remove all ovs bridges test -z "$(ovs-vsctl list-br)" || ovs-vsctl list-br | xargs -I % ovs-vsctl del-br % +# stop exabgp process before stopping ptf container +ptf_container_names=$(docker ps | grep ptf | awk '{print $NF}') +for ptf in $ptf_container_names; +do + process_name="$(docker exec $ptf supervisorctl status | awk '{print $1}')" + for process in $process_name; + do + process_status="$(docker exec $ptf supervisorctl status | grep $process | awk '{print 
$2}')" + if [[ $process_status == "RUNNING" ]]; then + docker exec $ptf supervisorctl stop $process + fi + done +done + # stop all running docker containers test -z "$(docker ps -q)" || docker stop $(docker ps -q) diff --git a/ansible/roles/vm_set/templates/exabgp.conf.j2 b/ansible/roles/vm_set/templates/exabgp.conf.j2 new file mode 100644 index 00000000000..1c3f8bbde26 --- /dev/null +++ b/ansible/roles/vm_set/templates/exabgp.conf.j2 @@ -0,0 +1,2 @@ +[group:{{ program_group_name }}] +programs={{ program_group_programs }} diff --git a/ansible/roles/vm_set/templates/mux-simulator.service.j2 b/ansible/roles/vm_set/templates/mux-simulator.service.j2 index 139d213fac9..575dcf19dfd 100644 --- a/ansible/roles/vm_set/templates/mux-simulator.service.j2 +++ b/ansible/roles/vm_set/templates/mux-simulator.service.j2 @@ -3,4 +3,4 @@ Description=mux simulator After=network.target [Service] -ExecStart=/usr/bin/env python {{ abs_root_path }}/mux_simulator.py {{ mux_simulator_port }} {{ vm_set_name }} -v +ExecStart=/usr/bin/env {{python_command}} {{ abs_root_path }}/mux_simulator.py {{ mux_simulator_port }} {{ vm_set_name }} -v diff --git a/ansible/setup-management-network.sh b/ansible/setup-management-network.sh index ff58f76b394..bc81e7945e2 100755 --- a/ansible/setup-management-network.sh +++ b/ansible/setup-management-network.sh @@ -4,6 +4,31 @@ if [[ $(id -u) -ne 0 ]]; then exit fi +function show_help_and_exit() +{ + echo "Usage ${SCRIPT} [options]" + echo " options with (*) must be provided" + echo " -h -? : get this help" + echo " -d : Delete existed bridge" + + + exit $1 +} + +DEL_EXISTED_BRIDGE=false + +while getopts "h?d" opt; do + case ${opt} in + h|\? ) + show_help_and_exit 0 + ;; + d) + DEL_EXISTED_BRIDGE=true + ;; + esac +done + + echo "Refreshing apt package lists..." apt-get update echo @@ -41,7 +66,17 @@ if ! command -v ethtool; then fi echo -echo "STEP 5: Checking if bridge br1 already exists..." +echo "STEP 5: Delete existed br1..." 
+if [ "$DEL_EXISTED_BRIDGE" = true ] && ifconfig br1 >/dev/null 2>&1; then + echo "br1 exists, remove it." + ifconfig br1 down + brctl delbr br1 +else + echo "Not delete existed bridge or br1 not exists, skipping..." +fi +echo + +echo "STEP 6: Checking if bridge br1 already exists..." if ! ifconfig br1; then echo "br1 not found, creating bridge network" brctl addbr br1 @@ -49,7 +84,7 @@ if ! ifconfig br1; then fi echo -echo "STEP 6: Configuring br1 interface..." +echo "STEP 7: Configuring br1 interface..." echo "Assigning 10.250.0.1/24 to br1" ifconfig br1 10.250.0.1/24 ifconfig br1 inet6 add fec0::1/64 diff --git a/ansible/templates/minigraph_meta.j2 b/ansible/templates/minigraph_meta.j2 index 3f459d61da8..2ca82f24762 100644 --- a/ansible/templates/minigraph_meta.j2 +++ b/ansible/templates/minigraph_meta.j2 @@ -28,6 +28,13 @@ True {% endif %} +{% if ('t1' in topo) and (enable_tunnel_qos_remap|default('false')|bool) %} + + DownstreamRedundancyTypes + + Gemini + +{% endif %} {% if dhcp_servers %} DhcpResources diff --git a/ansible/templates/minigraph_png.j2 b/ansible/templates/minigraph_png.j2 index b263cb2e3e2..bb915b5378f 100644 --- a/ansible/templates/minigraph_png.j2 +++ b/ansible/templates/minigraph_png.j2 @@ -77,7 +77,6 @@ {% endfor %} {% endif %} {% for asic_intf in front_panel_asic_ifnames %} -{% if inventory_hostname not in device_conn or port_alias[loop.index - 1] in device_conn[inventory_hostname] %} DeviceInterfaceLink {% if port_alias[loop.index - 1] in port_speed %} @@ -93,7 +92,6 @@ {{ port_alias[loop.index - 1] }} true -{% endif %} {% endfor %} {% endif %} @@ -187,6 +185,10 @@ {% set dev_type = 'CoreRouter' %} {% elif 'T0' in dev %} {% set dev_type = 'ToRRouter' %} +{% elif 'M1' in dev %} +{% set dev_type = 'MgmtLeafRouter' %} +{% elif 'MX' in dev %} +{% set dev_type = 'BmcMgmtToRRouter' %} {% else %} {% set dev_ytpe = 'Unknown' %} {% endif %} diff --git a/ansible/testbed-cli.sh b/ansible/testbed-cli.sh index 544a32eb8f2..b7421bf4f1e 100755 --- 
a/ansible/testbed-cli.sh +++ b/ansible/testbed-cli.sh @@ -17,6 +17,7 @@ function usage echo " $0 [options] (config-y-cable) " echo " $0 [options] (create-master | destroy-master) " echo " $0 [options] restart-ptf " + echo " $0 [options] collect-show-tech " echo echo "Options:" echo " -t : testbed CSV file name (default: 'testbed.csv')" @@ -67,6 +68,9 @@ function usage echo "To create Kubernetes master on a server: $0 -m k8s_ubuntu create-master 'k8s-server-name' ~/.password" echo "To destroy Kubernetes master on a server: $0 -m k8s_ubuntu destroy-master 'k8s-server-name' ~/.password" echo "To restart ptf of specified testbed: $0 restart-ptf 'testbed-name' ~/.password" + echo "To collect show techsupport result of a testbed: $0 collect-show-tech 'testbed-name' 'inventory' ~/.password" + echo " collect-show-tech supports specify output path for dumped files" + echo " -e output_path=" echo echo "You should define your testbed in testbed CSV file" echo @@ -550,6 +554,23 @@ function cleanup_vmhost --vault-password-file="${passwd}" -l "${server}" $@ } +function collect_show_tech +{ + testbed_name=$1 + inventory=$2 + passfile=$3 + shift + shift + shift + + echo "Collect show techsupport result on testbed '$testbed_name'" + + ansible-playbook -i "$inventory" collect_show_tech.yml --vault-password-file="$passfile" -e testbed_name="$testbed_name" -e testbed_file=$tbfile $@ + + echo Done + +} + vmfile=veos tbfile=testbed.csv vm_type=veos @@ -635,6 +656,8 @@ case "${subcmd}" in ;; restart-ptf) restart_ptf $@ ;; + collect-show-tech) collect_show_tech $@ + ;; *) usage ;; esac diff --git a/ansible/upgrade_sonic.py b/ansible/upgrade_sonic.py new file mode 100755 index 00000000000..6e0562e3319 --- /dev/null +++ b/ansible/upgrade_sonic.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python2 + +import argparse +import logging +import sys + +from devutil.devices.factory import init_localhost, init_sonichosts, init_testbed_sonichosts +from devutil.devices.sonic import upgrade_image + 
+logging.basicConfig( + stream=sys.stdout, + level=logging.DEBUG, + format="%(asctime)s %(filename)s#%(lineno)d %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +DISK_USED_PERCENT = 50 + +RC_INIT_FAILED = 1 +RC_UPGRADE_FAILED = 3 +RC_INVALID_ARGS = 5 + + +def main(args): + + localhost = init_localhost(args.inventory, options={"verbosity": args.verbosity}) + if not localhost: + sys.exit(RC_INIT_FAILED) + + if args.testbed_name: + sonichosts = init_testbed_sonichosts(args.inventory, args.testbed_name, options={"verbosity": args.verbosity}) + else: + sonichosts = init_sonichosts(args.inventory, args.devices, options={"verbosity": args.verbosity}) + if not sonichosts: + sys.exit(RC_INIT_FAILED) + + result = upgrade_image( + sonichosts, + localhost, + args.image_url, + upgrade_type=args.upgrade_type, + disk_used_percent=args.disk_used_percent, + onie_pause_time=args.pause_time + ) + if not result: + sys.exit(RC_UPGRADE_FAILED) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Tool for SONiC image upgrade.") + + parser.add_argument( + "-i", "--inventory", + type=str, + dest="inventory", + required=True, + help="Ansible inventory file") + + group = parser.add_mutually_exclusive_group() + + group.add_argument( + "-d", "--device", + type=str, + dest="devices", + help="Name of the device to be upgraded." + "Mutually exclusive with testbed name argument." + ) + + group.add_argument( + "-t", "--testbed-name", + type=str, + dest="testbed_name", + help="Testbed name. DUTs of the specified testbed will be upgraded." + "This argument is mutually exclusive with device name argument '-d' or '--device'." + ) + + parser.add_argument( + "-u", "--url", + type=str, + dest="image_url", + required=True, + help="SONiC image url." 
+ ) + + parser.add_argument( + "-y", "--type", + type=str, + choices=["sonic", "onie"], + dest="upgrade_type", + required=False, + default="sonic", + help="Upgrade type." + ) + + parser.add_argument( + "-f", "--tbfile", + type=str, + dest="tbfile", + default="testbed.yaml", + help="Testbed definition file." + ) + + parser.add_argument( + "-p", "--pause-time", + type=int, + dest="pause_time", + default=0, + help="Seconds to pause after ONIE upgrade." + ) + + parser.add_argument( + "--disk-used-percent", + type=int, + dest="disk_used_percent", + default=50, + help="Disk used percent." + ) + + parser.add_argument( + "-v", "--verbosity", + type=int, + dest="verbosity", + default=2, + help="Log verbosity." + ) + + args = parser.parse_args() + + if not args.testbed_name and not args.devices: + logger.error("Either testbed name or dut devices must be specified.") + parser.print_help() + sys.exit(RC_INVALID_ARGS) + + main(args) diff --git a/ansible/vars/topo_m0.yml b/ansible/vars/topo_m0.yml new file mode 100644 index 00000000000..1c4c7c70083 --- /dev/null +++ b/ansible/vars/topo_m0.yml @@ -0,0 +1,238 @@ +topology: + host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 + - 22 + - 23 + - 24 + - 25 + - 26 + - 27 + - 28 + - 29 + - 30 + - 31 + - 32 + - 33 + - 34 + - 35 + - 36 + - 37 + - 38 + - 39 + - 40 + - 41 + - 42 + - 43 + - 44 + - 45 + VMs: + ARISTA01MX: + vlans: + - 46 + vm_offset: 0 + ARISTA02MX: + vlans: + - 47 + vm_offset: 1 + ARISTA01M1: + vlans: + - 48 + vm_offset: 2 + ARISTA02M1: + vlans: + - 49 + vm_offset: 3 + ARISTA03M1: + vlans: + - 50 + vm_offset: 4 + ARISTA04M1: + vlans: + - 51 + vm_offset: 5 + DUT: + vlan_configs: + default_vlan_config: one_vlan_a + one_vlan_a: + Vlan1000: + id: 1000 + intfs: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 
43, 44, 45] + prefix: 192.168.0.1/24 + prefix_v6: fc02:1000::1/64 + tag: 1000 + +configuration_properties: + common: + dut_asn: 65100 + dut_type: MgmtToRRouter + failure_rate: 0 + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + colo_number: 30 + m0_number: 16 + m0_asn_start: 64600 + m0_subnet_number: 1 + m0_subnet_size: 64 + mx_subnet_size: 64 + mx_subnet_number: 1 + mx_asn_start: 68000 + mx: + swrole: mx + m1: + swrole: m1 + +configuration: + ARISTA01MX: + properties: + - common + - mx + bgp: + asn: 64001 + peers: + 65100: + - 10.0.0.64 + - FC00::81 + interfaces: + Loopback0: + ipv4: 100.1.0.33/32 + ipv6: 2064:100::21/128 + Ethernet1: + ipv4: 10.0.0.65/31 + ipv6: fc00::82/126 + bp_interface: + ipv4: 10.10.246.33/24 + ipv6: fc0a::44/64 + + ARISTA02MX: + properties: + - common + - mx + bgp: + asn: 64002 + peers: + 65100: + - 10.0.0.66 + - FC00::85 + interfaces: + Loopback0: + ipv4: 100.1.0.34/32 + ipv6: 2064:100::22/128 + Ethernet1: + ipv4: 10.0.0.67/31 + ipv6: fc00::86/126 + bp_interface: + ipv4: 10.10.246.34/24 + ipv6: fc0a::47/64 + + ARISTA01M1: + properties: + - common + - m1 + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.29/32 + ipv6: 2064:100::1d/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.29/24 + ipv6: fc0a::3a/64 + + ARISTA02M1: + properties: + - common + - m1 + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.30/32 + ipv6: 2064:100::1e/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 10.10.246.30/24 + ipv6: fc0a::3d/64 + + ARISTA03M1: + properties: + - common + - m1 + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.31/32 + ipv6: 2064:100::1f/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 
10.10.246.31/24 + ipv6: fc0a::3e/64 + + ARISTA04M1: + properties: + - common + - m1 + bgp: + asn: 65200 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.32/32 + ipv6: 2064:100::20/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.32/24 + ipv6: fc0a::41/64 diff --git a/ansible/vars/topo_t0-54-po2vlan.yml b/ansible/vars/topo_t0-54-po2vlan.yml new file mode 100644 index 00000000000..a59a3ff5b35 --- /dev/null +++ b/ansible/vars/topo_t0-54-po2vlan.yml @@ -0,0 +1,321 @@ +topology: + host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 + - 22 + - 23 + - 24 + - 25 + - 26 + - 27 + - 28 + - 29 + - 30 + - 31 + - 32 + - 33 + - 34 + - 35 + - 36 + - 37 + - 42 + - 43 + - 44 + - 45 + - 46 + - 47 + - 48 + - 49 + - 50 + - 51 + - 52 + - 53 + disabled_host_interfaces: + - 3 + - 5 + - 7 + - 9 + - 15 + - 17 + - 19 + - 20 + - 21 + - 23 + - 25 + - 27 + - 29 + - 31 + - 33 + - 34 + - 35 + - 37 + - 43 + - 45 + - 47 + - 49 + - 51 + - 53 + VMs: + ARISTA01T1: + vlans: + - 10 + vm_offset: 0 + ARISTA02T1: + vlans: + - 11 + vm_offset: 1 + ARISTA03T1: + vlans: + - 12 + vm_offset: 2 + ARISTA04T1: + vlans: + - 13 + vm_offset: 3 + ARISTA05T1: + vlans: + - 38 + vm_offset: 4 + ARISTA06T1: + vlans: + - 39 + vm_offset: 5 + ARISTA07T1: + vlans: + - 40 + vm_offset: 6 + ARISTA08T1: + vlans: + - 41 + vm_offset: 7 + DUT: + portchannel_config: + PortChannel201: + intfs: [0, 1] + vlan_configs: + default_vlan_config: two_vlan_a + two_vlan_a: + Vlan101: + id: 101 + intfs: [] + portchannels: ['PortChannel201'] + prefix: 192.168.0.1/22 + prefix_v6: fc02:100::1/64 + tag: 101 + Vlan102: + id: 102 + intfs: [2, 4, 6, 8, 14, 16, 18, 22, 24, 26, 28, 30, 32, 36, 42, 44, 46, 48, 50, 52] + portchannels: ['PortChannel201'] + prefix: 192.168.4.1/22 + prefix_v6: fc02:200::1/64 + tag: 102 + +configuration_properties: + common: + dut_asn: 65100 
+ dut_type: ToRRouter + swrole: leaf + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + max_tor_subnet_number: 16 + tor_subnet_size: 128 + spine_asn: 65534 + leaf_asn_start: 64600 + tor_asn_start: 65100 + failure_rate: 0 + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + +configuration: + ARISTA01T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.29/32 + ipv6: 2064:100::1d/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.29/24 + ipv6: fc0a::3a/64 + + ARISTA02T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.30/32 + ipv6: 2064:100::1e/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 10.10.246.30/24 + ipv6: fc0a::3b/64 + + ARISTA03T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.31/32 + ipv6: 2064:100::1f/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 10.10.246.31/24 + ipv6: fc0a::3c/64 + + ARISTA04T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.32/32 + ipv6: 2064:100::20/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.32/24 + ipv6: fc0a::3d/64 + + ARISTA05T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.64 + - FC00::81 + interfaces: + Loopback0: + ipv4: 100.1.0.33/32 + ipv6: 2064:100::21/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.65/31 + ipv6: fc00::82/126 + bp_interface: + ipv4: 10.10.246.33/24 + ipv6: fc0a::3e/64 + + ARISTA06T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.66 + - 
FC00::85 + interfaces: + Loopback0: + ipv4: 100.1.0.34/32 + ipv6: 2064:100::22/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.67/31 + ipv6: fc00::86/126 + bp_interface: + ipv4: 10.10.246.34/24 + ipv6: fc0a::3f/64 + + ARISTA07T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.68 + - FC00::89 + interfaces: + Loopback0: + ipv4: 100.1.0.35/32 + ipv6: 2064:100::23/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.69/31 + ipv6: fc00::8a/126 + bp_interface: + ipv4: 10.10.246.35/24 + ipv6: fc0a::40/64 + + ARISTA08T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.70 + - FC00::8D + interfaces: + Loopback0: + ipv4: 100.1.0.36/32 + ipv6: 2064:100::24/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.71/31 + ipv6: fc00::8e/126 + bp_interface: + ipv4: 10.10.246.36/24 + ipv6: fc0a::41/64 diff --git a/ansible/vars/topo_t0-56-d48c8.yml b/ansible/vars/topo_t0-56-d48c8.yml new file mode 100644 index 00000000000..a65c9aff9f9 --- /dev/null +++ b/ansible/vars/topo_t0-56-d48c8.yml @@ -0,0 +1,350 @@ +topology: + host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 24 + - 25 + - 26 + - 27 + - 28 + - 29 + - 30 + - 31 + - 36 + - 37 + - 38 + - 39 + - 40 + - 41 + - 42 + - 43 + - 44 + - 45 + - 46 + - 47 + - 48 + - 49 + - 50 + - 51 + - 52 + - 53 + - 54 + - 55 + disabled_host_interfaces: + - 2 + - 3 + - 6 + - 7 + - 10 + - 11 + - 14 + - 15 + - 18 + - 19 + - 26 + - 27 + - 30 + - 31 + - 38 + - 39 + - 42 + - 43 + - 46 + - 47 + - 50 + - 51 + - 54 + - 55 + VMs: + ARISTA01T1: + vlans: + - 20 + vm_offset: 0 + ARISTA02T1: + vlans: + - 21 + vm_offset: 1 + ARISTA03T1: + vlans: + - 22 + vm_offset: 2 + ARISTA04T1: + vlans: + - 23 + vm_offset: 3 + ARISTA05T1: + vlans: + - 32 + vm_offset: 4 + ARISTA06T1: + vlans: + - 33 + vm_offset: 5 + ARISTA07T1: + vlans: + - 34 + vm_offset: 6 + ARISTA08T1: + vlans: + - 35 + vm_offset: 7 + DUT: + 
vlan_configs: + default_vlan_config: one_vlan_a + one_vlan_a: + Vlan1000: + id: 1000 + intfs: [0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 24, 25, 28, 29, 36, 37, 40, 41, 44, 45, 48, 49, 52, 53] + prefix: 192.168.0.1/21 + prefix_v6: fc02:1000::1/64 + tag: 1000 + two_vlan_a: + Vlan100: + id: 100 + intfs: [0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 24, 25] + prefix: 192.168.0.1/22 + prefix_v6: fc02:100::1/64 + tag: 100 + Vlan200: + id: 200 + intfs: [28, 29, 36, 37, 40, 41, 44, 45, 48, 49, 52, 53] + prefix: 192.168.4.1/22 + prefix_v6: fc02:200::1/64 + tag: 200 + four_vlan_a: + Vlan1000: + id: 1000 + intfs: [0, 1, 4, 5, 8, 9] + prefix: 192.168.0.1/23 + prefix_v6: fc02:400::1/64 + tag: 1000 + Vlan2000: + id: 2000 + intfs: [12, 13, 16, 17, 24, 25] + prefix: 192.168.2.1/23 + prefix_v6: fc02:401::1/64 + tag: 2000 + Vlan3000: + id: 3000 + intfs: [28, 29, 36, 37, 40, 41] + prefix: 192.168.4.1/23 + prefix_v6: fc02:402::1/64 + tag: 3000 + Vlan4000: + id: 4000 + intfs: [44, 45, 48, 49, 52, 53] + prefix: 192.168.6.1/23 + prefix_v6: fc02:403::1/64 + tag: 4000 + +configuration_properties: + common: + dut_asn: 65100 + dut_type: ToRRouter + swrole: leaf + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + max_tor_subnet_number: 16 + tor_subnet_size: 128 + spine_asn: 65534 + leaf_asn_start: 64600 + tor_asn_start: 65100 + failure_rate: 0 + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + +configuration: + ARISTA01T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.1/32 + ipv6: 2064:100::1/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.1/24 + ipv6: fc0a::1/64 + + ARISTA02T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.2/32 + ipv6: 2064:100::2/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 
10.10.246.2/24 + ipv6: fc0a::2/64 + + ARISTA03T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.3/32 + ipv6: 2064:100::3/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 10.10.246.3/24 + ipv6: fc0a::3/64 + + ARISTA04T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.4/32 + ipv6: 2064:100::4/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.4/24 + ipv6: fc0a::4/64 + + ARISTA05T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.64 + - FC00::81 + interfaces: + Loopback0: + ipv4: 100.1.0.5/32 + ipv6: 2064:100::5/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.65/31 + ipv6: fc00::82/126 + bp_interface: + ipv4: 10.10.246.5/24 + ipv6: fc0a::5/64 + + ARISTA06T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.66 + - FC00::85 + interfaces: + Loopback0: + ipv4: 100.1.0.6/32 + ipv6: 2064:100::6/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.67/31 + ipv6: fc00::86/126 + bp_interface: + ipv4: 10.10.246.6/24 + ipv6: fc0a::6/64 + + ARISTA07T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.68 + - FC00::89 + interfaces: + Loopback0: + ipv4: 100.1.0.7/32 + ipv6: 2064:100::7/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.69/31 + ipv6: fc00::8a/126 + bp_interface: + ipv4: 10.10.246.7/24 + ipv6: fc0a::7/64 + + ARISTA08T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.70 + - FC00::8D + interfaces: + Loopback0: + ipv4: 100.1.0.8/32 + ipv6: 2064:100::8/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.71/31 + ipv6: fc00::8e/126 + bp_interface: + ipv4: 10.10.246.8/24 + ipv6: fc0a::8/64 diff --git a/ansible/vars/topo_t0-56-po2vlan.yml 
b/ansible/vars/topo_t0-56-po2vlan.yml index 31bb730d49c..470eee88a6e 100644 --- a/ansible/vars/topo_t0-56-po2vlan.yml +++ b/ansible/vars/topo_t0-56-po2vlan.yml @@ -110,7 +110,7 @@ topology: vm_offset: 7 DUT: portchannel_config: - PortChannel101: + PortChannel201: intfs: [0, 4] vlan_configs: default_vlan_config: two_vlan_a @@ -118,14 +118,14 @@ topology: Vlan101: id: 101 intfs: [] - portchannels: ['PortChannel101'] + portchannels: ['PortChannel201'] prefix: 192.168.0.1/22 prefix_v6: fc02:100::1/64 tag: 101 Vlan102: id: 102 intfs: [8, 10, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52, 54] - portchannels: ['PortChannel101'] + portchannels: ['PortChannel201'] prefix: 192.168.4.1/22 prefix_v6: fc02:200::1/64 tag: 102 diff --git a/ansible/vars/topo_t0-8-lag.yml b/ansible/vars/topo_t0-8-lag.yml new file mode 100644 index 00000000000..68565bdc5d0 --- /dev/null +++ b/ansible/vars/topo_t0-8-lag.yml @@ -0,0 +1,319 @@ +topology: + host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 28 + - 29 + - 30 + - 31 + disabled_host_interfaces: + - 0 + VMs: + ARISTA01T1: + vlans: + - 6 + - 7 + vm_offset: 0 + ARISTA02T1: + vlans: + - 8 + - 9 + vm_offset: 1 + ARISTA03T1: + vlans: + - 22 + - 23 + vm_offset: 2 + ARISTA04T1: + vlans: + - 24 + - 25 + vm_offset: 3 + ARISTA05T1: + vlans: + - 4 + - 5 + vm_offset: 4 + ARISTA06T1: + vlans: + - 10 + - 11 + vm_offset: 5 + ARISTA07T1: + vlans: + - 20 + - 21 + vm_offset: 6 + ARISTA08T1: + vlans: + - 26 + - 27 + vm_offset: 7 + DUT: + vlan_configs: + default_vlan_config: one_vlan_a + one_vlan_a: + Vlan1000: + id: 1000 + intfs: [1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31] + prefix: 192.168.0.1/21 + prefix_v6: fc02:1000::1/64 + tag: 1000 + two_vlan_a: + Vlan100: + id: 100 + intfs: [2, 29, 31] + prefix: 192.168.0.1/22 + prefix_v6: fc02:100::1/64 + tag: 100 + Vlan200: + id: 200 + intfs: [1, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 30] + prefix: 192.168.4.1/22 + prefix_v6: 
fc02:200::1/64 + tag: 200 + four_vlan_a: + Vlan1000: + id: 1000 + intfs: [0, 1] + prefix: 192.168.0.1/23 + prefix_v6: fc02:400::1/64 + tag: 1000 + Vlan2000: + id: 2000 + intfs: [2, 3] + prefix: 192.168.2.1/23 + prefix_v6: fc02:401::1/64 + tag: 2000 + Vlan3000: + id: 3000 + intfs: [12, 13] + prefix: 192.168.4.1/23 + prefix_v6: fc02:402::1/64 + tag: 3000 + Vlan4000: + id: 4000 + intfs: [14, 15] + prefix: 192.168.6.1/23 + prefix_v6: fc02:403::1/64 + tag: 4000 + +configuration_properties: + common: + dut_asn: 65100 + dut_type: ToRRouter + swrole: leaf + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + max_tor_subnet_number: 16 + tor_subnet_size: 128 + spine_asn: 65534 + leaf_asn_start: 64600 + tor_asn_start: 65500 + failure_rate: 0 + +configuration: + ARISTA01T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.56 + - FC00::71 + interfaces: + Loopback0: + ipv4: 100.1.0.29/32 + ipv6: 2064:100::1d/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.57/31 + ipv6: fc00::72/126 + bp_interface: + ipv4: 10.10.246.29/24 + ipv6: fc0a::1d/64 + + ARISTA02T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.58 + - FC00::75 + interfaces: + Loopback0: + ipv4: 100.1.0.30/32 + ipv6: 2064:100::1e/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.59/31 + ipv6: fc00::76/126 + bp_interface: + ipv4: 10.10.246.30/24 + ipv6: fc0a::1e/64 + + ARISTA03T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.60 + - FC00::79 + interfaces: + Loopback0: + ipv4: 100.1.0.31/32 + ipv6: 2064:100::1f/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.61/31 + ipv6: fc00::7a/126 + bp_interface: + ipv4: 10.10.246.31/24 + ipv6: fc0a::1f/64 + + ARISTA04T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.62 + - FC00::7D + interfaces: + Loopback0: + ipv4: 100.1.0.32/32 
+ ipv6: 2064:100::20/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.63/31 + ipv6: fc00::7e/126 + bp_interface: + ipv4: 10.10.246.32/24 + ipv6: fc0a::20/64 + + ARISTA05T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.64 + - FC00::81 + interfaces: + Loopback0: + ipv4: 100.1.0.33/32 + ipv6: 2064:100::21/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.65/31 + ipv6: fc00::82/126 + bp_interface: + ipv4: 10.10.246.33/24 + ipv6: fc0a::21/64 + + ARISTA06T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.66 + - FC00::85 + interfaces: + Loopback0: + ipv4: 100.1.0.34/32 + ipv6: 2064:100::22/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.67/31 + ipv6: fc00::86/126 + bp_interface: + ipv4: 10.10.246.34/24 + ipv6: fc0a::22/64 + + ARISTA07T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.68 + - FC00::89 + interfaces: + Loopback0: + ipv4: 100.1.0.35/32 + ipv6: 2064:100::23/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.69/31 + ipv6: fc00::8a/126 + bp_interface: + ipv4: 10.10.246.35/24 + ipv6: fc0a::23/64 + + ARISTA08T1: + properties: + - common + bgp: + asn: 64600 + peers: + 65100: + - 10.0.0.70 + - FC00::8D + interfaces: + Loopback0: + ipv4: 100.1.0.36/32 + ipv6: 2064:100::24/128 + Ethernet1: + lacp: 1 + Ethernet2: + lacp: 1 + Port-Channel1: + ipv4: 10.0.0.71/31 + ipv6: fc00::8e/126 + bp_interface: + ipv4: 10.10.246.36/24 + ipv6: fc0a::24/64 diff --git a/ansible/vars/topo_tgen-t1-64-3.yml b/ansible/vars/topo_tgen-t1-64-3.yml new file mode 100644 index 00000000000..3a2f43b8258 --- /dev/null +++ b/ansible/vars/topo_tgen-t1-64-3.yml @@ -0,0 +1,161 @@ +topology: + disabled_host_interfaces: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 + - 22 + - 23 + - 24 + - 25 + - 26 + 
- 27 + - 28 + - 29 + - 30 + - 31 + - 32 + - 33 + - 34 + - 35 + - 36 + - 37 + - 38 + - 39 + - 40 + - 41 + - 42 + - 43 + - 44 + - 48 + - 49 + - 50 + - 51 + - 52 + - 53 + - 54 + - 55 + - 56 + - 57 + - 58 + - 59 + - 60 + - 61 + - 62 + - 63 + + VMs: + ARISTA01T2: + vlans: + - 45 + vm_offset: 0 + ARISTA01T0: + vlans: + - 46 + vm_offset: 1 + ARISTA02T0: + vlans: + - 47 + vm_offset: 2 + +configuration_properties: + common: + dut_asn: 64607 + dut_type: LeafRouter + nhipv4: 10.10.246.254 + nhipv6: FC0A::FF + podset_number: 200 + tor_number: 16 + tor_subnet_number: 2 + max_tor_subnet_number: 16 + tor_subnet_size: 128 + spine: + swrole: spine + tor: + swrole: tor + +configuration: + ARISTA01T2: + properties: + - common + - spine + bgp: + asn: 65200 + peers: + 64607: + - 1.0.0.2 + - FC00::2 + interfaces: + Loopback0: + ipv4: 100.1.0.3/32 + ipv6: 2064:100::3/128 + Ethernet1: + lacp: 1 + Port-Channel1: + ipv4: 1.0.0.3/31 + ipv6: fc00::3/126 + bp_interface: + ipv4: 10.10.246.3/24 + ipv6: fc0a::3/64 + + ARISTA01T0: + properties: + - common + - tor + tornum: 1 + bgp: + asn: 64001 + peers: + 64607: + - 1.0.0.32 + - FC00::32 + interfaces: + Loopback0: + ipv4: 100.1.0.33/32 + ipv6: 2064:100::33/128 + Ethernet1: + ipv4: 1.0.0.33/31 + ipv6: fc00::33/126 + bp_interface: + ipv4: 10.10.246.33/24 + ipv6: fc0a::33/64 + + ARISTA02T0: + properties: + - common + - tor + tornum: 1 + bgp: + asn: 64001 + peers: + 64607: + - 1.0.0.35 + - FC00::35 + interfaces: + Loopback0: + ipv4: 100.1.0.36/32 + ipv6: 2064:100::36/128 + Ethernet1: + ipv4: 1.0.0.36/31 + ipv6: fc00::36/126 + bp_interface: + ipv4: 10.10.246.36/24 + ipv6: fc0a::36/64 diff --git a/ansible/veos b/ansible/veos index 7fe24b8f453..a267223ac9d 100644 --- a/ansible/veos +++ b/ansible/veos @@ -19,6 +19,7 @@ all: - t0 - t0-16 - t0-56 + - t0-56-d48c8 - t0-52 - ptf32 - ptf64 @@ -35,6 +36,7 @@ all: - tgen-t1-3-lag - mgmttor - t2 + - m0 children: server_1: server_2: diff --git a/ansible/veos_vtb b/ansible/veos_vtb index f4770e74820..4e14ebfca8c 
100644 --- a/ansible/veos_vtb +++ b/ansible/veos_vtb @@ -18,6 +18,7 @@ all: - t0 - t0-16 - t0-56 + - t0-56-d48c8 - t0-52 - ptf32 - ptf64 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 42fece148e5..c4eca23eb07 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,96 +4,104 @@ # https://aka.ms/yaml pr: -- master + branches: + include: + - 202012 + paths: + exclude: + - .github +trigger: none name: $(TeamProject)_$(Build.DefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r) +resources: + repositories: + - repository: sonic-mgmt + type: github + name: sonic-net/sonic-mgmt + ref: master + endpoint: sonic-net + stages: -- stage: Test +- stage: Pre_test + variables: + - group: GIT_SECRETS + jobs: + - job: validate_test_cases + displayName: "Validate Test Cases" + timeoutInMinutes: 20 + continueOnError: false + pool: ubuntu-20.04 + steps: + - template: .azure-pipelines/pytest-collect-only.yml +- stage: Test + dependsOn: Pre_test + condition: and(succeeded(), in(dependencies.Pre_test.result, 'Succeeded')) variables: + - group: SONiC-Elastictest - name: inventory value: veos_vtb - name: testbed_file value: vtestbed.csv + - group: GIT_SECRETS jobs: - - job: t0_part1 - pool: sonictest - displayName: "kvmtest-t0-part1" + - job: t0_elastictest + pool: ubuntu-20.04 + displayName: "kvmtest-t0 by Elastictest" timeoutInMinutes: 240 - + continueOnError: false steps: - - template: .azure-pipelines/run-test-template.yml + - template: .azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt parameters: - dut: vlab-01 - tbname: vms-kvm-t0 - ptf_name: ptf_vms6-1 - tbtype: t0 - vmtype: ceos - section: part-1 + TOPOLOGY: t0 + MIN_WORKER: $(T0_INSTANCE_NUM) + MAX_WORKER: $(T0_INSTANCE_NUM) + KVM_IMAGE_BRANCH: "202012" + MGMT_BRANCH: "202012" - - job: t0_part2 - pool: sonictest - displayName: "kvmtest-t0-part2" + - job: t0_2vlans_elastictest + pool: ubuntu-20.04 + displayName: "kvmtest-t0-2vlans by Elastictest" timeoutInMinutes: 240 - + continueOnError: 
false steps: - - template: .azure-pipelines/run-test-template.yml + - template: .azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt parameters: - dut: vlab-01 - tbname: vms-kvm-t0 - ptf_name: ptf_vms6-1 - tbtype: t0 - vmtype: ceos - section: part-2 - - - job: - pool: sonictest - displayName: "kvmtest-t0" - timeoutInMinutes: 400 - dependsOn: - - t0_part1 - - t0_part2 - condition: always() - variables: - resultOfPart1: $[ dependencies.t0_part1.result ] - resultOfPart2: $[ dependencies.t0_part2.result ] - - steps: - - script: | - if [ $(resultOfPart1) == "Succeeded" ] && [ $(resultOfPart2) == "Succeeded" ]; then - echo "Both job kvmtest-t0-part1 and kvmtest-t0-part2 are passed." - exit 0 - else - echo "Either job kvmtest-t0-part1 or job kvmtest-t0-part2 failed! Please check the detailed information." - exit 1 - fi + TOPOLOGY: t0 + TEST_SET: t0-2vlans + MIN_WORKER: $(T0_2VLANS_INSTANCE_NUM) + MAX_WORKER: $(T0_2VLANS_INSTANCE_NUM) + DEPLOY_MG_EXTRA_PARAMS: "-e vlan_config=two_vlan_a" + KVM_IMAGE_BRANCH: "202012" + MGMT_BRANCH: "202012" - - job: - pool: sonictest-t1-lag - displayName: "kvmtest-t1-lag" + - job: t1_lag_elastictest + pool: ubuntu-20.04 + displayName: "kvmtest-t1-lag by Elastictest" timeoutInMinutes: 240 - + continueOnError: false steps: - - template: .azure-pipelines/run-test-template.yml + - template: .azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt parameters: - dut: vlab-03 - tbname: vms-kvm-t1-lag - ptf_name: ptf_vms6-2 - tbtype: t1-lag - vmtype: ceos + TOPOLOGY: t1-lag + MIN_WORKER: $(T1_LAG_INSTANCE_NUM) + MAX_WORKER: $(T1_LAG_INSTANCE_NUM) + KVM_IMAGE_BRANCH: "202012" + MGMT_BRANCH: "202012" - - job: - pool: sonictest-sonic-t0 - displayName: "kvmtest-t0-sonic" + - job: dualtor_elastictest + pool: ubuntu-20.04 + displayName: "kvmtest-dualtor-t0 by Elastictest" timeoutInMinutes: 240 - + continueOnError: false steps: - - template: .azure-pipelines/run-test-template.yml + - template: 
.azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt parameters: - dut: vlab-01 - tbname: vms-kvm-t0 - ptf_name: ptf_vms6-1 - tbtype: t0-sonic - vmtype: vsonic + TOPOLOGY: dualtor + MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) + MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) + COMMON_EXTRA_PARAMS: "--disable_loganalyzer " + KVM_IMAGE_BRANCH: "202012" + MGMT_BRANCH: "202012" diff --git a/docs/api_wiki/README.md b/docs/api_wiki/README.md index 689c60347a0..d365b8e5db2 100644 --- a/docs/api_wiki/README.md +++ b/docs/api_wiki/README.md @@ -119,6 +119,8 @@ def test_fun(duthosts, rand_one_dut_hostname, ptfhost): - [check_bgp_session_state](sonichost_methods/check_bgp_session_state.md) - Check whether the state of the bgp session matches a specified state for a list of bgp neighbors. +- [check_bgp_session_state_all_asics](sonichost_methods/check_bgp_session_state_all_asics.md) - Check whether the state of the bgp session matches a specified state for a list of bgp neighbors for each asic. + - [check_default_route](sonichost_methods/check_default_route.md) - Provides the status of the default route - [critical_process_status](sonichost_methods/critical_process_status.md) - Gets status of service and provides list of exited and running member processes. @@ -139,6 +141,8 @@ def test_fun(duthosts, rand_one_dut_hostname, ptfhost): - [get_bgp_neighbors](sonichost_methods/get_bgp_neighbors.md) - This command provides a summary of the bgp neighbors peered with the DUT. Returns a dictionary that maps the BGP address for each neighbor to another dictionary listing information on that neighbor device. +- [get_bgp_neighbors_per_asic](sonichost_methods/get_bgp_neighbors_per_asic.md) - This command provides a summary of the bgp neighbors peered with the DUT for each asic. Returns a dictionary of dictionary that maps the BGP address for each neighbor to another dictionary listing information on that neighbor device for each asic namespace. 
+ - [get_bgp_neighbor_info](sonichost_methods/get_bgp_neighbor_info.md) - Provides BGP neighbor info - [get_container_autorestart_states](sonichost_methods/get_container_autorestart_states.md) - Get container names and their autorestart states. Containers that do not have the autorestart feature implemented are skipped by this test. @@ -159,6 +163,8 @@ def test_fun(duthosts, rand_one_dut_hostname, ptfhost): - [get_image_info](sonichost_methods/get_image_info.md) - Get list of images installed on the DUT. +- [get_interfaces_status](sonichost_methods/get_interfaces_status.md) - Get interfaces status on the DUT and parse the result into a dict. + - [get_ip_route_info](sonichost_methods/get_ip_route_info.md) - Returns route information for a destionation. The destination could an ip address or ip prefix. - [get_monit_services_status](sonichost_methods/get_monit_services_status.md) - Get metadata on services monitored by Monit. @@ -336,6 +342,8 @@ the remote host. - [check_bgp_statistic](sonic_asic_methods/check_bgp_statistic.md) - Checks that the BGP statistic matches some expected value. +- [check_bgp_session_state](sonichost_methods/check_bgp_session_state.md) - Check whether the state of the bgp session matches a specified state for a list of bgp neighbors for the current ASIC. + - [command](sonic_asic_methods/command.md) - Runs commands specified for the ASIC calling the method. - [config_facts](sonic_asic_methods/config_facts.md) - Current config facts for ASIC. @@ -440,4 +448,4 @@ the remote host. - [ptfhost](preconfigured/ptfhost.md) - The PTF container host instance. Used to run ptf methods and anisble modules from the PTF. -- [rand_one_dut_hostname](preconfigured/rand_one_dut_hostname.md) - A random hostname belonging to one of the DUT instances defined by the deployed testbed. \ No newline at end of file +- [rand_one_dut_hostname](preconfigured/rand_one_dut_hostname.md) - A random hostname belonging to one of the DUT instances defined by the deployed testbed. 
diff --git a/docs/api_wiki/sonic_asic_methods/check_bgp_session_state.md b/docs/api_wiki/sonic_asic_methods/check_bgp_session_state.md new file mode 100644 index 00000000000..f459d1ee39b --- /dev/null +++ b/docs/api_wiki/sonic_asic_methods/check_bgp_session_state.md @@ -0,0 +1,32 @@ +# check_bgp_session_state + +- [Overview](#overview) +- [Examples](#examples) +- [Arguments](#arguments) +- [Expected Output](#expected-output) + +## Overview +Checks that the BGP statistic matches some expected value. + +## Examples +``` +def test_fun(duthosts, rand_one_dut_hostname, enum_frontend_asic_index): + duthost = duthosts[rand_one_dut_hostname] + + sonic_asic = duthost.asic_instance(asic_index=enum_frontend_asic_index) + + asic_bgp_info = asic.check_bgp_session_state(neigh_ips, state) +``` + +## Arguments +- `neigh_ips` - List of neighbor BGP IPs that are being checked against the `state` param + - Required: `True` + - Type: `List` + - Element-Type: `unicode` +- `state` - What state the BGP sessions are expected to be in + - Required: `False` + - Type: `String` + - Default: `established` + +## Expected Output +`True` if all neighbors in `neigh_ips` match the `state` param, `False` otherwise diff --git a/docs/api_wiki/sonichost_methods/check_bgp_session_state_all_asics.md b/docs/api_wiki/sonichost_methods/check_bgp_session_state_all_asics.md new file mode 100644 index 00000000000..6932dc0189b --- /dev/null +++ b/docs/api_wiki/sonichost_methods/check_bgp_session_state_all_asics.md @@ -0,0 +1,30 @@ +# check_bgp_session_state_all_asics + +- [Overview](#overview) +- [Examples](#examples) +- [Arguments](#arguments) +- [Expected Output](#expected-output) + +## Overview +Check whether the state of the bgp session matches a specified state for a list of bgp neighbors. 
+ +## Examples +``` +def test_fun(duthosts, rand_one_dut_hostname): + duthost = duthosts[rand_one_dut_hostname] + + bgp_info = duthost.get_bgp_neighbors_per_asic() + bgp_match = duthost.check_bgp_session_state_all_asics(bgp_info, state) +``` + +## Arguments +- `bgp_neighbors` - Dictionary with List of neighbor BGP IPs that are being checked against the `state` param for each namespace. + - Required: `True` + - Type: `Dict` +- `state` - What state the BGP sessions are expected to be in + - Required: `False` + - Type: `String` + - Default: `established` + +## Expected Output +`True` if all neighbors in `bgp_neighbors` match the `state` param, `False` otherwise diff --git a/docs/api_wiki/sonichost_methods/get_bgp_neighbors_per_asic.md b/docs/api_wiki/sonichost_methods/get_bgp_neighbors_per_asic.md new file mode 100644 index 00000000000..55fefc13059 --- /dev/null +++ b/docs/api_wiki/sonichost_methods/get_bgp_neighbors_per_asic.md @@ -0,0 +1,62 @@ +# get_bgp_neighbors + +- [Overview](#overview) +- [Examples](#examples) +- [Arguments](#arguments) +- [Expected Output](#expected-output) + + +## Overview +This command provides a summary of the bgp neighbors peered with the DUT. Returns a dictionary of dictionary that maps the BGP address for each neighbor to another dictionary listing information on that neighbor device for each asic namespace. + +## Examples +``` +def test_fun(duthosts, rand_one_dut_hostname): + duthost = duthosts[rand_one_dut_hostname] + + bgp_info = duthost.get_bgp_neighbors() +``` + +## Arguments + +This command takes no arguments + +## Expected Output +This command returns a dictionary of dictionary mapping the neigbor's BGP address to another dictionary describing the neighbor device for each asic namespace. The dictionary hierarchy is described below, with each indentation describing a sub-dictionary: + +- `{namespace}` - Dictionary with BGP neighbor information matching the provided namespace. 
+ - `{bgp-ip}` - dictionary with information on the neihboring device matching the provided BGP ip + - `remote AS` - ASN defined for the peer group + - `local AS` - Local ASN for neighbor + - `description` - The name of the BGP neighbor + - `admin` - Admin status for interface used to communicate with neighbor + - `accepted prefixes` - Number of accepted prefixes for that BGP neigbhor + - `connections established` - number of connections established by BGP neighbor + - `connections dropped` - number of connections dropped by neighbor + - `peer group` - Name of peer group + - `state` - Current state of BGP neighbor + - `remote routerid` - remote router id on neighbor + - `mrai` - Minimum Route Advertisement Interval + - `ip_version` - version of IP used for communication with neighbor + - `message statistics` - Dictionary describing statistics on communication with neighbor + - `Capability` + - `rcvd` - Capability messages received + - `sent` - Capability messages sent + - `Notifications` + - `rcvd` - Notification messages received + - `sent` - Notification messages sent + - `Route Refresh` + - `rcvd` - Route Refresh messages received + - `sent` - Route Refresh messages sent + - `Updates` + - `rcvd` - Update messages received + - `sent` - Update messages sent + - `Keepalives` + - `rcvd` - Keepalive messages received + - `sent` - Keepalive messages sent + - `Opens` + - `rcvd` - Open messages received + - `sent` - Open messages sent + - `Total` + - `rcvd` - total messages received + - `sent` - total messages sent diff --git a/docs/api_wiki/sonichost_methods/get_interfaces_status.md b/docs/api_wiki/sonichost_methods/get_interfaces_status.md new file mode 100644 index 00000000000..4a88d05ede9 --- /dev/null +++ b/docs/api_wiki/sonichost_methods/get_interfaces_status.md @@ -0,0 +1,56 @@ +# get_interfaces_status + +- [Overview](#overview) +- [Examples](#examples) +- [Arguments](#arguments) +- [Expected Output](#expected-output) + +## Overview +Get interfaces status on the 
DUT and parse the result into a dict. + +## Examples +```python +def test_fun(duthosts, rand_one_dut_hostname): + duthost = duthosts[rand_one_dut_hostname] + + images = duthost.get_interfaces_status() +``` + +## Arguments +This function takes no arguments. + +## Expected Output +Returns dicitonary with the DUT interfaces status. + +Example output: + +```json +{ + "Ethernet0": { + "oper": "down", + "lanes": "25,26,27,28", + "fec": "N/A", + "asym pfc": "off", + "admin": "down", + "type": "N/A", + "vlan": "routed", + "mtu": "9100", + "alias": "fortyGigE0/0", + "interface": "Ethernet0", + "speed": "40G" + }, + "PortChannel101": { + "oper": "up", + "lanes": "N/A", + "fec": "N/A", + "asym pfc": "N/A", + "admin": "up", + "type": "N/A", + "vlan": "routed", + "mtu": "9100", + "alias": "N/A", + "interface": "PortChannel101", + "speed": "40G" + } +} +``` diff --git a/docs/testbed/READ.testbed.AnnounceRoutes.Internal.md b/docs/testbed/READ.testbed.AnnounceRoutes.Internal.md new file mode 100644 index 00000000000..a22504d4385 --- /dev/null +++ b/docs/testbed/READ.testbed.AnnounceRoutes.Internal.md @@ -0,0 +1,54 @@ +# Announce routes + +Announce routes to the exabgp processes running in the PTF container. + +## Overview + +This method is automatically run in add-topo period or manually run from localhost. (Related api doc: [announce_routes.md](../api_wiki/ansible_methods/announce_routes.md)). +In this method, we generate routes for different topos by configuration defined in `ansible/vars/topo_*.yml` files. + +Get the configuration of all neighbor VMs, and use different rules to generate routes according to the router type of the neighbor. Then send post requests to the exabgp processes running in the PTF container to announce routes to DUT. 
+ +|topo type|upstream router type|downstream router type| +|:----:|:----:|:----:| +|t0|leaf|N/A| +|t1|spine|tor| +|t2|core|leaf| +|t0-mclag|leaf|N/A| +|m0|m1|mx| + +## M0 + +### Design + +For M0, we have 2 sets of routes that we are going to advertise: +- 1st set routes are advertised by the upstream VMs (M1 devices). +- 2nd set routes are advertised by the downstream VMs (MX devices). + +The picture below shows how the routes is announces to DUT. The green arrows indicate routes that announced by upstream M1. The blue arrows indicate routes that announced by downstream MX. The yellow line indicates subnets that directly connected to DUT, which need to be skipped when generating routes. +![](./img/announce_routes_m0.png) + +### Details + +Some definitions: +|definition|description| +|:----|:----| +|colo|cluster of M0 devices| +|colo_number|number of COLOs| +|m0_number|number of subnet in a M0| +|m0_subnet_number|number of members in a M0 subnet| +|mx_number|number of MXs connected to a M0| +|mx_subnet_number|number of members in a MX subnet| + +The total number of routes are controlled by the colo_number, m0_number, mx_subnet_number, m0_subnet_number and number of MX devices from the topology file. +We would have the following distribution: +- Routes announced by per M1 device, total number: 1 + 1 + (colo_number * m0_number - 1) * (m0_subnet_number + mx_number * mx_subnet_number) + - 1 default route, prefix: 0.0.0.0/0. + - 1 loopback route. + - Subnet routes of M0 devices connected to M1 devices other than directly connected to DUT, + count: (colo_number * m0_number - 1) * m0_subnet_number. + - Subnet routes of MX devices connected to M0 devices connected M1 devices, + count: (colo_number * m0_number - 1) * mx_number * mx_subnet_number. +- Routes announced by per MX routes, total number: 1 + mx_subnet_number + - 1 loopback route. + - Subunet routes of MX, count: mx_subnet_number. 
diff --git a/docs/testbed/README.testbed.Config.md b/docs/testbed/README.testbed.Config.md index 316c470f851..0fde005305e 100644 --- a/docs/testbed/README.testbed.Config.md +++ b/docs/testbed/README.testbed.Config.md @@ -12,6 +12,12 @@ - [```ansible/files/sonic_lab_links.csv```](/ansible/files/sonic_lab_links.csv): Helper file helps you to create lab_connection_graph.xml, list all physical links between DUT, Fanoutleaf and Fanout root switches, servers and vlan configurations for each link +- [```ansible/files/sonic_lab_pdu_links.csv```](/ansible/files/sonic_lab_pdu_links.csv): Helper file helps you to create lab_connection_graph.xml, list all pdu links between devices and pdu devices. For details about pdu configuraions, check doc [pdu wiring](./README.testbed.PDUWiring.md) + +- [```ansible/files/sonic_lab_bmc_links.csv```](/ansible/files/sonic_lab_bmc_links.csv): Helper file helps you to create lab_connection_graph.xml, list all bmc links between devices and management devices. + +- [```ansible/files/sonic_lab_console_links.csv```](/ansible/files/sonic_lab_console_links.csv): Helper file helps you to create lab_connection_graph.xml, list all console links between devices and management devices. + - [```ansible/files/lab_connection_graph.xml```](/ansible/files/lab_connection_graph.xml): This is the lab graph file for library/conn_graph_facts.py to parse and get all lab fanout switch connections information. If you have only one fanout switch, you may go head manually modify the sample lab_connection_graph.xml file to set bot your fanout leaf and fanout root switch management IP point to the same fanout switch management IP and make sure all DUT and Fanout name and IP are matching your testbed. - [```ansible/files/creategraph.py```](/ansible/files/creategraph.py): Helper file helps you generate a lab_connection_graph.xml based on the device file and link file specified above. 
diff --git a/docs/testbed/README.testbed.Fanout.md b/docs/testbed/README.testbed.Fanout.md new file mode 100644 index 00000000000..dce6ab4af4a --- /dev/null +++ b/docs/testbed/README.testbed.Fanout.md @@ -0,0 +1,11 @@ +# Fanout Credentials + +To deploy fanout, use the ansible playbook [fanout playbook](ansible/fanout.yml), during the execution, credentials will be read as group variables and used. + +To define its passwords, there are several ways. When tacacs is enabled and all fanout devices use the same credential, set fanout_tacacs_user/password (it will override everything). When tacacs is enabled but different types of devices use different credentials, do not set fanout_tacacs_user/password but fanout_tacacs_{OS}_user/password. When instead tacacs is not enabled and fanout uses local authentication, do not set tacacs password for that type of device and set its local user name and passwords. + +Local credential names have been set arbitrarily and are listed below. Sonic devices use fanout_sonic_user/password, eos devices and mlnx devices use fanout_mlnx_user/password. Eos devices are special and will be discussed below. + +Eos devices are different in that we want it to have 2 sets of credentials, one for accessing eos network configurations and one for accessing eos shell. Ansible playbook will have both network and shell accounts setup before running setup, but the shell credential is transient, and the playbook will make it persistent. Shell credential is read from fanout_admin_user/password to login to eos shell, and a template is put in place to make fanout_admin_user/password persistent shell account on eos. Network related credentials are not used in fanout playbook so far, but if it is, it should be tacacs credentials like the others or local credential fanout_network_user/password. + +Pytest is another place where group variables are read to access fanout. Credential setup is similar for sonic and mlnx devices but different for eos.
When test is run, we expect eos to have 2 sets of credentials already. Pytest will read fanout_network_user/password as network credential and fanout_shell_user/password as shell credential. When tacacs credential is set, it overrides local network credential but not shell credential. diff --git a/docs/testbed/README.testbed.PDUWiring.md b/docs/testbed/README.testbed.PDUWiring.md new file mode 100644 index 00000000000..27c0b7fd22d --- /dev/null +++ b/docs/testbed/README.testbed.PDUWiring.md @@ -0,0 +1,28 @@ +# Define your PDU wiring. + +1. Navigate to ansible/files. +2. Define PDU devices in sonic__devices.csv, which is sonic_lab_devices.csv in our example. For PDU devices, type should be Pdu and currently Protocol only supports snmp. HwSku was discovered through snmp and the field HwSku is never actually used in previous versions before this README, and it will be used starting from this PR. There could be mistakes with HwSku and things might work fine. HwSku currently supports value Apc, ApcRPDU, Sentry, Sentry4, Vertiv and Emerson. +3. Define PDU to DUT link in sonic__pdu_links.csv file, which is sonic_lab_pdu_links.csv in our example. It defines which PDU outlet is connected to which DUT port. +4. Generate new connection graph by running the command below. +``` +python2 ./creategraph.py -i -o +``` +Devutils tools will read from the connection graph and fall back to inventory when missing. + +# Use devutils to control devices. + +To control a device (pdu_on, pdu_off, pdu_reboot, pdu_status), use devutils tool. +``` +./devutils -g -i -l -a +``` + +# What devutils does. + +After we inform devutils of the hostname and expected action, devutils attempts to retrieve its PDU information from connection graph. We can also directly see it under PowerControlLinksInfo section in the connection graph xml, which shows the PDU names and ports that a device is connected to.
PDU devices themselves are listed under DevicePowerControlInfo, with its HwSku, ManagementIP, Protocol (snmp only) and Type (Pdu). +If getting from connection graph failed, script attempts to retrieve from the inventory file, under the section pdu_host of a device, but they are not guaranteed to exist. +After collecting PDU info, devutils proceed with pdu_manager_factory from tests/common/plugins/pdu_controller/pdu_manager.py. Exact PDU management information is acquired from the PDU name and added to the controller. When carrying out an operation, the matching controller, which is always snmp controller for now, uses the correct OID for every particular HwSku, which is provided to pdu_manager_factory. + +![](./img/devutils.jpg) + + + diff --git a/docs/testbed/README.testbed.Routing.md b/docs/testbed/README.testbed.Routing.md index ad0be635ffc..17afe5ec8f9 100644 --- a/docs/testbed/README.testbed.Routing.md +++ b/docs/testbed/README.testbed.Routing.md @@ -161,6 +161,9 @@ def test_announce_routes(fib): assert True ``` +## Announce routes +For more details about announce_routes, please refer to: [AnnounceRoutes.Internal.md](./READ.testbed.AnnounceRoutes.Internal.md) + ## Q&A Q: Why not use exabgp to advertise routes directly to the DUT? A: Yes, we can. But, we could not simulate the BGP over LAG as there is no LAG protocol diff --git a/docs/testbed/img/announce_routes_m0.png b/docs/testbed/img/announce_routes_m0.png new file mode 100644 index 00000000000..a9c9418e699 Binary files /dev/null and b/docs/testbed/img/announce_routes_m0.png differ diff --git a/docs/testbed/img/devutils.jpg b/docs/testbed/img/devutils.jpg new file mode 100644 index 00000000000..b409cc82cbc Binary files /dev/null and b/docs/testbed/img/devutils.jpg differ diff --git a/lgtm.yml b/lgtm.yml deleted file mode 100644 index 1300b6840c4..00000000000 --- a/lgtm.yml +++ /dev/null @@ -1,7 +0,0 @@ -path_classifiers: - test: - exclude: "/" # We want to analyze all our test code. 
-extraction: - python: - python_setup: - version: "2" diff --git a/setup-container.sh b/setup-container.sh index 312ab3e7991..0d2b7304df7 100755 --- a/setup-container.sh +++ b/setup-container.sh @@ -194,9 +194,12 @@ fi # User configuration RUN if getent passwd {{ USER_NAME }}; \ -then usermod -o -g {{ GROUP_ID }} -u {{ USER_ID }} -m -d /home/{{ USER_NAME }} {{ USER_NAME }}; \ -else useradd -o -g {{ GROUP_ID }} -u {{ USER_ID }} -m -d /home/{{ USER_NAME }} -s /bin/bash {{ USER_NAME }}; \ +# Usermod will hang when user_id is large (https://github.com/moby/moby/issues/5419), and it can not work around this issue itself. +# So, we first delete the user and use `useradd -l` to work around this issue. +#then usermod -o -g {{ GROUP_ID }} -u {{ USER_ID }} -m -d /home/{{ USER_NAME }} {{ USER_NAME }}; \ +then userdel {{ USER_NAME }}; \ fi +RUN useradd -o -l -g {{ GROUP_ID }} -u {{ USER_ID }} -m -d /home/{{ USER_NAME }} -s /bin/bash {{ USER_NAME }}; # Docker configuration RUN if getent group {{ DGROUP_NAME }}; \ @@ -237,8 +240,15 @@ RUN chmod 0600 ${HOME}/.ssh/authorized_keys WORKDIR ${HOME} -# Setup python3 virtual env -RUN if [ '{{ USER_NAME }}' != 'AzDevOps' ] && [ -d /var/AzDevOps/env-python3 ]; then \ +# Setup python3 virtual env if some conditions are met: +# 1. pytest is not globally installed. If pytest is gloablly installed, this assumes that all the packages in +# the python3 virtual env are installed globally. No need to create python3 virtual env. +# 2. The user is not AzDevOps. By default python3 virtual env is installed for AzDevOps user. +# No need to install it again when current user is AzDevOps. +# 3. The python3 virtual env is not installed for AzDevOps. Then, it is not required for other users either. +RUN if ! 
pip3 list | grep -c pytest >/dev/null && \ +[ '{{ USER_NAME }}' != 'AzDevOps' ] && \ +[ -d /var/AzDevOps/env-python3 ]; then \ /bin/bash -c 'python3 -m venv ${HOME}/env-python3'; \ /bin/bash -c '${HOME}/env-python3/bin/pip install wheel'; \ /bin/bash -c '${HOME}/env-python3/bin/pip install $(/var/AzDevOps/env-python3/bin/pip freeze)'; \ diff --git a/test_reporting/README.md b/test_reporting/README.md index 8379601e5b2..3768f51bca5 100644 --- a/test_reporting/README.md +++ b/test_reporting/README.md @@ -1,10 +1,9 @@ # SONiC Test Reporting -## Setup and Sanity Check +## Setup environment In the sonic-mgmt container: ``` source /var/johnar/env-python3/bin/activate -pytest ``` On a Linux host (verified against Ubuntu 20.04, but should work anywhere python3/virtualenv are supported): @@ -12,7 +11,6 @@ On a Linux host (verified against Ubuntu 20.04, but should work anywhere python3 virtualenv env source env/bin/activate pip3 install -r requirements.txt -pytest ``` ## Uploading test results to a Kusto/Azure Data Explorer (ADX) cluster @@ -39,6 +37,18 @@ Optionally you can add an external/tracking ID that will be uploaded as well: % python3 report_uploader.py -c "test_result" -e PR#1995 ../results SonicTestData ``` +## Run sanity check +This folder contains some test code for junit XML parser. If any change was made to the parser, please do remember to update the tests and run tests as well to ensure that there is no regression. +To run the tests, need to install more dependent packages to the same python3 virtual environment. 
+``` +pip3 install -r requirements_dev.txt +``` + +Just use the `pytest` command to run tests: +``` +pytest +``` + ## Components ### Report Uploader diff --git a/test_reporting/collect_azp_results.py b/test_reporting/collect_azp_results.py new file mode 100644 index 00000000000..eefc186dfe0 --- /dev/null +++ b/test_reporting/collect_azp_results.py @@ -0,0 +1,76 @@ +"""Script to collect failed/cancelled/success tasks for specific azure pipeline and save it to json file.""" +import os +import requests +import argparse +import json + + +TOKEN = os.environ.get('AZURE_DEVOPS_MSSONIC_TOKEN') +if not TOKEN: + raise Exception('Must export environment variable AZURE_DEVOPS_MSSONIC_TOKEN') +AUTH = ('', TOKEN) + +TASK_RESULT_FILE = "pipeline_task_results.json" + + +def get_tasks_results(buildid): + """Collect previous tasks' results and save to file' + + Returns: + dict: Dict of tasks' results + """ + task_results = { + "start_time": "", + "success_tasks": "", + "failed_tasks": "", + "cancelled_tasks": "" + } + + pipeline_url = "https://dev.azure.com/mssonic/internal/_apis/build/builds/"+ str(buildid) + print("Collect pipeline startTime from here:{}".format(pipeline_url)) + api_result = requests.get(pipeline_url, auth=AUTH) + starttime_str = api_result.json()["startTime"] + + # Convert the time format from 2022-08-09T03:00:32.7088577Z + # to 2022-08-09 03:00:32.7088577 + starttime_str = starttime_str.replace("T", " ") + starttime_str = starttime_str.replace("Z", "") + task_results["start_time"] = starttime_str + + timeline_url = "https://dev.azure.com/mssonic/internal/_apis/build/builds/" + str(buildid) + "/timeline?api-version=5.1" + print("Collect task results from here:{}".format(timeline_url)) + api_result = requests.get(timeline_url, auth=AUTH) + build_records = api_result.json()["records"] + if not build_records: + print("Failed to get build records for buildid {}".format(buildid)) + return + for task in build_records: + if task and task["state"] == "completed": + if 
task["result"] == 'succeeded': + task_results["success_tasks"] += task["name"] + ";" + if task["result"] == 'failed': + task_results["failed_tasks"] += task["name"] + ";" + if task["result"] == 'canceled': + task_results["cancelled_tasks"] += task["name"] + ";" + with open(TASK_RESULT_FILE, "w") as f: + json.dump(task_results, f) + return task_results + +def main(): + parser = argparse.ArgumentParser( + description="Upload test reports to Kusto.", + formatter_class=argparse.RawTextHelpFormatter, + epilog=""" + Examples: + python3 collect_azp_results.py 88888 + """, + ) + parser.add_argument("build_id", metavar="buildid", type=str, help="build ids of pipeline, ie 88888") + + args = parser.parse_args() + build_id = args.build_id + get_tasks_results(build_id) + + +if __name__ == "__main__": + main() diff --git a/test_reporting/junit_xml_parser.py b/test_reporting/junit_xml_parser.py old mode 100644 new mode 100755 index 22513d296b3..375b5761f32 --- a/test_reporting/junit_xml_parser.py +++ b/test_reporting/junit_xml_parser.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + """Utilities for validating and parsing JUnit XML files generated by Pytest and Spytest. This library/script should work for any test result XML file generated by Pytest or Spytest. @@ -41,6 +43,7 @@ MAXIMUM_SUMMARY_SIZE = 1024 # 1MB # Fields found in the testsuite/root section of the JUnit XML file. +TESTSUITES_TAG = "testsuites" TESTSUITE_TAG = "testsuite" REQUIRED_TESTSUITE_ATTRIBUTES = { ("time", float), @@ -76,6 +79,17 @@ "name", "time", ] + +# Fields found in the testcase/properties section of the JUnit XML file. +# FIXME: These are specific to pytest, needs to be extended to support spytest. 
+TESTCASE_PROPERTIES_TAG = "properties" +TESTCASE_PROPERTY_TAG = "property" +REQUIRED_TESTCASE_PROPERTIES = [ + "start", + "end", + "CustomMsg" +] + REQUIRED_TESTCASE_JSON_FIELDS = ["result", "error", "summary"] @@ -156,7 +170,8 @@ def validate_junit_xml_archive(directory_name, strict=False): - Any of the provided files are missing required fields """ if not os.path.exists(directory_name) or not os.path.isdir(directory_name): - raise JUnitXMLValidationError("file not found") + print("directory {} not found".format(directory_name)) + return roots = [] metadata_source = None @@ -199,8 +214,7 @@ def validate_junit_xml_archive(directory_name, strict=False): print(f"could not parse {document}: {e} - skipping") if not roots: - raise JUnitXMLValidationError(f"provided directory {directory_name} does not contain any valid XML files") - + print("provided directory {} does not contain any valid XML files".format(directory_name)) return roots @@ -222,20 +236,26 @@ def _validate_junit_xml(root): def _validate_test_summary(root): - if root.tag != TESTSUITE_TAG: - raise JUnitXMLValidationError(f"{TESTSUITE_TAG} tag not found on root element") + if root.tag == TESTSUITES_TAG: + testsuit_element = root.find(TESTSUITE_TAG) + if not testsuit_element: + raise JUnitXMLValidationError(f"{TESTSUITE_TAG} tag not found") + elif root.tag == TESTSUITE_TAG: + testsuit_element = root + else: + raise JUnitXMLValidationError(f"Either {TESTSUITES_TAG} or {TESTSUITE_TAG} tag are not found on root element") for xml_field, expected_type in REQUIRED_TESTSUITE_ATTRIBUTES: - if xml_field not in root.keys(): + if xml_field not in testsuit_element.keys(): raise JUnitXMLValidationError(f"{xml_field} not found in <{TESTSUITE_TAG}> element") try: - expected_type(root.get(xml_field)) + expected_type(testsuit_element.get(xml_field)) except Exception as e: raise JUnitXMLValidationError( f"invalid type for {xml_field} in {TESTSUITE_TAG}> element: " f"expected a number, received " - f'"{root.get(xml_field)}"' + 
f'"{testsuit_element.get(xml_field)}"' ) from e @@ -273,6 +293,41 @@ def _validate_test_metadata(root): raise JUnitXMLValidationError("missing metadata element(s)") +def _validate_test_case_properties(root): + testcase_properties_element = root.find(TESTCASE_PROPERTIES_TAG) + + if not testcase_properties_element: + return + + seen_testcase_properties = [] + for testcase_prop in testcase_properties_element.iterfind(TESTCASE_PROPERTY_TAG): + testcase_property_name = testcase_prop.get("name", None) + + if not testcase_property_name: + continue + + if testcase_property_name not in REQUIRED_TESTCASE_PROPERTIES: + continue + + if testcase_property_name in seen_testcase_properties: + raise JUnitXMLValidationError( + f"duplicate metadata element: {testcase_property_name} seen more than once" + ) + + testcase_property_value = testcase_prop.get("value", None) + + if testcase_property_value is None: # Some fields may be empty + raise JUnitXMLValidationError( + f'invalid metadata element: no "value" field provided for {testcase_property_name}' + ) + + seen_testcase_properties.append(testcase_property_name) + + missing_testcase_property = set(REQUIRED_TESTCASE_PROPERTIES) - set(seen_testcase_properties) + if missing_testcase_property: + print("missing testcase property: {}".format(list(missing_testcase_property))) + + def _validate_test_cases(root): def _validate_test_case(test_case): for attribute in REQUIRED_TESTCASE_ATTRIBUTES: @@ -281,6 +336,7 @@ def _validate_test_case(test_case): f'"{attribute}" not found in test case ' f"\"{test_case.get('name', 'Name Not Found')}\"" ) + _validate_test_case_properties(test_case) cases = root.findall(TESTCASE_TAG) @@ -298,8 +354,14 @@ def parse_test_result(roots): A dict containing the parsed test result. 
""" test_result_json = defaultdict(dict) + if not roots: + print("No XML file needs to be parsed or the file is empty.") + return for root in roots: + if root.tag == TESTSUITES_TAG: + root = root.find(TESTSUITE_TAG) + test_result_json["test_metadata"] = _update_test_metadata(test_result_json["test_metadata"], _parse_test_metadata(root)) test_cases = _parse_test_cases(root) @@ -322,7 +384,7 @@ def _extract_test_summary(test_cases): test_result_summary = defaultdict(int) for _, cases in test_cases.items(): for case in cases: - # Error may occur along with other test results, to count error separately. + # Error may occur along with other test results, to count error separately. # The result field is unique per test case, either error or failure. # xfails is the counter for all kinds of xfail results (include success/failure/error/skipped) test_result_summary["tests"] += 1 @@ -330,10 +392,9 @@ def _extract_test_summary(test_cases): test_result_summary["skipped"] += case["result"] == "skipped" test_result_summary["errors"] += case["error"] test_result_summary["time"] += float(case["time"]) - test_result_summary["xfails"] += case["result"] == "xfail_failure" or \ - case["result"] == "xfail_error" or \ - case["result"] == "xfail_skipped" or \ - case["result"] == "xfail_success" + test_result_summary["xfails"] += \ + case["result"] == "xfail_failure" or case["result"] == \ + "xfail_error" or case["result"] == "xfail_skipped" or case["result"] == "xfail_success" test_result_summary = {k: str(v) for k, v in test_result_summary.items()} return test_result_summary @@ -353,6 +414,27 @@ def _parse_test_metadata(root): return test_result_metadata +def _parse_testcase_properties(root): + testcase_properties_element = root.find(TESTCASE_PROPERTIES_TAG) + + if not testcase_properties_element: + return {} + + testcase_properties = {} + for testcase_prop in testcase_properties_element.iterfind(TESTCASE_PROPERTY_TAG): + if testcase_prop.get("value"): + if testcase_prop.get("name") == 
"CustomMsg": + if not testcase_properties.get(testcase_prop.get("name")): + testcase_properties[testcase_prop.get("name")] = testcase_prop.get("value") + else: + testcase_properties[testcase_prop.get("name")] = testcase_prop.get("value") + ", " + \ + testcase_properties[testcase_prop.get("name")] + else: + testcase_properties[testcase_prop.get("name")] = testcase_prop.get("value") + + return testcase_properties + + def _parse_test_cases(root): test_case_results = defaultdict(list) @@ -365,6 +447,10 @@ def _parse_test_case(test_case): for attribute in REQUIRED_TESTCASE_ATTRIBUTES: result[attribute] = test_case.get(attribute) + for attribute in REQUIRED_TESTCASE_PROPERTIES: + testcase_properties = _parse_testcase_properties(test_case) + if attribute in testcase_properties: + result[attribute] = testcase_properties[attribute] # NOTE: "if failure" and "if error" does not work with the ETree library. failure = test_case.find("failure") @@ -419,10 +505,12 @@ def _update_test_summary(current, update): new_summary = {} for attribute, attr_type in REQUIRED_TESTSUITE_ATTRIBUTES: - new_summary[attribute] = str(round(attr_type(current.get(attribute, 0)) + attr_type(update.get(attribute, 0)), 3)) + new_summary[attribute] = str(round(attr_type(current.get(attribute, 0)) + + attr_type(update.get(attribute, 0)), 3)) for attribute, attr_type in EXTRA_XML_SUMMARY_ATTRIBUTES: - new_summary[attribute] = str(round(attr_type(current.get(attribute, 0)) + attr_type(update.get(attribute, 0)), 3)) + new_summary[attribute] = str(round(attr_type(current.get(attribute, 0)) + + attr_type(update.get(attribute, 0)), 3)) return new_summary @@ -480,7 +568,8 @@ def validate_junit_json_file(path): - The provided file is missing required fields """ test_result_json = validate_json_file(path) - + if not test_result_json: + return _validate_json_metadata(test_result_json) _validate_json_summary(test_result_json) _validate_json_cases(test_result_json) @@ -544,6 +633,9 @@ def 
_validate_test_case(test_case): f'"{attribute}" not found in test case ' f"\"{test_case.get('name', 'Name Not Found')}\"" ) + for attribute in REQUIRED_TESTCASE_PROPERTIES: + if attribute not in test_case: + print("missing testcase property {} in testcase {}".format(attribute, test_case["classname"])) for _, feature in test_result_json["test_cases"].items(): for test_case in feature: @@ -582,7 +674,8 @@ def _run_script(): "--json", "-j", action="store_true", - help="Load an existing test result JSON file from path_name. Will perform validation only regardless of --validate-only option.", + help="Load an existing test result JSON file from path_name. " + "Will perform validation only regardless of --validate-only option.", ) args = parser.parse_args() @@ -609,6 +702,9 @@ def _run_script(): sys.exit(0) test_result_json = parse_test_result(roots) + if test_result_json is None: + print("XML file doesn't exist or no data in the file.") + sys.exit(1) if args.compact: output = json.dumps(test_result_json, separators=(",", ":"), sort_keys=True) diff --git a/test_reporting/kusto/setup.kql b/test_reporting/kusto/setup.kql index 6ac32373a58..ac95b647dac 100644 --- a/test_reporting/kusto/setup.kql +++ b/test_reporting/kusto/setup.kql @@ -101,6 +101,26 @@ '{"column":"TestbedName","Properties":{"path":"$.testbed"}},' '{"column":"UploadTimestamp","Properties":{"path":"$.upload_time"}}]' +############################################################################### +# PIPELINE TABLE SETUP # +# 1. Create a TestReportPipeline table to store pipeline task results # +# 2. 
Add a JSON mapping for the table # +############################################################################### +.create table TestReportPipeline (StartTimestamp: datetime, UploadTimestamp: datetime, TrackingId: string, + ReportId: string, TestbedName: string, + OSVersion: string, SuccessTasks: string, + FailedTasks: string, CancelledTasks: string) + +.create table TestReportPipeline ingestion json mapping 'FlatPipelineMappingV1' @'[{"column":"StartTimestamp","Properties":{"path":"$.start_time"}},' + '{"column":"UploadTimestamp","Properties":{"path":"$.upload_time"}},' + '{"column":"TrackingId","Properties":{"path":"$.tracking_id"}},' + '{"column":"ReportId","Properties":{"path":"$.id"}},' + '{"column":"TestbedName","Properties":{"path":"$.testbed"}},' + '{"column":"OSVersion","Properties":{"path":"$.os_version"}},' + '{"column":"SuccessTasks","Properties":{"path":"$.success_tasks"}},' + '{"column":"FailedTasks","Properties":{"path":"$.failed_tasks"}},' + '{"column":"CancelledTasks","Properties":{"path":"$.cancelled_tasks"}}]' + ############################################################################### # EXPECTED TEST RUNS TABLE SETUP # # 1. Create a ExpectedTestRuns table to store expected test runs data # diff --git a/test_reporting/report_data_storage.py b/test_reporting/report_data_storage.py index 5948146529c..1aa113f017d 100644 --- a/test_reporting/report_data_storage.py +++ b/test_reporting/report_data_storage.py @@ -24,6 +24,8 @@ from typing import Dict, List +TASK_RESULT_FILE = "pipeline_task_results.json" + class ReportDBConnector(ABC): """ReportDBConnector is a wrapper for a back-end data store for JUnit test reports. 
@@ -93,6 +95,7 @@ class KustoConnector(ReportDBConnector): REBOOT_TIMING_TABLE = "RebootTimingData" TEST_CASE_TABLE = "TestCases" EXPECTED_TEST_RUNS_TABLE = "ExpectedTestRuns" + PIPELINE_TABLE = "TestReportPipeline" TABLE_FORMAT_LOOKUP = { METADATA_TABLE: DataFormat.JSON, @@ -104,6 +107,7 @@ class KustoConnector(ReportDBConnector): REBOOT_TIMING_TABLE: DataFormat.MULTIJSON, TEST_CASE_TABLE: DataFormat.JSON, EXPECTED_TEST_RUNS_TABLE: DataFormat.JSON, + PIPELINE_TABLE: DataFormat.JSON } TABLE_MAPPING_LOOKUP = { @@ -115,7 +119,8 @@ class KustoConnector(ReportDBConnector): RAW_REBOOT_TIMING_TABLE: "RawRebootTimingDataMapping", REBOOT_TIMING_TABLE: "RebootTimingDataMapping", TEST_CASE_TABLE: "TestCasesMappingV1", - EXPECTED_TEST_RUNS_TABLE: "ExpectedTestRunsV1" + EXPECTED_TEST_RUNS_TABLE: "ExpectedTestRunsV1", + PIPELINE_TABLE: "FlatPipelineMappingV1" } def __init__(self, db_name: str): @@ -140,7 +145,26 @@ def __init__(self, db_name: str): tenant_id) self._ingestion_client = KustoIngestClient(kcsb) - def upload_report(self, report_json: Dict, external_tracking_id: str = "", report_guid: str = "") -> None: + """ + Kusto performance depends on the work load of cluster, to improve the high availability of test result data service + by hosting a backup cluster, which is optional. 
+ """ + ingest_cluster = os.getenv("TEST_REPORT_INGEST_KUSTO_CLUSTER_BACKUP") + tenant_id = os.getenv("TEST_REPORT_AAD_TENANT_ID_BACKUP") + service_id = os.getenv("TEST_REPORT_AAD_CLIENT_ID_BACKUP") + service_key = os.getenv("TEST_REPORT_AAD_CLIENT_KEY_BACKUP") + + if not ingest_cluster or not tenant_id or not service_id or not service_key: + print("Could not load backup Kusto Credentials from environment") + self._ingestion_client_backup = None + else: + kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(ingest_cluster, + service_id, + service_key, + tenant_id) + self._ingestion_client_backup = KustoIngestClient(kcsb) + + def upload_report(self, report_json: Dict, external_tracking_id: str = "", report_guid: str = "", testbed: str = "", os_version: str = "") -> None: """Upload a report to the back-end data store. Args: @@ -150,6 +174,12 @@ def upload_report(self, report_json: Dict, external_tracking_id: str = "", repor This id does not have to be unique. report_guid: A randomly generated UUID that is used to query for a specific test run across tables. """ + if not report_json: + print("Test result file is not found or empty. 
We will only upload pipeline results and summary.") + self._upload_pipeline_results(external_tracking_id, report_guid, testbed, os_version) + self._upload_summary(report_json, report_guid) + return + self._upload_pipeline_results(external_tracking_id, report_guid, testbed, os_version) self._upload_metadata(report_json, external_tracking_id, report_guid) self._upload_summary(report_json, report_guid) self._upload_test_cases(report_json, report_guid) @@ -185,14 +215,33 @@ def upload_reboot_report(self, path_name: str = "", report_guid: str = "") -> No reboot_timing_dict = validate_json_file(path_name) reboot_timing_data.update(reboot_timing_dict) print("Uploading {} report with contents: {}".format(path_name, reboot_timing_data)) - if "reboot_summary" in path_name: + if "summary.json" in path_name: self._ingest_data(self.REBOOT_TIMING_TABLE, reboot_timing_data) - elif "reboot_report" in path_name: + elif "report.json" in path_name: self._ingest_data(self.RAW_REBOOT_TIMING_TABLE, reboot_timing_data) def upload_expected_runs(self, expected_runs: List) -> None: self._ingest_data(self.EXPECTED_TEST_RUNS_TABLE, expected_runs) + def _upload_pipeline_results(self, external_tracking_id, report_guid, testbed, os_version): + pipeline_data = { + "id": report_guid, + "tracking_id": external_tracking_id, + "testbed": testbed, + "os_version": os_version, + "upload_time": str(datetime.utcnow()) + } + try: + # load pipeline task result json file + with open(TASK_RESULT_FILE, 'r') as f: + task_results = json.load(f) + except Exception as e: + print("Failed to load file {} with exception {}".format(TASK_RESULT_FILE, repr(e))) + task_results = {} + pipeline_data.update(task_results) + print("Upload pipeline result") + self._ingest_data(self.PIPELINE_TABLE, pipeline_data) + def _upload_metadata(self, report_json, external_tracking_id, report_guid): metadata = { "id": report_guid, @@ -200,15 +249,26 @@ def _upload_metadata(self, report_json, external_tracking_id, report_guid): 
"upload_time": str(datetime.utcnow()) } metadata.update(report_json["test_metadata"]) - + print("Upload metadata") self._ingest_data(self.METADATA_TABLE, metadata) def _upload_summary(self, report_json, report_guid): summary = { "id": report_guid } - summary.update(report_json["test_summary"]) - + if not report_json: + report_json = { + "time": 0.0, + "tests": 0, + "skipped": 0, + "failures": 0, + "errors": 0, + "xfails": 0 + } + summary.update(report_json) + else: + summary.update(report_json["test_summary"]) + print("Upload summary") self._ingest_data(self.SUMMARY_TABLE, summary) def _upload_test_cases(self, report_json, report_guid): @@ -220,7 +280,7 @@ def _upload_test_cases(self, report_json, report_guid): "feature": feature }) test_cases.append(case) - + print("Upload test case") self._ingest_data(self.TEST_CASE_TABLE, test_cases) def _ingest_data(self, table, data): @@ -237,4 +297,8 @@ def _ingest_data(self, table, data): else: temp.write(json.dumps(data)) temp.seek(0) + print("Ingest to primary cluster...") self._ingestion_client.ingest_from_file(temp.name, ingestion_properties=props) + if self._ingestion_client_backup: + print("Ingest to backup cluster...") + self._ingestion_client_backup.ingest_from_file(temp.name, ingestion_properties=props) diff --git a/test_reporting/report_uploader.py b/test_reporting/report_uploader.py index 6b02327f01f..6d410489b25 100644 --- a/test_reporting/report_uploader.py +++ b/test_reporting/report_uploader.py @@ -2,6 +2,7 @@ import json import sys import uuid +import re from junit_xml_parser import ( validate_junit_json_file, @@ -10,6 +11,20 @@ ) from report_data_storage import KustoConnector +def _parse_os_version(image_url): + """Parse os version from image url""" + os_version = '' + items = image_url.split("/") + if "public" in items or "master" in items: + os_version = "master" + elif "internal" in items: + os_version = "internal" + else: + # For other images, such as 202012, there is internal-202012 in url. 
+ for item in items: + if "internal" in item: + os_version = item.split("-")[-1] + return os_version if os_version else "UNKNOWN" def _run_script(): parser = argparse.ArgumentParser( @@ -31,6 +46,16 @@ def _run_script(): parser.add_argument( "--category", "-c", type=str, help="Type of data to upload (i.e. test_result, reachability, etc.)" ) + parser.add_argument( + "--testbed", "-t", type=str, help="Name of testbed." + ) + os_version = parser.add_mutually_exclusive_group(required=False) + os_version.add_argument( + "--image_url", "-i", type=str, help="Image url. If has this argument, will ignore version. They are mutually exclusive." + ) + os_version.add_argument( + "--version", "-o", type=str, help="OS version. If has this argument, will ignore image_url. They are mutually exclusive." + ) args = parser.parse_args() kusto_db = KustoConnector(args.db_name) @@ -38,8 +63,16 @@ def _run_script(): if args.category == "test_result": tracking_id = args.external_id if args.external_id else "" report_guid = str(uuid.uuid4()) + testbed = args.testbed + if args.image_url: + version = _parse_os_version(args.image_url) + elif args.version: + version = args.version + else: + version = "UNKNOWN" for path_name in args.path_list: - if "reboot_summary" in path_name or "reboot_report" in path_name: + reboot_data_regex = re.compile('.*test.*_(reboot|sad.*|upgrade_path)_(summary|report).json') + if reboot_data_regex.match(path_name): kusto_db.upload_reboot_report(path_name, report_guid) else: if args.json: @@ -47,7 +80,7 @@ def _run_script(): else: roots = validate_junit_xml_path(path_name) test_result_json = parse_test_result(roots) - kusto_db.upload_report(test_result_json, tracking_id, report_guid) + kusto_db.upload_report(test_result_json, tracking_id, report_guid, testbed, version) elif args.category == "reachability": reachability_data = [] for path_name in args.path_list: diff --git a/test_reporting/requirements.txt b/test_reporting/requirements.txt index 
80d44007817..8e26e60aa8e 100644 --- a/test_reporting/requirements.txt +++ b/test_reporting/requirements.txt @@ -1,5 +1,3 @@ -azure-kusto-data==3.0.0 -azure-kusto-ingest==3.0.0 +azure-kusto-data==3.1.3 +azure-kusto-ingest==3.1.3 defusedxml==0.7.1 -pytest==6.2.5 -aiohttp==3.8.1 diff --git a/test_reporting/requirements_dev.txt b/test_reporting/requirements_dev.txt new file mode 100644 index 00000000000..927094516e6 --- /dev/null +++ b/test_reporting/requirements_dev.txt @@ -0,0 +1 @@ +pytest==6.2.5 diff --git a/test_reporting/utilities.py b/test_reporting/utilities.py index 0b2c47b76d2..db9ddd5d58a 100644 --- a/test_reporting/utilities.py +++ b/test_reporting/utilities.py @@ -1,5 +1,4 @@ import os -import sys import json @@ -10,11 +9,10 @@ class TestResultJSONValidationError(Exception): def validate_json_file(path): if not os.path.exists(path): print(f"{path} not found") - sys.exit(1) + return if not os.path.isfile(path): print(f"{path} is not a JSON file") - sys.exit(1) - + return try: with open(path) as f: test_result_json = json.load(f) diff --git a/tests/acl/conftest.py b/tests/acl/conftest.py index 8b137891791..4321f78f937 100644 --- a/tests/acl/conftest.py +++ b/tests/acl/conftest.py @@ -1 +1,6 @@ +import pytest + +@pytest.fixture(scope='module') +def get_function_conpleteness_level(pytestconfig): + return pytestconfig.getoption("--completeness_level") diff --git a/tests/acl/null_route/test_null_route_helper.py b/tests/acl/null_route/test_null_route_helper.py index 88922878053..c4434b99f99 100644 --- a/tests/acl/null_route/test_null_route_helper.py +++ b/tests/acl/null_route/test_null_route_helper.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology("t0"), + pytest.mark.topology("t0", "m0"), pytest.mark.disable_loganalyzer, # Disable automatic loganalyzer, since we use it for the test ] diff --git a/tests/acl/templates/acltb_test_rules.j2 b/tests/acl/templates/acltb_test_rules.j2 index cc6435bcf41..5f0b88e1c63 100644 
--- a/tests/acl/templates/acltb_test_rules.j2 +++ b/tests/acl/templates/acltb_test_rules.j2 @@ -450,6 +450,36 @@ "code": 1 } } + }, + "30": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 30 + }, + "ip": { + "config": { + "destination-ip-address": "192.168.1.66/32" + } + } + }, + "31": { + "actions": { + "config": { + "forwarding-action": "DROP" + } + }, + "config": { + "sequence-id": 31 + }, + "ip": { + "config": { + "destination-ip-address": "192.168.1.67/32" + } + } } } } diff --git a/tests/acl/templates/acltb_test_rules_part_2.j2 b/tests/acl/templates/acltb_test_rules_part_2.j2 index cc6435bcf41..5f0b88e1c63 100644 --- a/tests/acl/templates/acltb_test_rules_part_2.j2 +++ b/tests/acl/templates/acltb_test_rules_part_2.j2 @@ -450,6 +450,36 @@ "code": 1 } } + }, + "30": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 30 + }, + "ip": { + "config": { + "destination-ip-address": "192.168.1.66/32" + } + } + }, + "31": { + "actions": { + "config": { + "forwarding-action": "DROP" + } + }, + "config": { + "sequence-id": 31 + }, + "ip": { + "config": { + "destination-ip-address": "192.168.1.67/32" + } + } } } } diff --git a/tests/acl/templates/acltb_test_stress_acl_rules.j2 b/tests/acl/templates/acltb_test_stress_acl_rules.j2 new file mode 100644 index 00000000000..1f4ef7631df --- /dev/null +++ b/tests/acl/templates/acltb_test_stress_acl_rules.j2 @@ -0,0 +1,54 @@ +{ + "ACL_RULE": { + "STRESS_ACL| RULE_1": { + "SRC_IP": "20.0.0.1/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "1" + }, + "STRESS_ACL| RULE_2": { + "SRC_IP": "20.0.0.2/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "2" + }, + "STRESS_ACL| RULE_3": { + "SRC_IP": "20.0.0.3/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "3" + }, + "STRESS_ACL| RULE_4": { + "SRC_IP": "20.0.0.4/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "4" + }, + "STRESS_ACL| RULE_5": { + "SRC_IP": "20.0.0.5/32", + "PACKET_ACTION": "DROP", 
+ "PRIORITY": "5" + }, + "STRESS_ACL| RULE_6": { + "SRC_IP": "20.0.0.6/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "6" + }, + "STRESS_ACL| RULE_7": { + "SRC_IP": "20.0.0.7/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "7" + }, + "STRESS_ACL| RULE_8": { + "SRC_IP": "20.0.0.8/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "8" + }, + "STRESS_ACL| RULE_9": { + "SRC_IP": "20.0.0.9/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "9" + }, + "STRESS_ACL| RULE_10": { + "SRC_IP": "20.0.0.10/32", + "PACKET_ACTION": "DROP", + "PRIORITY": "10" + } + } +} diff --git a/tests/acl/templates/acltb_test_stress_acl_table.j2 b/tests/acl/templates/acltb_test_stress_acl_table.j2 new file mode 100644 index 00000000000..9c197bb7a06 --- /dev/null +++ b/tests/acl/templates/acltb_test_stress_acl_table.j2 @@ -0,0 +1,9 @@ +{ + "ACL_TABLE_TYPE": { + "L3": { + "MATCHES": "SRC_IP", + "ACTIONS": "PACKET_ACTION", + "BIND_POINTS": "PORT" + } + } +} diff --git a/tests/acl/templates/acltb_v6_test_rules.j2 b/tests/acl/templates/acltb_v6_test_rules.j2 index 23424403c38..9686a363035 100644 --- a/tests/acl/templates/acltb_v6_test_rules.j2 +++ b/tests/acl/templates/acltb_v6_test_rules.j2 @@ -492,6 +492,36 @@ "code": "0" } } + }, + "32": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 32 + }, + "ip": { + "config": { + "destination-ip-address": "20c0:a800:0:1::1/128" + } + } + }, + "33": { + "actions": { + "config": { + "forwarding-action": "DROP" + } + }, + "config": { + "sequence-id": 33 + }, + "ip": { + "config": { + "destination-ip-address": "20c0:a800:0:1::9/128" + } + } } } } diff --git a/tests/acl/templates/acltb_v6_test_rules_part_2.j2 b/tests/acl/templates/acltb_v6_test_rules_part_2.j2 index 82089cf26e2..dc7a82b5d0c 100644 --- a/tests/acl/templates/acltb_v6_test_rules_part_2.j2 +++ b/tests/acl/templates/acltb_v6_test_rules_part_2.j2 @@ -450,6 +450,36 @@ "code": 1 } } + }, + "32": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + 
}, + "config": { + "sequence-id": 32 + }, + "ip": { + "config": { + "destination-ip-address": "20c0:a800:0:1::1/128" + } + } + }, + "33": { + "actions": { + "config": { + "forwarding-action": "DROP" + } + }, + "config": { + "sequence-id": 33 + }, + "ip": { + "config": { + "destination-ip-address": "20c0:a800:0:1::9/128" + } + } } } } diff --git a/tests/acl/templates/del_acltb_test_stress_acl_table.j2 b/tests/acl/templates/del_acltb_test_stress_acl_table.j2 new file mode 100644 index 00000000000..38b0e66055b --- /dev/null +++ b/tests/acl/templates/del_acltb_test_stress_acl_table.j2 @@ -0,0 +1,3 @@ +[{ + "ACL_TABLE_TYPE": {} +}] diff --git a/tests/acl/test_acl.py b/tests/acl/test_acl.py index 791ab23b3dd..d128ad9ca45 100644 --- a/tests/acl/test_acl.py +++ b/tests/acl/test_acl.py @@ -17,11 +17,13 @@ from tests.common import reboot, port_toggle from tests.common.helpers.assertions import pytest_require from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer, LogAnalyzerError -from tests.common.fixtures.duthost_utils import backup_and_restore_config_db_on_duts +from tests.common.config_reload import config_reload from tests.common.fixtures.ptfhost_utils import copy_arp_responder_py, run_garp_service, change_mac_addresses from tests.common.utilities import wait_until from tests.common.dualtor.dual_tor_mock import mock_server_base_ip_addr from tests.common.helpers.assertions import pytest_assert +from tests.common.helpers.constants import DEFAULT_NAMESPACE +from tests.common.utilities import get_upstream_neigh_type, get_downstream_neigh_type logger = logging.getLogger(__name__) @@ -29,7 +31,6 @@ pytest.mark.acl, pytest.mark.disable_loganalyzer, # Disable automatic loganalyzer, since we use it for the test pytest.mark.topology("any"), - pytest.mark.usefixtures('backup_and_restore_config_db_on_duts') ] BASE_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -71,6 +72,34 @@ "ipv6": "20c0:a800::8" } +# Below M0_L3 IPs are announced to DUT by annouce_route.py, 
they point to the neighbor mx +DOWNSTREAM_DST_IP_M0_L3 = { + "ipv4": "192.168.1.65", + "ipv6": "20c0:a800:0:1::14" +} +DOWNSTREAM_IP_TO_ALLOW_M0_L3 = { + "ipv4": "192.168.1.66", + "ipv6": "20c0:a800:0:1::1" +} +DOWNSTREAM_IP_TO_BLOCK_M0_L3 = { + "ipv4": "192.168.1.67", + "ipv6": "20c0:a800:0:1::9" +} + +# Below M0_VLAN IPs are IPs within the VLAN range +DOWNSTREAM_DST_IP_M0_VLAN = { + "ipv4": "192.168.0.253", + "ipv6": "20c0:a800::14" +} +DOWNSTREAM_IP_TO_ALLOW_M0_VLAN = { + "ipv4": "192.168.0.252", + "ipv6": "20c0:a800::1" +} +DOWNSTREAM_IP_TO_BLOCK_M0_VLAN = { + "ipv4": "192.168.0.251", + "ipv6": "20c0:a800::9" +} + DOWNSTREAM_IP_PORT_MAP = {} UPSTREAM_DST_IP = { @@ -96,9 +125,35 @@ PACKETS_COUNT = "packets_count" BYTES_COUNT = "bytes_count" +@pytest.fixture(scope="module", autouse=True) +def remove_dataacl_table(duthosts): + """ + Remove DATAACL to free TCAM resources. + The change is written to configdb as we don't want DATAACL recovered after reboot + """ + TABLE_NAME = "DATAACL" + for duthost in duthosts: + lines = duthost.shell(cmd="show acl table {}".format(TABLE_NAME))['stdout_lines'] + data_acl_existing = False + for line in lines: + if TABLE_NAME in line: + data_acl_existing = True + break + if data_acl_existing: + # Remove DATAACL + logger.info("Removing ACL table {}".format(TABLE_NAME)) + cmds = [ + "config acl remove table {}".format(TABLE_NAME), + "config save -y" + ] + duthost.shell_cmds(cmds=cmds) + yield + # Recover DUT by reloading minigraph + for duthost in duthosts: + config_reload(duthost, config_source="minigraph") @pytest.fixture(scope="module") -def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptfadapter): +def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptfadapter, topo_scenario): """Gather all required test information from DUT and tbinfo.
Args: @@ -116,8 +171,21 @@ def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptf vlan_ports = [] vlan_mac = None - - if topo == "t0": + # Need to refresh below constants for two scenarios of M0 + global DOWNSTREAM_DST_IP, DOWNSTREAM_IP_TO_ALLOW, DOWNSTREAM_IP_TO_BLOCK + + # Announce routes for m0 is something different from t1/t0 + if topo_scenario == "m0_vlan_scenario": + topo = "m0_vlan" + DOWNSTREAM_DST_IP = DOWNSTREAM_DST_IP_M0_VLAN + DOWNSTREAM_IP_TO_ALLOW = DOWNSTREAM_IP_TO_ALLOW_M0_VLAN + DOWNSTREAM_IP_TO_BLOCK = DOWNSTREAM_IP_TO_BLOCK_M0_VLAN + elif topo_scenario == "m0_l3_scenario": + topo = "m0_l3" + DOWNSTREAM_DST_IP = DOWNSTREAM_DST_IP_M0_L3 + DOWNSTREAM_IP_TO_ALLOW = DOWNSTREAM_IP_TO_ALLOW_M0_L3 + DOWNSTREAM_IP_TO_BLOCK = DOWNSTREAM_IP_TO_BLOCK_M0_L3 + if topo in ["t0", "m0_vlan"]: vlan_ports = [mg_facts["minigraph_ptf_indices"][ifname] for ifname in mg_facts["minigraph_vlans"].values()[0]["members"]] @@ -135,17 +203,21 @@ def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptf upstream_port_id_to_router_mac_map = {} downstream_port_id_to_router_mac_map = {} - # For T0/dual ToR testbeds, we need to use the VLAN MAC to interact with downstream ports - # For T1 testbeds, no VLANs are present so using the router MAC is acceptable + # For M0_VLAN/MX/T0/dual ToR scenario, we need to use the VLAN MAC to interact with downstream ports + # For T1/M0_L3 scenario, no VLANs are present so using the router MAC is acceptable downlink_dst_mac = vlan_mac if vlan_mac is not None else rand_selected_dut.facts["router_mac"] + upstream_neigh_type = get_upstream_neigh_type(topo) + downstream_neigh_type = get_downstream_neigh_type(topo) + pytest_require(upstream_neigh_type is not None and downstream_neigh_type is not None, + "Cannot get neighbor type for unsupported topo: {}".format(topo)) for interface, neighbor in mg_facts["minigraph_neighbors"].items(): port_id = mg_facts["minigraph_ptf_indices"][interface] - if (topo 
== "t1" and "T0" in neighbor["name"]) or (topo == "t0" and "Server" in neighbor["name"]): + if downstream_neigh_type in neighbor["name"].upper(): downstream_ports[neighbor['namespace']].append(interface) downstream_port_ids.append(port_id) downstream_port_id_to_router_mac_map[port_id] = downlink_dst_mac - elif (topo == "t1" and "T2" in neighbor["name"]) or (topo == "t0" and "T1" in neighbor["name"]): + elif upstream_neigh_type in neighbor["name"].upper(): upstream_ports[neighbor['namespace']].append(interface) upstream_port_ids.append(port_id) upstream_port_id_to_router_mac_map[port_id] = rand_selected_dut.facts["router_mac"] @@ -160,7 +232,7 @@ def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptf if 'dualtor' in tbinfo['topo']['name'] and rand_unselected_dut is not None: peer_mg_facts = rand_unselected_dut.get_extended_minigraph_facts(tbinfo) for interface, neighbor in peer_mg_facts['minigraph_neighbors'].items(): - if (topo == "t1" and "T2" in neighbor["name"]) or (topo == "t0" and "T1" in neighbor["name"]): + if (topo == "t1" and "T2" in neighbor["name"]) or (topo == "t0" and "T1" in neighbor["name"]) or (topo == "m0" and "M1" in neighbor["name"]): port_id = peer_mg_facts["minigraph_ptf_indices"][interface] upstream_port_ids.append(port_id) upstream_port_id_to_router_mac_map[port_id] = rand_unselected_dut.facts["router_mac"] @@ -171,14 +243,14 @@ def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptf # TODO: We should make this more robust (i.e. bind all active front-panel ports) acl_table_ports = defaultdict(list) - if topo == "t0" or tbinfo["topo"]["name"] in ("t1", "t1-lag"): + if topo in ["t0", "m0_vlan", "m0_l3"] or tbinfo["topo"]["name"] in ("t1", "t1-lag"): for namespace, port in downstream_ports.iteritems(): acl_table_ports[namespace] += port # In multi-asic we need config both in host and namespace. 
if namespace: acl_table_ports[''] += port - if topo == "t0" or tbinfo["topo"]["name"] in ("t1-lag", "t1-64-lag", "t1-64-lag-clet"): + if topo in ["t0", "m0_vlan", "m0_l3"] or tbinfo["topo"]["name"] in ("t1-lag", "t1-64-lag", "t1-64-lag-clet"): for k, v in port_channels.iteritems(): acl_table_ports[v['namespace']].append(k) # In multi-asic we need config both in host and namespace. @@ -220,9 +292,11 @@ def setup(duthosts, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ptf @pytest.fixture(scope="module", params=["ipv4", "ipv6"]) -def ip_version(request, tbinfo, duthosts, rand_one_dut_hostname): - if tbinfo["topo"]["type"] == "t0" and request.param == "ipv6": - pytest.skip("IPV6 ACL test not currently supported on t0 testbeds") +def ip_version(request, tbinfo, duthosts, rand_one_dut_hostname, topo_scenario): + if tbinfo["topo"]["type"] in ["t0"] and request.param == "ipv6": + pytest.skip("IPV6 ACL test not currently supported on t0/m0 testbeds") + if topo_scenario == "m0_vlan_scenario" and request.param == "ipv6": + pytest.skip("IPV6 ACL test not currently supported on m0_vlan") return request.param @@ -230,14 +304,17 @@ def ip_version(request, tbinfo, duthosts, rand_one_dut_hostname): @pytest.fixture(scope="module") def populate_vlan_arp_entries(setup, ptfhost, duthosts, rand_one_dut_hostname, ip_version): """Set up the ARP responder utility in the PTF container.""" + global DOWNSTREAM_IP_PORT_MAP + # For m0 topo, need to refresh this constant for two different scenario + DOWNSTREAM_IP_PORT_MAP = {} duthost = duthosts[rand_one_dut_hostname] - if setup["topo"] != "t0": + if setup["topo"] not in ["t0", "m0_vlan"]: def noop(): pass yield noop - return # Don't fall through to t0 case + return # Don't fall through to t0/mx/m0_vlan case addr_list = [DOWNSTREAM_DST_IP[ip_version], DOWNSTREAM_IP_TO_ALLOW[ip_version], DOWNSTREAM_IP_TO_BLOCK[ip_version]] @@ -284,7 +361,7 @@ def populate_arp_table(): @pytest.fixture(scope="module", params=["ingress", "egress"]) -def 
stage(request, duthosts, rand_one_dut_hostname): +def stage(request, duthosts, rand_one_dut_hostname, tbinfo): """Parametrize tests for Ingress/Egress stage testing. Args: @@ -712,21 +789,45 @@ def test_rules_priority_dropped(self, setup, direction, ptfadapter, counters_san self._verify_acl_traffic(setup, direction, ptfadapter, pkt, True, ip_version) counters_sanity_check.append(7) - def test_dest_ip_match_forwarded(self, setup, direction, ptfadapter, counters_sanity_check, ip_version): + def test_dest_ip_match_forwarded(self, setup, direction, ptfadapter, counters_sanity_check, ip_version, + topo_scenario): """Verify that we can match and forward a packet on destination IP.""" dst_ip = DOWNSTREAM_IP_TO_ALLOW[ip_version] if direction == "uplink->downlink" else UPSTREAM_IP_TO_ALLOW[ip_version] pkt = self.tcp_packet(setup, direction, ptfadapter, ip_version, dst_ip=dst_ip) self._verify_acl_traffic(setup, direction, ptfadapter, pkt, False, ip_version) - counters_sanity_check.append(2 if direction == "uplink->downlink" else 3) + # Because m0_l3_scenario uses different IPs, we need to verify different ACL rules.
+ if direction == "uplink->downlink": + if topo_scenario == "m0_l3_scenario": + if ip_version == "ipv6": + rule_id = 32 + else: + rule_id = 30 + else: + rule_id = 2 + else: + rule_id = 3 + counters_sanity_check.append(rule_id) - def test_dest_ip_match_dropped(self, setup, direction, ptfadapter, counters_sanity_check, ip_version): + def test_dest_ip_match_dropped(self, setup, direction, ptfadapter, counters_sanity_check, ip_version, + topo_scenario): """Verify that we can match and drop a packet on destination IP.""" dst_ip = DOWNSTREAM_IP_TO_BLOCK[ip_version] if direction == "uplink->downlink" else UPSTREAM_IP_TO_BLOCK[ip_version] pkt = self.tcp_packet(setup, direction, ptfadapter, ip_version, dst_ip=dst_ip) self._verify_acl_traffic(setup, direction, ptfadapter, pkt, True, ip_version) - counters_sanity_check.append(15 if direction == "uplink->downlink" else 16) + # Because m0_l3_scenario uses different IPs, we need to verify different ACL rules. + if direction == "uplink->downlink": + if topo_scenario == "m0_l3_scenario": + if ip_version == "ipv6": + rule_id = 33 + else: + rule_id = 31 + else: + rule_id = 15 + else: + rule_id = 16 + counters_sanity_check.append(rule_id) def test_source_ip_match_dropped(self, setup, direction, ptfadapter, counters_sanity_check, ip_version): """Verify that we can match and drop a packet on source IP.""" diff --git a/tests/acl/test_acl_outer_vlan.py b/tests/acl/test_acl_outer_vlan.py index 078906d9ee5..afb1622029a 100644 --- a/tests/acl/test_acl_outer_vlan.py +++ b/tests/acl/test_acl_outer_vlan.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.disable_loganalyzer, # Disable automatic loganalyzer, since we use it for the test ] diff --git a/tests/acl/test_stress_acl.py b/tests/acl/test_stress_acl.py new file mode 100644 index 00000000000..a3ec26e3c27 --- /dev/null +++ b/tests/acl/test_stress_acl.py @@ -0,0 +1,166 @@ +import logging
+import random +import pytest +import ptf.testutils as testutils +from ptf import mask, packet +from collections import defaultdict +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor # noqa F401 +from tests.common.utilities import wait + +pytestmark = [ + pytest.mark.topology("t0", "t1", "m0", "mx"), + pytest.mark.device_type('vs') +] + +logger = logging.getLogger(__name__) + +LOOP_TIMES_LEVEL_MAP = { + 'debug': 10, + 'basic': 50, + 'confident': 200 +} + +# Template json file used to test scale rules +STRESS_ACL_TABLE_TEMPLATE = "acl/templates/acltb_test_stress_acl_table.j2" +STRESS_ACL_RULE_TEMPLATE = "acl/templates/acltb_test_stress_acl_rules.j2" +STRESS_ACL_READD_RULE_TEMPLATE = "acl/templates/acltb_test_stress_acl_readd_rules.j2" +DEL_STRESS_ACL_TABLE_TEMPLATE = "acl/templates/del_acltb_test_stress_acl_table.j2" +STRESS_ACL_TABLE_JSON_FILE = "/tmp/acltb_test_stress_acl_table.json" +STRESS_ACL_RULE_JSON_FILE = "/tmp/acltb_test_stress_acl_rules.json" +DEL_STRESS_ACL_TABLE_JSON_FILE = "/tmp/del_acltb_test_stress_acl_table.json" + +LOG_EXPECT_ACL_TABLE_CREATE_RE = ".*Created ACL table.*" +LOG_EXPECT_ACL_RULE_FAILED_RE = ".*Failed to create ACL rule.*" + +ACL_RULE_NUMS = 10 + + +@pytest.fixture(scope='module') +def prepare_test_file(rand_selected_dut): + # Define a custom table type CUSTOM_TYPE by loading a json configuration + rand_selected_dut.copy(src=STRESS_ACL_TABLE_TEMPLATE, dest=STRESS_ACL_TABLE_JSON_FILE, mode="0755") + rand_selected_dut.shell("sonic-cfggen -j {} -w".format(STRESS_ACL_TABLE_JSON_FILE)) + # Copy acl rules + rand_selected_dut.copy(src=STRESS_ACL_RULE_TEMPLATE, dest=STRESS_ACL_RULE_JSON_FILE, mode="0755") + + yield + + rand_selected_dut.copy(src=DEL_STRESS_ACL_TABLE_TEMPLATE, dest=DEL_STRESS_ACL_TABLE_JSON_FILE) + rand_selected_dut.shell("configlet -d -j {}".format(DEL_STRESS_ACL_TABLE_JSON_FILE)) + rand_selected_dut.shell("rm -f {}".format(DEL_STRESS_ACL_TABLE_JSON_FILE)) + + 
+@pytest.fixture(scope='module') +def prepare_test_port(rand_selected_dut, tbinfo): + mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) + if tbinfo["topo"]["type"] == "mx": + dut_port = rand_selected_dut.acl_facts()["ansible_facts"]["ansible_acl_facts"]["DATAACL"]["ports"][0] + else: + dut_port = list(mg_facts['minigraph_portchannels'].keys())[0] + if not dut_port: + pytest.skip('No portchannels found') + if "Ethernet" in dut_port: + dut_eth_port = dut_port + elif "PortChannel" in dut_port: + dut_eth_port = mg_facts["minigraph_portchannels"][dut_port]["members"][0] + ptf_src_port = mg_facts["minigraph_ptf_indices"][dut_eth_port] + + topo = tbinfo["topo"]["type"] + # Get the list of upstream ports + upstream_ports = defaultdict(list) + upstream_port_ids = [] + for interface, neighbor in mg_facts["minigraph_neighbors"].items(): + port_id = mg_facts["minigraph_ptf_indices"][interface] + if (topo == "t1" and "T2" in neighbor["name"]) or (topo == "t0" and "T1" in neighbor["name"]) or \ + (topo == "m0" and "M1" in neighbor["name"]) or (topo == "mx" and "M0" in neighbor["name"]): + upstream_ports[neighbor['namespace']].append(interface) + upstream_port_ids.append(port_id) + + return ptf_src_port, upstream_port_ids, dut_port + + +def verify_acl_rules(rand_selected_dut, ptfadapter, ptf_src_port, + ptf_dst_ports, acl_rule_list, del_rule_id, verity_status): + + for acl_id in acl_rule_list: + ip_addr1 = acl_id % 256 + ip_addr2 = acl_id / 256 + + src_ip_addr = "20.0.{}.{}".format(ip_addr2, ip_addr1) + dst_ip_addr = "10.0.0.1" + pkt = testutils.simple_ip_packet( + eth_dst=rand_selected_dut.facts['router_mac'], + eth_src=ptfadapter.dataplane.get_mac(0, ptf_src_port), + ip_src=src_ip_addr, + ip_dst=dst_ip_addr, + ip_proto=47, + ip_tos=0x84, + ip_id=0, + ip_ihl=5, + ip_ttl=121 + ) + + pkt_copy = pkt.copy() + pkt_copy.ttl = pkt_copy.ttl - 1 + exp_pkt = mask.Mask(pkt_copy) + exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') + 
exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') + exp_pkt.set_do_not_care_scapy(packet.IP, "chksum") + + ptfadapter.dataplane.flush() + testutils.send(test=ptfadapter, port_id=ptf_src_port, pkt=pkt) + if verity_status == "forward" or acl_id == del_rule_id: + testutils.verify_packet_any_port(test=ptfadapter, pkt=exp_pkt, ports=ptf_dst_ports) + elif verity_status == "drop" and acl_id != del_rule_id: + testutils.verify_no_packet_any(test=ptfadapter, pkt=exp_pkt, ports=ptf_dst_ports) + + +def test_acl_add_del_stress(rand_selected_dut, tbinfo, ptfadapter, prepare_test_file, + prepare_test_port, get_function_conpleteness_level, + toggle_all_simulator_ports_to_rand_selected_tor): # noqa F811 + + ptf_src_port, ptf_dst_ports, dut_port = prepare_test_port + + cmd_create_table = "config acl add table STRESS_ACL L3 -s ingress -p {}".format(dut_port) + cmd_remove_table = "config acl remove table STRESS_ACL" + cmd_add_rules = "sonic-cfggen -j {} -w".format(STRESS_ACL_RULE_JSON_FILE) + cmd_rm_all_rules = "acl-loader delete STRESS_ACL" + + normalized_level = get_function_conpleteness_level + if normalized_level is None: + normalized_level = 'basic' + loop_times = LOOP_TIMES_LEVEL_MAP[normalized_level] + wait_time = 2 + + rand_selected_dut.shell(cmd_create_table) + acl_rule_list = list(range(1, ACL_RULE_NUMS + 1)) + verify_acl_rules(rand_selected_dut, ptfadapter, ptf_src_port, ptf_dst_ports, acl_rule_list, 0, "forward") + try: + loops = 0 + while loops <= loop_times: + logger.info("loops: {}".format(loops)) + if loops == 0: + rand_selected_dut.shell(cmd_add_rules) + else: + readd_id = loops + ACL_RULE_NUMS + ip_addr1 = readd_id % 256 + ip_addr2 = readd_id / 256 + rand_selected_dut.shell('sonic-db-cli CONFIG_DB hset "ACL_RULE|STRESS_ACL| RULE_{}" \ + "SRC_IP" "20.0.{}.{}/32" "PACKET_ACTION" "DROP" "PRIORITY" "{}"' + .format(readd_id, ip_addr2, ip_addr1, readd_id)) + acl_rule_list.append(readd_id) + + wait(wait_time, "Waiting {} sec acl rules to be loaded".format(wait_time)) + 
verify_acl_rules(rand_selected_dut, ptfadapter, ptf_src_port, ptf_dst_ports, acl_rule_list, 0, "drop") + + del_rule_id = random.choice(acl_rule_list) + rand_selected_dut.shell('sonic-db-cli CONFIG_DB del "ACL_RULE|STRESS_ACL| RULE_{}"'.format(del_rule_id)) + wait(wait_time, "Waiting {} sec acl rules to be loaded".format(wait_time)) + verify_acl_rules(rand_selected_dut, ptfadapter, ptf_src_port, ptf_dst_ports, acl_rule_list, del_rule_id, "drop") + acl_rule_list.remove(del_rule_id) + + loops += 1 + finally: + rand_selected_dut.shell(cmd_rm_all_rules) + rand_selected_dut.shell(cmd_remove_table) + logger.info("End") diff --git a/tests/arp/conftest.py b/tests/arp/conftest.py index 60988902d24..92f67a44175 100644 --- a/tests/arp/conftest.py +++ b/tests/arp/conftest.py @@ -1,4 +1,7 @@ import logging +import ptf.testutils as testutils +import ptf.mask as mask +import ptf.packet as packet import pytest import time @@ -10,6 +13,9 @@ from tests.arp.arp_utils import increment_ipv6_addr, increment_ipv4_addr from tests.common.helpers.assertions import pytest_require as pt_require from tests.common.utilities import wait +from scapy.all import Ether, IPv6, ICMPv6ND_NS, ICMPv6ND_NA, \ + ICMPv6NDOptSrcLLAddr, in6_getnsmac, \ + in6_getnsma, inet_pton, inet_ntop, socket CRM_POLLING_INTERVAL = 1 @@ -139,7 +145,7 @@ def intfs_for_test(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_fro @pytest.fixture(scope="module") -def common_setup_teardown(duthosts, ptfhost, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, tbinfo): +def common_setup_teardown(duthosts, ptfhost, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index): try: duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] config_facts = duthost.asic_instance(enum_frontend_asic_index).config_facts(host=duthost.hostname, source="running")['ansible_facts'] @@ -230,17 +236,16 @@ def ip_and_intf_info(config_facts, intfs_for_test, ptfhost, ptfadapter): except ValueError: continue - # 
The VLAN interface on the DUT has an x.x.x.1 address assigned (or x::1 in the case of IPv6) - # But the network_address property returns an x.x.x.0 address (or x::0 for IPv6) so we increment by two to avoid conflict + # Increment address by 3 to offset it from the intf on which the address may be learned if intf_ipv4_addr is not None: - ptf_intf_ipv4_addr = increment_ipv4_addr(intf_ipv4_addr.network_address, incr=2) + ptf_intf_ipv4_addr = increment_ipv4_addr(intf_ipv4_addr.network_address, incr=3) ptf_intf_ipv4_hosts = intf_ipv4_addr.hosts() else: ptf_intf_ipv4_addr = None ptf_intf_ipv4_hosts = None if intf_ipv6_addr is not None: - ptf_intf_ipv6_addr = increment_ipv6_addr(intf_ipv6_addr.network_address, incr=2) + ptf_intf_ipv6_addr = increment_ipv6_addr(intf_ipv6_addr.network_address, incr=3) else: ptf_intf_ipv6_addr = None @@ -284,6 +289,72 @@ def proxy_arp_enabled(rand_selected_dut, config_facts): yield all('enabled' in val for val in new_proxy_arp_vals) + proxy_arp_del_cmd = 'sonic-db-cli CONFIG_DB HDEL "VLAN_INTERFACE|Vlan{}" proxy_arp' for vid, proxy_arp_val in old_proxy_arp_vals.items(): if 'enabled' not in proxy_arp_val: - duthost.shell(proxy_arp_config_cmd.format(vid, 'disabled')) + # Delete the DB entry instead of using the config command to satisfy check_dut_health_status + duthost.shell(proxy_arp_del_cmd.format(vid)) + +def generate_link_local_addr(mac): + parts = mac.split(":") + parts.insert(3, "ff") + parts.insert(4, "fe") + parts[0] = "{:x}".format(int(parts[0], 16) ^ 2) + + ipv6Parts = [] + for i in range(0, len(parts), 2): + ipv6Parts.append("".join(parts[i:i+2])) + ipv6 = "fe80::{}".format(":".join(ipv6Parts)) + return ipv6 + +@pytest.fixture(params=['v4', 'v6']) +def packets_for_test(request, ptfadapter, duthost, config_facts, tbinfo, ip_and_intf_info): + ip_version = request.param + src_addr_v4, _, src_addr_v6, _, ptf_intf_index = ip_and_intf_info + ptf_intf_mac = ptfadapter.dataplane.get_mac(0, ptf_intf_index) + vlans = config_facts['VLAN'] + 
topology = tbinfo['topo']['name'] + dut_mac = '' + for vlan_details in vlans.values(): + if 'dualtor' in topology: + dut_mac = vlan_details['mac'].lower() + else: + dut_mac = duthost.shell('sonic-cfggen -d -v \'DEVICE_METADATA.localhost.mac\'')["stdout_lines"][0].decode("utf-8") + break + + if ip_version == 'v4': + tgt_addr = increment_ipv4_addr(src_addr_v4) + out_pkt = testutils.simple_arp_packet( + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=ptf_intf_mac, + ip_snd=src_addr_v4, + ip_tgt=tgt_addr, + arp_op=1, + hw_snd=ptf_intf_mac + ) + exp_pkt = testutils.simple_arp_packet( + eth_dst=ptf_intf_mac, + eth_src=dut_mac, + ip_snd=tgt_addr, + ip_tgt=src_addr_v4, + arp_op=2, + hw_snd=dut_mac, + hw_tgt=ptf_intf_mac + ) + elif ip_version == 'v6': + tgt_addr = increment_ipv6_addr(src_addr_v6) + ll_src_addr = generate_link_local_addr(ptf_intf_mac) + multicast_tgt_addr = in6_getnsma(inet_pton(socket.AF_INET6, tgt_addr)) + multicast_tgt_mac = in6_getnsmac(multicast_tgt_addr) + out_pkt = Ether(src=ptf_intf_mac, dst=multicast_tgt_mac) + out_pkt /= IPv6(dst=inet_ntop(socket.AF_INET6, multicast_tgt_addr), src=ll_src_addr) + out_pkt /= ICMPv6ND_NS(tgt=tgt_addr) + out_pkt /= ICMPv6NDOptSrcLLAddr(lladdr=ptf_intf_mac) + + exp_pkt = Ether(src=dut_mac, dst=ptf_intf_mac) + exp_pkt /= IPv6(dst=ll_src_addr, src=generate_link_local_addr(dut_mac)) + exp_pkt /= ICMPv6ND_NA(tgt=tgt_addr, S=1, R=1, O=0) + exp_pkt /= ICMPv6NDOptSrcLLAddr(type=2, lladdr=dut_mac) + exp_pkt = mask.Mask(exp_pkt) + exp_pkt.set_do_not_care_scapy(packet.IPv6, 'fl') + return ip_version, out_pkt, exp_pkt diff --git a/tests/arp/files/ferret.py b/tests/arp/files/ferret.py index 4f37fa5f1fd..323df83a2c2 100644 --- a/tests/arp/files/ferret.py +++ b/tests/arp/files/ferret.py @@ -96,6 +96,7 @@ class RestAPI(object): PORT = 448 def __init__(self, obj, db, src_ip): + SocketServer.TCPServer.allow_reuse_address = True self.httpd = SocketServer.TCPServer(("", self.PORT), obj) self.context = ssl.SSLContext(ssl.PROTOCOL_TLS) 
self.context.verify_mode = ssl.CERT_NONE @@ -199,6 +200,9 @@ def action(self, interface): # References: https://tools.ietf.org/html/rfc1701 # https://tools.ietf.org/html/draft-foschiano-erspan-00 arp_request = data[0x2E:] + elif ASIC_TYPE == "cisco-8000": + # Ethernet(14) + IP(20) + GRE(8) + ERSPAN(8) = 50 = 0x32 + arp_request = data[0x32:] elif gre_type_r == 0x8949: # Mellanox arp_request = data[0x3c:] @@ -214,7 +218,7 @@ def action(self, interface): print return - remote_mac, remote_ip, request_ip, op_type = self.extract_arp_info(arp_request) + vlan_id, remote_mac, remote_ip, request_ip, op_type = self.extract_arp_info(arp_request) # Don't send ARP response if the ARP op code is not request if op_type != self.ARP_OP_REQUEST: return @@ -243,7 +247,7 @@ def action(self, interface): new_pkt += binascii.unhexlify('c00012b5004c1280') # udp new_pkt += binascii.unhexlify('08000000%06x00' % r.vxlan_id) # vxlan - arp_reply = self.generate_arp_reply(binascii.unhexlify(r.mac), remote_mac, request_ip, remote_ip) + arp_reply = self.generate_arp_reply(binascii.unhexlify(r.mac), remote_mac, request_ip, remote_ip, vlan_id) new_pkt += arp_reply else: print 'Support of family %s is not implemented' % r.family @@ -273,11 +277,18 @@ def calculate_header_crc(self, ipv4): return binascii.unhexlify("%x" % s) def extract_arp_info(self, data): - # remote_mac, remote_ip, request_ip, op_type - return data[6:12], data[28:32], data[38:42], (ord(data[20]) * 256 + ord(data[21])) + vlan_id = ord(data[14]) * 256 + ord(data[15]) + if vlan_id == 1: + offset = 0 + else: + offset = 4 + # vlan_id, remote_mac, remote_ip, request_ip, op_type + return vlan_id, data[6:12], data[offset+28:offset+32], data[offset+38:offset+42], (ord(data[offset+20]) * 256 + ord(data[offset+21])) - def generate_arp_reply(self, local_mac, remote_mac, local_ip, remote_ip): + def generate_arp_reply(self, local_mac, remote_mac, local_ip, remote_ip, vlan_id): eth_hdr = remote_mac + local_mac + #if vlan_id != 1: + # eth_hdr = 
eth_hdr + binascii.unhexlify("8100%04x" % vlan_id) return eth_hdr + self.arp_chunk + local_mac + local_ip + remote_mac + remote_ip + self.arp_pad def get_bpf_for_bgp(): diff --git a/tests/arp/test_arp_dualtor.py b/tests/arp/test_arp_dualtor.py index 737b13f6658..c46efeaa93f 100644 --- a/tests/arp/test_arp_dualtor.py +++ b/tests/arp/test_arp_dualtor.py @@ -1,140 +1,194 @@ +""" +This module tests ARP scenarios specific to dual ToR testbeds +""" +from ipaddress import ip_address, ip_interface import logging -import ptf.testutils as testutils +import random +import time import pytest -import ptf.mask as mask -import ptf.packet as packet -from scapy.all import Ether, IPv6, ICMPv6ND_NS, ICMPv6ND_NA, \ - ICMPv6NDOptSrcLLAddr, in6_getnsmac, \ - in6_getnsma, inet_pton, inet_ntop, socket -from tests.arp.arp_utils import clear_dut_arp_cache, increment_ipv6_addr, increment_ipv4_addr +import ptf.testutils as testutils from tests.common.helpers.assertions import pytest_assert, pytest_require -from tests.common.fixtures.ptfhost_utils import change_mac_addresses +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_upper_tor +from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host, show_muxcable_status, mux_config, config_dualtor_arp_responder # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import run_garp_service, change_mac_addresses, run_icmp_responder, pause_garp_service + +from tests.common.utilities import wait_until -pytestmark = [ - pytest.mark.topology('t0', 'dualtor') +pytestmark= [ + pytest.mark.topology('dualtor') ] logger = logging.getLogger(__name__) -def test_arp_garp_enabled(rand_selected_dut, garp_enabled, ip_and_intf_info, intfs_for_test, config_facts, ptfadapter): +FAILED = "FAILED" +INCOMPLETE = "INCOMPLETE" +STALE = "STALE" +REACHABLE = "REACHABLE" + + +@pytest.fixture +def restore_mux_auto_config(duthosts): + """ + Fixture to ensure ToRs have all mux interfaces set to auto after testing 
+ """ + + yield + + for duthost in duthosts: + duthost.shell("sudo config mux mode auto all") + + +@pytest.fixture +def pause_arp_update(duthosts): + """ + Temporarily stop arp_update process during test cases + + Some test cases manually call arp_update so we use this fixture to pause it on + the testbed to prevent interference with the test case + """ + arp_update_stop_cmd = "docker exec -it swss supervisorctl stop arp_update" + for duthost in duthosts: + duthost.shell(arp_update_stop_cmd) + + yield + + arp_update_start_cmd = "docker exec -it swss supervisorctl start arp_update" + for duthost in duthosts: + duthost.shell(arp_update_start_cmd) + + +@pytest.fixture(params=['IPv4', 'IPv6']) +def neighbor_ip(request, mux_config): """ - Send a gratuitous ARP (GARP) packet from the PTF to the DUT + Provide the neighbor IP used for testing - The DUT should learn the (previously unseen) ARP info from the packet + Randomly select an IP from the server IPs configured in the config DB MUX_CABLE table """ - pytest_require(garp_enabled, 'Gratuitous ARP not enabled for this device') - duthost = rand_selected_dut - ptf_intf_ipv4_addr = ip_and_intf_info[0] - - arp_request_ip = increment_ipv4_addr(ptf_intf_ipv4_addr) - arp_src_mac = '00:00:07:08:09:0a' - _, _, intf1_index, _, = intfs_for_test - - pkt = testutils.simple_arp_packet(pktlen=60, - eth_dst='ff:ff:ff:ff:ff:ff', - eth_src=arp_src_mac, - vlan_pcp=0, - arp_op=2, - ip_snd=arp_request_ip, - ip_tgt=arp_request_ip, - hw_snd=arp_src_mac, - hw_tgt='ff:ff:ff:ff:ff:ff' - ) - - clear_dut_arp_cache(duthost) - - logger.info("Sending GARP for target {} from PTF interface {}".format(arp_request_ip, intf1_index)) - testutils.send_packet(ptfadapter, intf1_index, pkt) - - vlan_intfs = config_facts['VLAN_INTERFACE'].keys() - - switch_arptable = duthost.switch_arptable()['ansible_facts'] - pytest_assert(switch_arptable['arptable']['v4'][arp_request_ip]['macaddress'].lower() == arp_src_mac.lower()) - 
pytest_assert(switch_arptable['arptable']['v4'][arp_request_ip]['interface'] in vlan_intfs) - -def generate_link_local_addr(mac): - parts = mac.split(":") - parts.insert(3, "ff") - parts.insert(4, "fe") - parts[0] = "{:x}".format(int(parts[0], 16) ^ 2) - - ipv6Parts = [] - for i in range(0, len(parts), 2): - ipv6Parts.append("".join(parts[i:i+2])) - ipv6 = "fe80::{}".format(":".join(ipv6Parts)) - return ipv6 - -@pytest.fixture(params=['v4', 'v6']) -def packets_for_test(request, ptfadapter, duthost, config_facts, tbinfo, ip_and_intf_info): ip_version = request.param - src_addr_v4, _, src_addr_v6, _, ptf_intf_index = ip_and_intf_info - ptf_intf_mac = ptfadapter.dataplane.get_mac(0, ptf_intf_index) - vlans = config_facts['VLAN'] - topology = tbinfo['topo']['name'] - dut_mac = '' - for vlan_details in vlans.values(): - if 'dualtor' in topology: - dut_mac = vlan_details['mac'].lower() - else: - dut_mac = duthost.shell('sonic-cfggen -d -v \'DEVICE_METADATA.localhost.mac\'')["stdout_lines"][0].decode("utf-8") - break + selected_intf = random.choice(mux_config.values()) + neigh_ip = ip_interface(selected_intf["SERVER"][ip_version]).ip + logger.info("Using {} as neighbor IP".format(neigh_ip)) + return neigh_ip - if ip_version == 'v4': - tgt_addr = increment_ipv4_addr(src_addr_v4) - out_pkt = testutils.simple_arp_packet( - eth_dst='ff:ff:ff:ff:ff:ff', - eth_src=ptf_intf_mac, - ip_snd=src_addr_v4, - ip_tgt=tgt_addr, - arp_op=1, - hw_snd=ptf_intf_mac - ) - exp_pkt = testutils.simple_arp_packet( - eth_dst=ptf_intf_mac, - eth_src=dut_mac, - ip_snd=tgt_addr, - ip_tgt=src_addr_v4, - arp_op=2, - hw_snd=dut_mac, - hw_tgt=ptf_intf_mac - ) - elif ip_version == 'v6': - tgt_addr = increment_ipv6_addr(src_addr_v6) - ll_src_addr = generate_link_local_addr(ptf_intf_mac) - multicast_tgt_addr = in6_getnsma(inet_pton(socket.AF_INET6, tgt_addr)) - multicast_tgt_mac = in6_getnsmac(multicast_tgt_addr) - out_pkt = Ether(src=ptf_intf_mac, dst=multicast_tgt_mac) - out_pkt /= 
IPv6(dst=inet_ntop(socket.AF_INET6, multicast_tgt_addr), src=ll_src_addr) - out_pkt /= ICMPv6ND_NS(tgt=tgt_addr) - out_pkt /= ICMPv6NDOptSrcLLAddr(lladdr=ptf_intf_mac) - - exp_pkt = Ether(src=dut_mac, dst=ptf_intf_mac) - exp_pkt /= IPv6(dst=ll_src_addr, src=generate_link_local_addr(dut_mac)) - exp_pkt /= ICMPv6ND_NA(tgt=tgt_addr, S=1, R=1, O=0) - exp_pkt /= ICMPv6NDOptSrcLLAddr(type=2, lladdr=dut_mac) - exp_pkt = mask.Mask(exp_pkt) - exp_pkt.set_do_not_care_scapy(packet.IPv6, 'fl') - - return ip_version, out_pkt, exp_pkt - -def test_proxy_arp(proxy_arp_enabled, ip_and_intf_info, ptfadapter, packets_for_test): + +@pytest.fixture +def clear_neighbor_table(duthosts, pause_arp_update, pause_garp_service): + logger.info("Clearing neighbor table on {}".format(duthosts)) + for duthost in duthosts: + duthost.shell("sudo ip neigh flush all") + + return + + +def verify_neighbor_status(duthost, neigh_ip, expected_status): + ip_version = 'v4' if ip_address(neigh_ip).version == 4 else 'v6' + neighbor_table = duthost.switch_arptable()['ansible_facts']['arptable'] + return expected_status.lower() in neighbor_table[ip_version][str(neigh_ip)]['state'].lower() + + +def test_proxy_arp_for_standby_neighbor(proxy_arp_enabled, ip_and_intf_info, restore_mux_auto_config, + ptfadapter, packets_for_test, upper_tor_host, toggle_all_simulator_ports_to_upper_tor): """ - Send an ARP request or neighbor solicitation (NS) to the DUT for an IP address within the subnet of the DUT's VLAN. + Send an ARP request or neighbor solicitation (NS) to the DUT for an IP address within the subnet of the DUT's VLAN that is + routed via the IPinIP tunnel (i.e. that IP points to a standby neighbor) DUT should reply with an ARP reply or neighbor advertisement (NA) containing the DUT's own MAC + + Test steps: + 1. During setup, learn neighbor IPs on ToR interfaces using `run_garp_service` fixture + 2. Pick a learned IP address as the target IP and generate an ARP request/neighbor solicitation for it + 3. 
Set the interface this IP is learned on to standby. This will ensure the route for the IP points to the + IPinIP tunnel + 4. Send the ARP request/NS packet to the ToR on some other active interface + 5. Expect the ToR to still proxy ARP for the IP and send an ARP reply/neighbor advertisement back, even though + the route for the requested IP is pointing to the tunnel """ - pytest_require(proxy_arp_enabled, 'Proxy ARP not enabled for all VLANs') + # This should never fail since we are only running on dual ToR platforms + pytest_require(proxy_arp_enabled, 'Proxy ARP not enabled for all VLANs, check dual ToR configuration') ptf_intf_ipv4_addr, _, ptf_intf_ipv6_addr, _, ptf_intf_index = ip_and_intf_info - ip_version, outgoing_packet, expected_packet = packets_for_test if ip_version == 'v4': pytest_require(ptf_intf_ipv4_addr is not None, 'No IPv4 VLAN address configured on device') + intf_name_cmd = "show arp | grep -m 1 '{}' | awk '{{ print $3 }}'".format(ptf_intf_ipv4_addr) elif ip_version == 'v6': pytest_require(ptf_intf_ipv6_addr is not None, 'No IPv6 VLAN address configured on device') - + intf_name_cmd = "show ndp | grep -m 1 '{}' | awk '{{ print $3 }}'".format(ptf_intf_ipv6_addr) + + # Find the interface on which the target IP is learned and set it to standby to force it to point to a tunnel route + intf_name = upper_tor_host.shell(intf_name_cmd)['stdout'] + mux_mode_cmd = "sudo config mux mode standby {}".format(intf_name) + upper_tor_host.shell(mux_mode_cmd) + pytest_assert(wait_until(5, 1, 0, lambda: show_muxcable_status(upper_tor_host)[intf_name]['status'] == "standby"), + "Interface {} not standby on {}".format(intf_name, upper_tor_host)) ptfadapter.dataplane.flush() testutils.send_packet(ptfadapter, ptf_intf_index, outgoing_packet) testutils.verify_packet(ptfadapter, expected_packet, ptf_intf_index, timeout=10) + + +def test_arp_update_for_failed_standby_neighbor( + config_dualtor_arp_responder, neighbor_ip, clear_neighbor_table, + 
toggle_all_simulator_ports_to_upper_tor, upper_tor_host, lower_tor_host
+):
+    """
+    Test the standby ToR's ability to recover from having a failed neighbor entry
+
+    Test steps:
+    1. For the same neighbor IP, create a failed neighbor entry on the standby ToR and a reachable entry on the active ToR
+    2. Run `arp_update` on the standby ToR
+    3. Verify the failed entry is now incomplete and stays incomplete for 10 seconds
+    4. Run `arp_update` on the active ToR
+    5. Verify the incomplete entry is now reachable
+    """
+    # We only use ping to trigger an ARP request from the kernel, so exit early to save time
+    ping_cmd = "timeout 0.2 ping -c1 -W1 -i0.2 -n -q {}".format(neighbor_ip)
+
+    # Important to run on lower (standby) ToR first so that the lower ToR neighbor entry will be failed
+    # Otherwise, the ARP reply/NA message generated by the active ToR will create a REACHABLE entry on the lower ToR
+    lower_tor_host.shell(ping_cmd, module_ignore_errors=True)
+    pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(lower_tor_host, neighbor_ip, FAILED)))
+    upper_tor_host.shell(ping_cmd, module_ignore_errors=True)
+    pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(upper_tor_host, neighbor_ip, REACHABLE)))
+
+    # For IPv4 neighbors, the ARP reply generated when the upper/active ToR sends an ARP request will also
+    # be learned by the lower/standby ToR, so we expect it to already be reachable at this stage.
+    # However, IPv6 neighbors are not learned by the kernel the same way, so we expect the standby ToR
+    # neighbor entry to be INCOMPLETE as a result of the arp_update script
+    expected_midpoint_state = REACHABLE if ip_address(neighbor_ip).version == 4 else INCOMPLETE
+
+    arp_update_cmd = "docker exec -it swss supervisorctl start arp_update"
+    lower_tor_host.shell(arp_update_cmd)
+    pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(lower_tor_host, neighbor_ip, expected_midpoint_state)))
+
+    # Need to make sure the entry does not auto-transition to FAILED
+    time.sleep(10)
+    pytest_assert(verify_neighbor_status(lower_tor_host, neighbor_ip, expected_midpoint_state))
+
+    upper_tor_host.shell(arp_update_cmd)
+    pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(lower_tor_host, neighbor_ip, REACHABLE)))
+
+
+def test_standby_unsolicited_neigh_learning(
+    config_dualtor_arp_responder, neighbor_ip, clear_neighbor_table,
+    toggle_all_simulator_ports_to_upper_tor, upper_tor_host, lower_tor_host
+):
+    """
+    Test the standby ToR's ability to perform unsolicited neighbor learning (GARP and unsolicited NA)
+
+    Test steps:
+    1. Create a reachable neighbor entry on the active ToR only
+    2. Run arp_update on the active ToR
+    3. 
Confirm that the standby ToR learned the entry and it is REACHABLE + """ + ping_cmd = "timeout 0.2 ping -c1 -W1 -i0.2 -n -q {}".format(neighbor_ip) + + upper_tor_host.shell(ping_cmd, module_ignore_errors=True) + pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(upper_tor_host, neighbor_ip, REACHABLE))) + lower_tor_host.shell("sudo ip neigh flush all") + + arp_update_cmd = "docker exec -it swss supervisorctl start arp_update" + upper_tor_host.shell(arp_update_cmd) + + pytest_assert(wait_until(5, 1, 0, lambda: verify_neighbor_status(lower_tor_host, neighbor_ip, REACHABLE))) diff --git a/tests/arp/test_arp_extended.py b/tests/arp/test_arp_extended.py new file mode 100644 index 00000000000..2b777087bc5 --- /dev/null +++ b/tests/arp/test_arp_extended.py @@ -0,0 +1,78 @@ +""" +This module tests extended ARP features including gratuitous ARP and proxy ARP +""" +import logging +import ptf.testutils as testutils +import pytest +import ptf.mask as mask +import ptf.packet as packet + +from scapy.all import Ether, IPv6, ICMPv6ND_NS, ICMPv6ND_NA, \ + ICMPv6NDOptSrcLLAddr, in6_getnsmac, \ + in6_getnsma, inet_pton, inet_ntop, socket +from tests.arp.arp_utils import clear_dut_arp_cache, increment_ipv6_addr, increment_ipv4_addr +from tests.common.helpers.assertions import pytest_assert, pytest_require +from tests.common.fixtures.ptfhost_utils import change_mac_addresses + +pytestmark = [ + pytest.mark.topology('t0', 'dualtor') +] + +logger = logging.getLogger(__name__) + +def test_arp_garp_enabled(rand_selected_dut, garp_enabled, ip_and_intf_info, intfs_for_test, config_facts, ptfadapter): + """ + Send a gratuitous ARP (GARP) packet from the PTF to the DUT + + The DUT should learn the (previously unseen) ARP info from the packet + """ + pytest_require(garp_enabled, 'Gratuitous ARP not enabled for this device') + duthost = rand_selected_dut + ptf_intf_ipv4_addr = ip_and_intf_info[0] + + arp_request_ip = increment_ipv4_addr(ptf_intf_ipv4_addr) + arp_src_mac = 
'00:00:07:08:09:0a' + _, _, intf1_index, _, = intfs_for_test + + pkt = testutils.simple_arp_packet(pktlen=60, + eth_dst='ff:ff:ff:ff:ff:ff', + eth_src=arp_src_mac, + vlan_pcp=0, + arp_op=2, + ip_snd=arp_request_ip, + ip_tgt=arp_request_ip, + hw_snd=arp_src_mac, + hw_tgt='ff:ff:ff:ff:ff:ff' + ) + + clear_dut_arp_cache(duthost) + + logger.info("Sending GARP for target {} from PTF interface {}".format(arp_request_ip, intf1_index)) + testutils.send_packet(ptfadapter, intf1_index, pkt) + + vlan_intfs = config_facts['VLAN_INTERFACE'].keys() + + switch_arptable = duthost.switch_arptable()['ansible_facts'] + pytest_assert(switch_arptable['arptable']['v4'][arp_request_ip]['macaddress'].lower() == arp_src_mac.lower()) + pytest_assert(switch_arptable['arptable']['v4'][arp_request_ip]['interface'] in vlan_intfs) + +def test_proxy_arp(proxy_arp_enabled, ip_and_intf_info, ptfadapter, packets_for_test): + """ + Send an ARP request or neighbor solicitation (NS) to the DUT for an IP address within the subnet of the DUT's VLAN. 
+ + DUT should reply with an ARP reply or neighbor advertisement (NA) containing the DUT's own MAC + """ + pytest_require(proxy_arp_enabled, 'Proxy ARP not enabled for all VLANs') + + ptf_intf_ipv4_addr, _, ptf_intf_ipv6_addr, _, ptf_intf_index = ip_and_intf_info + + ip_version, outgoing_packet, expected_packet = packets_for_test + + if ip_version == 'v4': + pytest_require(ptf_intf_ipv4_addr is not None, 'No IPv4 VLAN address configured on device') + elif ip_version == 'v6': + pytest_require(ptf_intf_ipv6_addr is not None, 'No IPv6 VLAN address configured on device') + + ptfadapter.dataplane.flush() + testutils.send_packet(ptfadapter, ptf_intf_index, outgoing_packet) + testutils.verify_packet(ptfadapter, expected_packet, ptf_intf_index, timeout=10) diff --git a/tests/arp/test_neighbor_mac.py b/tests/arp/test_neighbor_mac.py index f5f8799b80b..b16c26669c3 100644 --- a/tests/arp/test_neighbor_mac.py +++ b/tests/arp/test_neighbor_mac.py @@ -3,7 +3,6 @@ import time from tests.common.helpers.assertions import pytest_assert -from tests.common.config_reload import config_reload logger = logging.getLogger(__name__) @@ -42,6 +41,7 @@ def interfaceConfig(self, duthosts, rand_one_dut_hostname): logger.info("Restore the DUT interface config, remove IP address") self.__configureInterfaceIp(duthost, action="remove") + self.__shutdownInterface(duthost) @pytest.fixture(params=[0, 1]) def macIndex(self, request): @@ -92,6 +92,24 @@ def __startInterface(self, duthost): self.DUT_ETH_IF ]) + def __shutdownInterface(self, duthost): + """ + Shutdown the interface on the DUT + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Returns: + None + """ + logger.info("Configure the interface '{0}' as DOWN".format(self.DUT_ETH_IF)) + duthost.shell(argv=[ + "config", + "interface", + "shutdown", + self.DUT_ETH_IF + ]) + def __configureInterfaceIp(self, duthost, action=None): """ Configure interface IP address on the DUT diff --git a/tests/arp/test_neighbor_mac_noptf.py 
b/tests/arp/test_neighbor_mac_noptf.py index 1260d735e7f..a326267f793 100644 --- a/tests/arp/test_neighbor_mac_noptf.py +++ b/tests/arp/test_neighbor_mac_noptf.py @@ -77,7 +77,7 @@ def setupDutConfig(self, duthosts, enum_rand_one_per_hwsku_frontend_hostname): yield logger.info("Reload Config DB") - config_reload(duthost, config_source='config_db', wait=120) + config_reload(duthost, config_source='config_db', safe_reload=True) @pytest.fixture(params=[4, 6]) def ipVersion(self, request): diff --git a/tests/arp/test_tagged_arp.py b/tests/arp/test_tagged_arp.py index 6465eb66465..ea96ec1075e 100644 --- a/tests/arp/test_tagged_arp.py +++ b/tests/arp/test_tagged_arp.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0') + pytest.mark.topology('t0', 'm0') ] PTF_PORT_MAPPING_MODE = "use_orig_interface" diff --git a/tests/arp/test_wr_arp.py b/tests/arp/test_wr_arp.py index d7a72f27ff6..e798fd32575 100644 --- a/tests/arp/test_wr_arp.py +++ b/tests/arp/test_wr_arp.py @@ -24,7 +24,7 @@ class TestWrArp: ''' - TestWrArp Performs control plane assisted warm-reboo + TestWrArp Performs control plane assisted warm-reboot ''' def __prepareVxlanConfigData(self, duthost, ptfhost, tbinfo): ''' @@ -38,13 +38,13 @@ def __prepareVxlanConfigData(self, duthost, ptfhost, tbinfo): None ''' mgFacts = duthost.get_extended_minigraph_facts(tbinfo) + vlan_facts = duthost.vlan_facts()['ansible_facts']['ansible_vlan_facts'] vxlanConfigData = { 'minigraph_port_indices': mgFacts['minigraph_ptf_indices'], 'minigraph_portchannel_interfaces': mgFacts['minigraph_portchannel_interfaces'], 'minigraph_portchannels': mgFacts['minigraph_portchannels'], 'minigraph_lo_interfaces': mgFacts['minigraph_lo_interfaces'], - 'minigraph_vlans': mgFacts['minigraph_vlans'], - 'minigraph_vlan_interfaces': mgFacts['minigraph_vlan_interfaces'], + 'vlan_facts': vlan_facts, 'dut_mac': duthost.facts['router_mac'] } with open(VXLAN_CONFIG_FILE, 'w') as file: @@ -53,8 +53,7 @@ def 
__prepareVxlanConfigData(self, duthost, ptfhost, tbinfo): logger.info('Copying ferret config file to {0}'.format(ptfhost.hostname)) ptfhost.copy(src=VXLAN_CONFIG_FILE, dest='/tmp/') - @pytest.fixture(scope='class', autouse=True) - def setupFerret(self, duthosts, rand_one_dut_hostname, ptfhost, tbinfo): + def setupFerret(self, duthost, ptfhost, tbinfo): ''' Sets Ferret service on PTF host. This class-scope fixture runs once before test start @@ -65,7 +64,6 @@ def setupFerret(self, duthosts, rand_one_dut_hostname, ptfhost, tbinfo): Returns: None ''' - duthost = duthosts[rand_one_dut_hostname] ptfhost.copy(src="arp/files/ferret.py", dest="/opt") ''' @@ -135,15 +133,19 @@ def setupFerret(self, duthosts, rand_one_dut_hostname, ptfhost, tbinfo): logger.info('Refreshing supervisor control with ferret configuration') ptfhost.shell('supervisorctl reread && supervisorctl update') + @pytest.fixture(scope='class', autouse=True) + def setupFerretFixture(self, duthosts, rand_one_dut_hostname, ptfhost, tbinfo): + duthost = duthosts[rand_one_dut_hostname] + self.setupFerret(duthost, ptfhost, tbinfo) + @pytest.fixture(scope='class', autouse=True) def clean_dut(self, duthosts, rand_one_dut_hostname): duthost = duthosts[rand_one_dut_hostname] yield logger.info("Clear ARP cache on DUT") duthost.command('sonic-clear arp') - - @pytest.fixture(scope='class', autouse=True) - def setupRouteToPtfhost(self, duthosts, rand_one_dut_hostname, ptfhost): + + def setupRouteToPtfhost(self, duthost, ptfhost): ''' Sets routes up on DUT to PTF host. 
This class-scope fixture runs once before test start @@ -154,7 +156,6 @@ def setupRouteToPtfhost(self, duthosts, rand_one_dut_hostname, ptfhost): Returns: None ''' - duthost = duthosts[rand_one_dut_hostname] result = duthost.shell(cmd="ip route show table default | sed -n 's/default //p'") assert len(result['stderr_lines']) == 0, 'Could not find the gateway for management port' @@ -167,9 +168,13 @@ def setupRouteToPtfhost(self, duthosts, rand_one_dut_hostname, ptfhost): "Add explicit route for PTF host ({0}) through eth0 (mgmt) interface ({1})".format(ptfIp, gwIp) ) duthost.shell(cmd='ip route add {0}/32 {1}'.format(ptfIp, gwIp)) - - yield - + + return route, ptfIp, gwIp + + def teardownRouteToPtfhost(self, duthost, route, ptfIp, gwIp): + """ + Teardown the routes added by setupRouteToPtfhost + """ if 'PortChannel' in route: logger.info( "Delete explicit route for PTF host ({0}) through eth0 (mgmt) interface ({1})".format(ptfIp, gwIp) @@ -178,6 +183,30 @@ def setupRouteToPtfhost(self, duthosts, rand_one_dut_hostname, ptfhost): assert result["rc"] == 0 or "No such process" in result["stderr"], \ "Failed to delete route with error '{0}'".format(result["stderr"]) + @pytest.fixture(scope='class', autouse=True) + def setupRouteToPtfhostFixture(self, duthosts, rand_one_dut_hostname, ptfhost): + duthost = duthosts[rand_one_dut_hostname] + route, ptfIp, gwIp = self.setupRouteToPtfhost(duthost, ptfhost) + yield + self.teardownRouteToPtfhost(duthost, route, ptfIp, gwIp) + + def Setup(self, duthost, ptfhost, tbinfo): + """ + A setup function that do the exactly same thing as the autoused fixtures do + Will be called in vnet_vxlan test + """ + self.setupFerret(duthost, ptfhost, tbinfo) + self.route, self.ptfIp, self.gwIp = self.setupRouteToPtfhost(duthost, ptfhost) + + def Teardown(self, duthost): + """ + A teardown function that do some cleanup after test + Will be called in vnet_vxlan test + """ + logger.info("Clear ARP cache on DUT") + duthost.command('sonic-clear arp') + 
self.teardownRouteToPtfhost(duthost, self.route, self.ptfIp, self.gwIp) + def testWrArp(self, request, duthost, ptfhost, creds): ''' Control Plane Assistant test for Warm-Reboot. diff --git a/tests/autorestart/test_container_autorestart.py b/tests/autorestart/test_container_autorestart.py old mode 100755 new mode 100644 index 79353ee2a92..28fbf036a52 --- a/tests/autorestart/test_container_autorestart.py +++ b/tests/autorestart/test_container_autorestart.py @@ -10,7 +10,6 @@ from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert from tests.common.helpers.assertions import pytest_require -from tests.common.helpers.dut_ports import decode_dut_port_name from tests.common import config_reload from tests.common.helpers.dut_utils import get_disabled_container_list @@ -28,13 +27,24 @@ POST_CHECK_THRESHOLD_SECS = 360 @pytest.fixture(autouse=True, scope='module') -def config_reload_after_tests(duthost): +def config_reload_after_tests(duthosts, selected_rand_one_per_hwsku_hostname): yield - config_reload(duthost) + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + config_reload(duthost, config_source='config_db', safe_reload=True) + + +def enable_autorestart(duthost): + # Enable autorestart for all features + feature_list, _ = duthost.get_feature_status() + for feature, status in list(feature_list.items()): + if status == 'enabled': + duthost.shell("sudo config feature autorestart {} enabled".format(feature)) + @pytest.fixture(autouse=True) -def ignore_expected_loganalyzer_exception(duthosts, enum_dut_feature_container, - enum_rand_one_per_hwsku_frontend_hostname, loganalyzer): +def ignore_expected_loganalyzer_exception(duthosts, enum_rand_one_per_hwsku_hostname, enum_rand_one_asic_index, + enum_dut_feature, loganalyzer): """ Ignore expected failure/error messages during testing the autorestart feature. 
@@ -64,6 +74,14 @@ def ignore_expected_loganalyzer_exception(duthosts, enum_dut_feature_container, Fifth, systemd would fire an error message:"ERR systemd[1]: Failed to start SNMP/TEAMD container." since SNMP/TEAMD container hits the limitation of restart. route_check.py also wrote an error message into syslog. + Sixth, after a process is killed, its network resources are not immediately released. So it might take some time + for the ports to be available again. The problem might be more pronounced with weak devices. So we expect some + failures with listening or binding to a socket. When encountering this problem, the process will be repeated + and it typically resolves by itself. So we skip "Unable to initialize team socket" in teamsyncd and "Failed to + bind socket" in dhcprelay. + + Also invalid OID is more of a warning. So we skip messages with keyword "invalid OID". + """ swss_syncd_teamd_regex = [ ".*ERR swss[0-9]*#orchagent.*removeLag.*", @@ -74,36 +92,57 @@ def ignore_expected_loganalyzer_exception(duthosts, enum_dut_feature_container, ".*ERR syncd[0-9]*#syncd.*SAI_API_SWITCH:sai_object_type_get_availability.*", ".*ERR syncd[0-9]*#syncd.*sendApiResponse: api SAI_COMMON_API_SET failed in syncd mode.*", ".*ERR syncd[0-9]*#syncd.*processQuadEvent.*", + ".*ERR syncd[0-9]*#syncd.*process_on_fdb_event: invalid OIDs in fdb notifications.*", + ".*ERR syncd[0-9]*#syncd.*process_on_fdb_event: FDB notification was not sent since it contain invalid " + "OIDs.*", + ".*ERR syncd[0-9]*#syncd.*saiGetMacAddress: failed to get mac address: SAI_STATUS_ITEM_NOT_FOUND.*", + ".*ERR syncd[0-9]*#SDK.*mlnx_bridge_1d_oid_to_data: Unexpected bridge type 0 is not 1D.*", + ".*ERR syncd[0-9]*#SDK.*mlnx_bridge_port_lag_or_port_get: Invalid port type - 2.*", + ".*ERR syncd[0-9]*#SDK.*mlnx_bridge_port_isolation_group_get: Isolation group is only supported for " + "bridge port type port.*", + ".*ERR syncd[0-9]*#SDK.*mlnx_debug_counter_availability_get: Unsupported debug counter type - 
(0|1).*", + ".*ERR syncd[0-9]*#SDK.*mlnx_get_port_stats_ext: Invalid port counter (177|178|179|180|181|182).*", + ".*ERR syncd[0-9]*#SDK.*Failed getting attrib SAI_BRIDGE_.*", + ".*ERR syncd[0-9]*#SDK.*sai_get_attributes: Failed attribs dispatch.*", + ".*ERR syncd[0-9]*#SDK.*Failed command read at communication channel: Connection reset by peer.*", ".*WARNING syncd[0-9]*#syncd.*skipping since it causes crash.*", + # Known issue, captured here: https://github.com/Azure/sonic-buildimage/issues/10000 , ignore it for now + ".*ERR swss[0-9]*#fdbsyncd.*readData.*netlink reports an error=-25 on reading a netlink socket.*", ".*ERR swss[0-9]*#portsyncd.*readData.*netlink reports an error=-33 on reading a netlink socket.*", ".*ERR teamd[0-9]*#teamsyncd.*readData.*netlink reports an error=-33 on reading a netlink socket.*", + ".*ERR teamd[0-9]*#teamsyncd.*readData.*Unable to initialize team socket.*", ".*ERR swss[0-9]*#orchagent.*set status: SAI_STATUS_ATTR_NOT_IMPLEMENTED_0.*", ".*ERR swss[0-9]*#orchagent.*setIntfVlanFloodType.*", + ".*ERR swss[0-9]*#buffermgrd.*Failed to process invalid entry.*", ".*ERR snmp#snmpd.*", + ".*ERR dhcp_relay#dhcp6?relay.*bind: Failed to bind socket to link local ipv6 address on interface .* " + "after [0-9]+ retries", ] ignore_regex_dict = { 'common' : [ ".*ERR monit.*", - ".*ERR systemd.*Failed to start .* container*", + ".*ERR systemd.*Failed to start .* [Cc]ontainer.*", ".*ERR kernel.*PortChannel.*", ".*ERR route_check.*", + ".*ERR wrong number of arguments for 'hset' command: Input/output error.*" ], 'pmon' : [ ".*ERR pmon#xcvrd.*initializeGlobalConfig.*", ".*ERR pmon#thermalctld.*Caught exception while initializing thermal manager.*", ".*ERR pmon#xcvrd.*Could not establish the active side.*", + ".*ERR pmon#python3.*Expected to get redis type 2 got type 3, err: NON-STRING-REPLY.*", ], 'swss' : swss_syncd_teamd_regex, 'syncd' : swss_syncd_teamd_regex, 'teamd' : swss_syncd_teamd_regex, } - dut_name, container_name = 
decode_dut_port_name(enum_dut_feature_container) - pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname and container_name != "unknown", - "Skips testing auto-restart of container '{}' on DuT '{}' since another DuT '{}' was chosen." - .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) - duthost = duthosts[dut_name] - feature = re.match(CONTAINER_NAME_REGEX, container_name).group(1) + # During syncd restart, the pmon container is also restarted, + # and we noticed some errors in the pmon container + ignore_regex_dict['syncd'].extend(ignore_regex_dict['pmon']) + + feature = enum_dut_feature + duthost = duthosts[enum_rand_one_per_hwsku_hostname] if loganalyzer: loganalyzer[duthost.hostname].ignore_regex.extend(ignore_regex_dict['common']) @@ -195,12 +234,12 @@ def kill_process_by_pid(duthost, container_name, program_name, program_pid): .format(program_name, container_name)) -def is_hiting_start_limit(duthost, container_name): +def is_hiting_start_limit(duthost, service_name): """ - @summary: Determine whether the container can not be restarted is due to + @summary: Determine whether the service can not be restarted is due to start-limit-hit or not """ - service_status = duthost.shell("sudo systemctl status {}.service | grep 'Active'".format(container_name)) + service_status = duthost.shell("sudo systemctl status {}.service | grep 'Active'".format(service_name)) for line in service_status["stdout_lines"]: if "start-limit-hit" in line: return True @@ -208,14 +247,14 @@ def is_hiting_start_limit(duthost, container_name): return False -def clear_failed_flag_and_restart(duthost, container_name): +def clear_failed_flag_and_restart(duthost, service_name, container_name): """ @summary: If a container hits the restart limitation, then we clear the failed flag and restart it. 
""" - logger.info("{} hits start limit and clear reset-failed flag".format(container_name)) - duthost.shell("sudo systemctl reset-failed {}.service".format(container_name)) - duthost.shell("sudo systemctl start {}.service".format(container_name)) + logger.info("{} hits start limit and clear reset-failed flag".format(service_name)) + duthost.shell("sudo systemctl reset-failed {}.service".format(service_name)) + duthost.shell("sudo systemctl start {}.service".format(service_name)) restarted = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, CONTAINER_CHECK_INTERVAL_SECS, 0, @@ -223,7 +262,7 @@ def clear_failed_flag_and_restart(duthost, container_name): pytest_assert(restarted, "Failed to restart container '{}' after reset-failed was cleared".format(container_name)) -def verify_autorestart_with_critical_process(duthost, container_name, program_name, +def verify_autorestart_with_critical_process(duthost, container_name, service_name, program_name, program_status, program_pid): """ @summary: Kill a critical process in a container to verify whether the container @@ -252,8 +291,8 @@ def verify_autorestart_with_critical_process(duthost, container_name, program_na 0, check_container_state, duthost, container_name, True) if not restarted: - if is_hiting_start_limit(duthost, container_name): - clear_failed_flag_and_restart(duthost, container_name) + if is_hiting_start_limit(duthost, service_name): + clear_failed_flag_and_restart(duthost, service_name, container_name) else: pytest.fail("Failed to restart container '{}'".format(container_name)) @@ -307,23 +346,32 @@ def check_all_critical_processes_status(duthost): return True -def postcheck_critical_processes_status(duthost, container_autorestart_states, up_bgp_neighbors): +def postcheck_critical_processes_status(duthost, feature_autorestart_states, up_bgp_neighbors): """Restarts the containers which hit the restart limitation. 
Then post checks to see whether all the critical processes are alive and expected BGP sessions are up after testing the autorestart feature. Args: duthost: An ansible object of DuT. - container_autorestart_states: A dictionary includes the container name (key) and + feature_autorestart_states: A dictionary includes the feature name (key) and its auto-restart state (value). up_bgp_neighbors: A list includes the IP of neighbors whose BGP session are up. Returns: True if post check succeeds; Otherwise False. """ - for container_name in container_autorestart_states.keys(): - if is_hiting_start_limit(duthost, container_name): - clear_failed_flag_and_restart(duthost, container_name) + for feature_name in feature_autorestart_states.keys(): + if feature_name in duthost.DEFAULT_ASIC_SERVICES: + for asic in duthost.asics: + service_name = asic.get_service_name(feature_name) + container_name = asic.get_docker_name(feature_name) + if is_hiting_start_limit(duthost, service_name): + clear_failed_flag_and_restart(duthost, service_name, container_name) + else: + # service_name and container_name will be same as feature + # name for features that are not in DEFAULT_ASIC_SERVICES. 
+ if is_hiting_start_limit(duthost, feature_name): + clear_failed_flag_and_restart(duthost, feature_name, feature_name) critical_proceses = wait_until( POST_CHECK_THRESHOLD_SECS, POST_CHECK_INTERVAL_SECS, 0, @@ -332,18 +380,19 @@ def postcheck_critical_processes_status(duthost, container_autorestart_states, u bgp_check = wait_until( POST_CHECK_THRESHOLD_SECS, POST_CHECK_INTERVAL_SECS, 0, - duthost.check_bgp_session_state, up_bgp_neighbors, "established" + duthost.check_bgp_session_state_all_asics, up_bgp_neighbors, "established" ) return critical_proceses, bgp_check -def run_test_on_single_container(duthost, container_name, tbinfo): - container_autorestart_states = duthost.get_container_autorestart_states() +def run_test_on_single_container(duthost, container_name, service_name, tbinfo): + feature_autorestart_states = duthost.get_container_autorestart_states() disabled_containers = get_disabled_container_list(duthost) skip_condition = disabled_containers[:] skip_condition.append("database") + skip_condition.append("acms") if tbinfo["topo"]["type"] != "t0": skip_condition.append("radv") @@ -357,13 +406,12 @@ def run_test_on_single_container(duthost, container_name, tbinfo): is_running = is_container_running(duthost, container_name) pytest_assert(is_running, "Container '{}' is not running. 
Exiting...".format(container_name)) - bgp_neighbors = duthost.get_bgp_neighbors() - up_bgp_neighbors = [ k.lower() for k, v in bgp_neighbors.items() if v["state"] == "established" ] + up_bgp_neighbors = duthost.get_bgp_neighbors_per_asic("established") logger.info("Start testing the container '{}'...".format(container_name)) restore_disabled_state = False - if container_autorestart_states[feature_name] == "disabled": + if feature_autorestart_states[feature_name] == "disabled": logger.info("Change auto-restart state of container '{}' to be 'enabled'".format(container_name)) duthost.shell("sudo config feature autorestart {} enabled".format(feature_name)) restore_disabled_state = True @@ -385,7 +433,7 @@ def run_test_on_single_container(duthost, container_name, tbinfo): continue program_status, program_pid = get_program_info(duthost, container_name, critical_process) - verify_autorestart_with_critical_process(duthost, container_name, critical_process, + verify_autorestart_with_critical_process(duthost, container_name, service_name, critical_process, program_status, program_pid) # Sleep 20 seconds in order to let the processes come into live after container is restarted. 
# We will uncomment the following line once the "extended" mode is added @@ -397,7 +445,7 @@ def run_test_on_single_container(duthost, container_name, tbinfo): for critical_group in critical_group_list: group_program_info = get_group_program_info(duthost, container_name, critical_group) for program_name in group_program_info: - verify_autorestart_with_critical_process(duthost, container_name, program_name, + verify_autorestart_with_critical_process(duthost, container_name, service_name, program_name, group_program_info[program_name][0], group_program_info[program_name][1]) # We are currently only testing one critical program for each critical group, which is @@ -410,10 +458,14 @@ def run_test_on_single_container(duthost, container_name, tbinfo): duthost.shell("sudo config feature autorestart {} disabled".format(feature_name)) critical_proceses, bgp_check = postcheck_critical_processes_status( - duthost, container_autorestart_states, up_bgp_neighbors + duthost, feature_autorestart_states, up_bgp_neighbors ) if not (critical_proceses and bgp_check): - config_reload(duthost) + config_reload(duthost, safe_reload=True) + # after config reload, the feature autorestart config is reset, + # so, before next test, enable again + enable_autorestart(duthost) + failed_check = "[Critical Process] " if not critical_proceses else "" failed_check += "[BGP] " if not bgp_check else "" processes_status = duthost.all_critical_process_status() @@ -421,7 +473,7 @@ def run_test_on_single_container(duthost, container_name, tbinfo): { k:{ "status": v["status"], - "exited_critical_process": processes["exited_critical_process"] + "exited_critical_process": v["exited_critical_process"] } } for k, v in processes_status.items() if v[ "status" @@ -440,17 +492,16 @@ def run_test_on_single_container(duthost, container_name, tbinfo): logger.info("End of testing the container '{}'".format(container_name)) -def test_containers_autorestart(duthosts, enum_dut_feature_container, - 
enum_rand_one_per_hwsku_frontend_hostname, tbinfo): +@pytest.mark.disable_loganalyzer +def test_containers_autorestart(duthosts, enum_rand_one_per_hwsku_hostname, enum_rand_one_asic_index, + enum_dut_feature, tbinfo): """ @summary: Test the auto-restart feature of each container against two scenarios: killing a non-critical process to verify the container is still running; killing each critical process to verify the container will be stopped and restarted """ - dut_name, container_name = decode_dut_port_name(enum_dut_feature_container) - pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname and container_name != "unknown", - "Skips testing auto-restart of container '{}' on DuT '{}' since another DuT '{}' was chosen." - .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) - duthost = duthosts[dut_name] - - run_test_on_single_container(duthost, container_name, tbinfo) + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + asic = duthost.asic_instance(enum_rand_one_asic_index) + service_name = asic.get_service_name(enum_dut_feature) + container_name = asic.get_docker_name(enum_dut_feature) + run_test_on_single_container(duthost, container_name, service_name, tbinfo) diff --git a/tests/bfd/conftest.py b/tests/bfd/conftest.py new file mode 100644 index 00000000000..6e49ab13bb7 --- /dev/null +++ b/tests/bfd/conftest.py @@ -0,0 +1,3 @@ +def pytest_addoption(parser): + parser.addoption("--num_sessions", action="store", default=5) + parser.addoption("--num_sessions_scale", action="store", default=128) diff --git a/tests/bfd/test_bfd.py b/tests/bfd/test_bfd.py index 9a57c331a03..57059201801 100644 --- a/tests/bfd/test_bfd.py +++ b/tests/bfd/test_bfd.py @@ -1,27 +1,16 @@ import pytest -import ipaddress -import natsort import random import time import json -from tests.common.fixtures.ptfhost_utils import change_mac_addresses, copy_arp_responder_py -from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip -from 
tests.common.dualtor.dual_tor_utils import get_t1_ptf_ports -from tests.common.dualtor.mux_simulator_control import mux_server_url -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m -from pkg_resources import parse_version -from tests.common import constants +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m # noqa F401 pytestmark = [ - pytest.mark.topology('t0', 't1', 't1-lag') + pytest.mark.topology('t1') ] -def skip_201911_and_older(duthost): - """ Skip the current test if the DUT version is 201911 or older. - """ - if parse_version(duthost.kernel_version) <= parse_version('4.9.0'): - pytest.skip("Test not supported for 201911 images or older. Skipping the test") +BFD_RESPONDER_SCRIPT_SRC_PATH = '../ansible/roles/test/files/helpers/bfd_responder.py' +BFD_RESPONDER_SCRIPT_DEST_PATH = '/opt/bfd_responder.py' def is_dualtor(tbinfo): @@ -40,60 +29,140 @@ def get_t0_intfs(mg_facts): def add_dut_ip(duthost, intfs, ips, prefix_len): + cmd_buffer = "" for idx in range(len(intfs)): - duthost.shell('sudo config interface ip add {} {}/{}'.format(intfs[idx], ips[idx], prefix_len)) + cmd_buffer += 'sudo config interface ip add {} {}/{} ;'.format(intfs[idx], ips[idx], prefix_len) + if idx % 50 == 0: + duthost.shell(cmd_buffer) + cmd_buffer = "" + if cmd_buffer != "": + duthost.shell(cmd_buffer) def remove_dut_ip(duthost, intfs, ips, prefix_len): + cmd_buffer = "" for idx in range(len(intfs)): - duthost.shell('sudo config interface ip remove {} {}/{}'.format(intfs[idx], ips[idx], prefix_len)) + cmd_buffer += 'sudo config interface ip remove {} {}/{} ;'.format(intfs[idx], ips[idx], prefix_len) + if idx % 50 == 0: + duthost.shell(cmd_buffer) + cmd_buffer = "" + if cmd_buffer != "": + duthost.shell(cmd_buffer) def get_neighbors(duthost, tbinfo, ipv6=False, count=1): - topo_type = tbinfo['topo']['name'] mg_facts = duthost.get_extended_minigraph_facts(tbinfo) - - if 't0' in 
topo_type: - vlan_intf = mg_facts['minigraph_vlan_interfaces'][1 if ipv6 else 0] - prefix_len = vlan_intf['prefixlen'] - vlan_addr = vlan_intf["addr"] - - is_backend_topology = mg_facts.get(constants.IS_BACKEND_TOPOLOGY_KEY, False) - if is_dualtor(tbinfo): - server_ips = mux_cable_server_ip(duthost) - vlan_intfs = natsort.natsorted(server_ips.keys()) - neighbor_devs = [mg_facts["minigraph_ptf_indices"][_] for _ in vlan_intfs] - server_ip_key = "server_ipv6" if ipv6 else "server_ipv4" - neighbor_addrs = [server_ips[_][server_ip_key].split("/")[0] for _ in vlan_intfs] - neighbor_interfaces = neighbor_devs + prefix_len = 127 if ipv6 else 31 + ip_pattern = '2000:2000::{:x}' if ipv6 else '101.0.0.{}' + t0_intfs = get_t0_intfs(mg_facts) + ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in t0_intfs] + count = min(count, len(t0_intfs)) + indices = random.sample(list(range(len(t0_intfs))), k=count) + port_intfs = [t0_intfs[_] for _ in indices] + neighbor_devs = [] + for intf in port_intfs: + pc_member = False + for pc in mg_facts['minigraph_portchannels']: + if intf in mg_facts['minigraph_portchannels'][pc]['members']: + neighbor_devs.append(pc) + pc_member = True + break + if not pc_member: + neighbor_devs.append(intf) + + local_addrs = [ip_pattern.format(idx * 2) for idx in indices] + neighbor_addrs = [ip_pattern.format(idx * 2 + 1) for idx in indices] + neighbor_interfaces = [ptf_ports[_] for _ in indices] + + return local_addrs, prefix_len, neighbor_addrs, neighbor_devs, neighbor_interfaces + + +def get_neighbors_scale(duthost, tbinfo, ipv6=False, scale_count=1): + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + t1_ipv4_pattern = '104.0.{}.{}' + t1_ipv6_pattern = '2002:2000::{:x}' + t0_intfs = get_t0_intfs(mg_facts) + ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in t0_intfs] + count = min(2, len(t0_intfs)) + indices = random.sample(list(range(len(t0_intfs))), k=count) + port_intfs = [t0_intfs[_] for _ in indices] + neighbor_intfs 
= [] + for intf in port_intfs: + pc_member = False + for pc in mg_facts['minigraph_portchannels']: + if intf in mg_facts['minigraph_portchannels'][pc]['members']: + neighbor_intfs.append(pc) + pc_member = True + break + if not pc_member: + neighbor_intfs.append(intf) + ptf_intfs = [ptf_ports[_] for _ in indices] + # local_addrs, prefix_len, neighbor_addrs, neighbor_devs, neighbor_interfaces + local_addrs = [] + neighbor_addrs = [] + neighbor_devs = [] + ptf_devs = [] + index = 0 + for idx in range(1, scale_count): + if idx != 0 and idx % 127 == 0: + index += 1 + if ipv6: + local_addrs.append(t1_ipv6_pattern.format(idx * 2)) + neighbor_addrs.append(t1_ipv6_pattern.format(idx * 2 + 1)) + neighbor_devs.append(neighbor_intfs[index]) + ptf_devs.append(ptf_intfs[index]) else: - vlan_subnet = ipaddress.ip_network(vlan_intf['subnet']) - vlan = mg_facts['minigraph_vlans'][mg_facts['minigraph_vlan_interfaces'][1 if ipv6 else 0]['attachto']] - vlan_ports = vlan['members'] - vlan_id = vlan['vlanid'] - vlan_ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in vlan_ports] - neighbor_devs = vlan_ptf_ports - # backend topology use ethx.x(e.g. eth30.1000) during servers and T0 in ptf - # in other topology use ethx(e.g. 
eth30) - if is_backend_topology: - neighbor_interfaces = [str(dev) + constants.VLAN_SUB_INTERFACE_SEPARATOR + str(vlan_id) for dev in neighbor_devs] - else: - neighbor_interfaces = neighbor_devs - neighbor_addrs = [str(vlan_subnet[i + 2]) for i in range(len(neighbor_devs))] - count = min(count, len(neighbor_devs)) - indices = random.sample(list(range(len(neighbor_devs))), k=count) - return [vlan_addr for _ in indices], prefix_len, [neighbor_addrs[_] for _ in indices], [neighbor_devs[_] for _ in indices], [neighbor_interfaces[_] for _ in indices] - elif 't1' in topo_type: - t1_ipv4_pattern = '101.0.0.{}' - t1_ipv6_pattern = '2000:2000::{:x}' - t0_intfs = get_t0_intfs(mg_facts) - ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in t0_intfs] - count = min(count, len(t0_intfs)) - indices = random.sample(list(range(len(t0_intfs))), k=count) + rolloveridx = idx % 125 + idx2 = idx // 125 + local_addrs.append(t1_ipv4_pattern.format(idx2, rolloveridx * 2)) + neighbor_addrs.append(t1_ipv4_pattern.format(idx2, rolloveridx * 2 + 1)) + neighbor_devs.append(neighbor_intfs[index]) + ptf_devs.append(ptf_intfs[index]) + prefix = 127 if ipv6 else 31 + return local_addrs, prefix, neighbor_addrs, neighbor_devs, ptf_devs + + +def get_loopback_intf(mg_facts, ipv6): + ipv6idx = 0 if mg_facts['minigraph_lo_interfaces'][0]['prefixlen'] == 128 else 1 + if ipv6: + return mg_facts['minigraph_lo_interfaces'][ipv6idx]['addr'] + else: + return mg_facts['minigraph_lo_interfaces'][(ipv6idx + 1) % 2]['addr'] + + +def get_neighbors_multihop(duthost, tbinfo, ipv6=False, count=1): + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + t0_ipv4_pattern = '4.{}.{}.1' + t0_ipv6_pattern = '3000:3000:{:x}::3000' + t0_intfs = get_t0_intfs(mg_facts) + ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in t0_intfs] + loopback_addr = get_loopback_intf(mg_facts, ipv6) + + index = random.sample(list(range(len(t0_intfs))), k=1)[0] + port_intf = t0_intfs[index] + ptf_intf = 
ptf_ports[index] + nexthop_ip = "" + neighbour_dev_name = mg_facts['minigraph_neighbors'][port_intf]['name'] + for bgpinfo in mg_facts['minigraph_bgp']: + if bgpinfo['name'] == neighbour_dev_name: + nexthop_ip = bgpinfo['addr'] + if ipv6 and ":" not in nexthop_ip: + nexthop_ip = "" + continue + break + if nexthop_ip == "": + assert False + neighbor_addrs = [] + idx2 = 0 + for idx in range(1, count): + if idx % 250 == 0: + idx2 += 1 if ipv6: - return [t1_ipv6_pattern.format(idx * 2) for idx in indices], 127, [t1_ipv6_pattern.format(idx * 2 + 1) for idx in indices], [t0_intfs[_] for _ in indices], [ptf_ports[_] for _ in indices] + neighbor_addrs.append(t0_ipv6_pattern.format(idx)) else: - return [t1_ipv4_pattern.format(idx * 2) for idx in indices], 31, [t1_ipv4_pattern.format(idx * 2 + 1) for idx in indices], [t0_intfs[_] for _ in indices], [ptf_ports[_] for _ in indices] + neighbor_addrs.append(t0_ipv4_pattern.format((idx % 250), idx2)) + + return loopback_addr, ptf_intf, nexthop_ip, neighbor_addrs def init_ptf_bfd(ptfhost): @@ -105,49 +174,76 @@ def stop_ptf_bfd(ptfhost): def add_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=False): + cmd_buffer = "" for idx in range(len(neighbor_addrs)): if ipv6: - ptfhost.shell("ip -6 addr add {}/{} dev eth{}".format(neighbor_addrs[idx], prefix_len, neighbor_interfaces[idx])) + cmd_buffer += "ip -6 addr add {}/{} dev eth{} ;".format(neighbor_addrs[idx], prefix_len, + neighbor_interfaces[idx]) else: - ptfhost.shell("ip addr add {}/{} dev eth{}".format(neighbor_addrs[idx], prefix_len, neighbor_interfaces[idx])) + cmd_buffer += "ip addr add {}/{} dev eth{} ;".format(neighbor_addrs[idx], prefix_len, + neighbor_interfaces[idx]) + if idx % 50 == 0: + ptfhost.shell(cmd_buffer) + cmd_buffer = "" + if cmd_buffer != "": + ptfhost.shell(cmd_buffer) def del_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=False): + cmd_buffer = "" for idx in range(len(neighbor_addrs)): if ipv6: - ptfhost.shell("ip 
-6 addr del {}/{} dev eth{}".format(neighbor_addrs[idx], prefix_len, neighbor_interfaces[idx]), module_ignore_errors=True) + cmd_buffer += "ip -6 addr del {}/{} dev eth{} ;".format(neighbor_addrs[idx], prefix_len, + neighbor_interfaces[idx]) else: - ptfhost.shell("ip addr del {}/{} dev eth{}".format(neighbor_addrs[idx], prefix_len, neighbor_interfaces[idx]), module_ignore_errors=True) + cmd_buffer += "ip addr del {}/{} dev eth{} ;".format(neighbor_addrs[idx], prefix_len, + neighbor_interfaces[idx]) + if idx % 50 == 0: + ptfhost.shell(cmd_buffer, module_ignore_errors=True) + cmd_buffer = "" + if cmd_buffer != "": + ptfhost.shell(cmd_buffer, module_ignore_errors=True) def check_ptf_bfd_status(ptfhost, neighbor_addr, local_addr, expected_state): - bfd_state = ptfhost.shell("bfdd-control status local {} remote {}".format(neighbor_addr, local_addr))["stdout"].split("\n") + bfd_state = ptfhost.shell("bfdd-control status local {} remote {}" + .format(neighbor_addr, local_addr))["stdout"].split("\n") for line in bfd_state: field = line.split('=')[0].strip() if field == "state": - assert line.split('=')[1].strip() == expected_state + assert expected_state in line.split('=')[1].strip() def check_dut_bfd_status(duthost, neighbor_addr, expected_state): - bfd_state = duthost.shell("sonic-db-cli STATE_DB HGET 'BFD_SESSION_TABLE|default|default|{}' 'state'".format(neighbor_addr), module_ignore_errors=False)['stdout_lines'] - assert bfd_state[0] == expected_state + bfd_state = duthost.shell("sonic-db-cli STATE_DB HGET 'BFD_SESSION_TABLE|default|default|{}' 'state'" + .format(neighbor_addr), module_ignore_errors=False)['stdout_lines'] + assert expected_state in bfd_state[0] -def create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs): +def create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs, dut_init_first, scale_test=False): # Create a tempfile for BFD sessions bfd_file_dir = duthost.shell('mktemp')['stdout'] bfd_config = [] + ptf_buffer = "" + if 
scale_test: + # Force the PTF initialization to be first if running a scale test. + # Doing so that we can send batches of 50 commands to PTF and keep the code readable. + assert (dut_init_first is False) for idx, neighbor_addr in enumerate(neighbor_addrs): - duthost.shell("sonic-db-cli APPL_DB hmset 'BFD_SESSION_TABLE:default:default:{}' local_addr {}".format(neighbor_addr, local_addrs[idx])) bfd_config.append({ "BFD_SESSION_TABLE:default:default:{}".format(neighbor_addr): { "local_addr": local_addrs[idx] }, "OP": "SET" }) - ptfhost.shell("bfdd-control connect local {} remote {}".format(neighbor_addr, local_addrs[idx])) + ptf_buffer += "bfdd-control connect local {} remote {} ; ".format(neighbor_addr, local_addrs[idx]) + if scale_test and idx % 50 == 0: + ptfhost.shell(ptf_buffer) + ptf_buffer = "" + if not dut_init_first and ptf_buffer != "": + ptfhost.shell(ptf_buffer) # Copy json file to DUT duthost.copy(content=json.dumps(bfd_config, indent=4), dest=bfd_file_dir, verbose=False) @@ -156,18 +252,62 @@ def create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs): module_ignore_errors=True) if result['rc'] != 0: pytest.fail('Failed to apply BFD session configuration file: {}'.format(result['stderr'])) + if dut_init_first: + ptfhost.shell(ptf_buffer) -def remove_bfd_sessions(duthost, local_addrs, neighbor_addrs): +def create_bfd_sessions_multihop(ptfhost, duthost, loopback_addr, ptf_intf, neighbor_addrs): # Create a tempfile for BFD sessions bfd_file_dir = duthost.shell('mktemp')['stdout'] + ptf_file_dir = ptfhost.shell('mktemp')['stdout'] bfd_config = [] + ptf_config = [] + for neighbor_addr in neighbor_addrs: + bfd_config.append({ + "BFD_SESSION_TABLE:default:default:{}".format(neighbor_addr): { + "local_addr": loopback_addr, + "multihop": "true" + }, + "OP": "SET" + }) + ptf_config.append( + { + "neighbor_addr": loopback_addr, + "local_addr": neighbor_addr, + "multihop": "true", + "ptf_intf": "eth{}".format(ptf_intf) + } + ) + + # Copy json file to DUT 
+ duthost.copy(content=json.dumps(bfd_config, indent=4), dest=bfd_file_dir, verbose=False) + # Apply BFD sessions with swssconfig + result = duthost.shell('docker exec -i swss swssconfig /dev/stdin < {}'.format(bfd_file_dir), + module_ignore_errors=True) + if result['rc'] != 0: + pytest.fail('Failed to apply BFD session configuration file: {}'.format(result['stderr'])) + # Copy json file to PTF + ptfhost.copy(content=json.dumps(ptf_config, indent=4), dest=ptf_file_dir, verbose=False) + + ptfhost.copy(src=BFD_RESPONDER_SCRIPT_SRC_PATH, dest=BFD_RESPONDER_SCRIPT_DEST_PATH) + + extra_vars = {"bfd_responder_args": "-c {}".format(ptf_file_dir)} + ptfhost.host.options["variable_manager"].extra_vars.update(extra_vars) + + ptfhost.template(src='templates/bfd_responder.conf.j2', dest='/etc/supervisor/conf.d/bfd_responder.conf') + ptfhost.command('supervisorctl reread') + ptfhost.command('supervisorctl update') + ptfhost.command('supervisorctl start bfd_responder') + + +def remove_bfd_sessions(duthost, neighbor_addrs): + # Create a tempfile for BFD sessions + bfd_file_dir = duthost.shell('mktemp')['stdout'] + bfd_config = [] for idx, neighbor_addr in enumerate(neighbor_addrs): - duthost.shell("sonic-db-cli APPL_DB hmset 'BFD_SESSION_TABLE:default:default:{}' local_addr {}".format(neighbor_addr, local_addrs[idx])) bfd_config.append({ "BFD_SESSION_TABLE:default:default:{}".format(neighbor_addr): { - "local_addr": local_addrs[idx] }, "OP": "DEL" }) @@ -186,77 +326,118 @@ def update_bfd_session_state(ptfhost, neighbor_addr, local_addr, state): ptfhost.shell("bfdd-control session local {} remote {} state {}".format(neighbor_addr, local_addr, state)) -@pytest.mark.skip(reason="Test may currently fail due to lack of hardware support") -def test_bfd(rand_selected_dut, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): - duthost = rand_selected_dut - bfd_session_cnt = 5 - skip_201911_and_older(duthost) - local_addrs, prefix_len, neighbor_addrs, neighbor_devs, 
neighbor_interfaces = get_neighbors(duthost, tbinfo, count = bfd_session_cnt) +def update_bfd_state(ptfhost, neighbor_addr, local_addr, state): + ptfhost.shell("bfdd-control session local {} remote {} {}".format(neighbor_addr, local_addr, state)) + +@pytest.mark.parametrize('dut_init_first', [True, False], ids=['dut_init_first', 'ptf_init_first']) +@pytest.mark.parametrize('ipv6', [False, True], ids=['ipv4', 'ipv6']) +def test_bfd_basic(request, rand_selected_dut, ptfhost, tbinfo, ipv6, dut_init_first): + duthost = rand_selected_dut + bfd_session_cnt = int(request.config.getoption('--num_sessions')) + local_addrs, prefix_len, neighbor_addrs, neighbor_devs, neighbor_interfaces = get_neighbors(duthost, tbinfo, ipv6, + count=bfd_session_cnt) try: - if 't1' in tbinfo['topo']['name']: - add_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) + add_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) init_ptf_bfd(ptfhost) - add_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=False) - create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs) + add_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6) + create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs, dut_init_first) time.sleep(1) - for idx, neighbor_addr in enumerate(neighbor_addrs): check_dut_bfd_status(duthost, neighbor_addr, "Up") check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Up") update_idx = random.choice(range(bfd_session_cnt)) - update_bfd_session_state(ptfhost, neighbor_addrs[update_idx], local_addrs[idx], "down") + update_bfd_session_state(ptfhost, neighbor_addrs[update_idx], local_addrs[update_idx], "admin") + time.sleep(1) + + for idx, neighbor_addr in enumerate(neighbor_addrs): + if idx == update_idx: + check_dut_bfd_status(duthost, neighbor_addr, "Admin_Down") + check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "AdminDown") + else: + check_dut_bfd_status(duthost, neighbor_addr, "Up") + 
check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Up") + + update_bfd_session_state(ptfhost, neighbor_addrs[update_idx], local_addrs[update_idx], "up") + time.sleep(1) - for idx in range(bfd_session_cnt): + check_dut_bfd_status(duthost, neighbor_addrs[update_idx], "Up") + check_ptf_bfd_status(ptfhost, neighbor_addrs[update_idx], local_addrs[update_idx], "Up") + + update_idx = random.choice(range(bfd_session_cnt)) + update_bfd_state(ptfhost, neighbor_addrs[update_idx], local_addrs[update_idx], "suspend") + time.sleep(5) + + for idx, neighbor_addr in enumerate(neighbor_addrs): if idx == update_idx: check_dut_bfd_status(duthost, neighbor_addr, "Down") - check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Down") + check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Init") else: check_dut_bfd_status(duthost, neighbor_addr, "Up") check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Up") + finally: stop_ptf_bfd(ptfhost) - del_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=False) - remove_bfd_sessions(duthost, local_addrs, neighbor_addrs) - if 't1' in tbinfo['topo']['name']: - remove_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) + del_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6) + remove_bfd_sessions(duthost, neighbor_addrs) + remove_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) -@pytest.mark.skip(reason="Test may currently fail due to lack of hardware support") -def test_bfd_ipv6(rand_selected_dut, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): +@pytest.mark.parametrize('ipv6', [False, True], ids=['ipv4', 'ipv6']) +def test_bfd_scale(request, rand_selected_dut, ptfhost, tbinfo, ipv6): duthost = rand_selected_dut - bfd_session_cnt = 5 - skip_201911_and_older(duthost) - local_addrs, prefix_len, neighbor_addrs, neighbor_devs, neighbor_interfaces = get_neighbors(duthost, tbinfo, ipv6=True, count = bfd_session_cnt) + bfd_session_cnt = 
int(request.config.getoption('--num_sessions_scale')) + local_addrs, prefix_len, neighbor_addrs, neighbor_devs, neighbor_interfaces = \ + get_neighbors_scale(duthost, tbinfo, ipv6, scale_count=bfd_session_cnt) try: - if 't1' in tbinfo['topo']['name']: - add_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) + add_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) init_ptf_bfd(ptfhost) - add_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=True) - create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs) + add_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6) + create_bfd_sessions(ptfhost, duthost, local_addrs, neighbor_addrs, False, True) - time.sleep(1) + time.sleep(10) + bfd_state = ptfhost.shell("bfdd-control status") + dut_state = duthost.shell("show bfd summary") + for itr in local_addrs: + assert itr in bfd_state['stdout'] + assert itr in dut_state['stdout'] - for idx, neighbor_addr in enumerate(neighbor_addrs): - check_dut_bfd_status(duthost, neighbor_addr, "Up") - check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Up") + finally: + time.sleep(10) + stop_ptf_bfd(ptfhost) + del_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6) + remove_bfd_sessions(duthost, neighbor_addrs) + remove_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) - update_idx = random.choice(range(bfd_session_cnt)) - update_bfd_session_state(ptfhost, neighbor_addrs[update_idx], local_addrs[idx], "down") - for idx in range(bfd_session_cnt): - if idx == update_idx: - check_dut_bfd_status(duthost, neighbor_addr, "Down") - check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Down") - else: - check_dut_bfd_status(duthost, neighbor_addr, "Up") - check_ptf_bfd_status(ptfhost, neighbor_addr, local_addrs[idx], "Up") +@pytest.mark.parametrize('ipv6', [False, True], ids=['ipv4', 'ipv6']) +def test_bfd_multihop(request, rand_selected_dut, ptfhost, tbinfo, + 
toggle_all_simulator_ports_to_rand_selected_tor_m, ipv6): # noqa F811 + duthost = rand_selected_dut + + bfd_session_cnt = int(request.config.getoption('--num_sessions')) + loopback_addr, ptf_intf, nexthop_ip, neighbor_addrs = get_neighbors_multihop(duthost, tbinfo, ipv6, + count=bfd_session_cnt) + try: + cmd_buffer = "" + for neighbor in neighbor_addrs: + cmd_buffer += 'sudo ip route add {} via {} ;'.format(neighbor, nexthop_ip) + duthost.shell(cmd_buffer, module_ignore_errors=True) + + create_bfd_sessions_multihop(ptfhost, duthost, loopback_addr, ptf_intf, neighbor_addrs) + + time.sleep(1) + for neighbor_addr in neighbor_addrs: + check_dut_bfd_status(duthost, neighbor_addr, "Up") + finally: - stop_ptf_bfd(ptfhost) - del_ipaddr(ptfhost, neighbor_addrs, prefix_len, neighbor_interfaces, ipv6=True) - remove_bfd_sessions(duthost, local_addrs, neighbor_addrs) - if 't1' in tbinfo['topo']['name']: - remove_dut_ip(duthost, neighbor_devs, local_addrs, prefix_len) + remove_bfd_sessions(duthost, neighbor_addrs) + cmd_buffer = "" + for neighbor in neighbor_addrs: + cmd_buffer += 'sudo ip route delete {} via {} ;'.format(neighbor, nexthop_ip) + duthost.shell(cmd_buffer, module_ignore_errors=True) + ptfhost.command('supervisorctl stop bfd_responder') + ptfhost.file(path=BFD_RESPONDER_SCRIPT_DEST_PATH, state="absent") diff --git a/tests/bgp/bgp_helpers.py b/tests/bgp/bgp_helpers.py index c1670939307..5256bd7bea9 100644 --- a/tests/bgp/bgp_helpers.py +++ b/tests/bgp/bgp_helpers.py @@ -2,7 +2,6 @@ import re import time import json -from tests.common.helpers.constants import DEFAULT_ASIC_ID from tests.common.helpers.assertions import pytest_assert from tests.common.utilities import wait_until @@ -12,7 +11,7 @@ BGP_PLAIN_TEMPLATE = 'bgp_plain.j2' BGP_NO_EXPORT_TEMPLATE = 'bgp_no_export.j2' BGP_CONFIG_BACKUP = 'backup_bgpd.conf.j2' -DEFAULT_BGP_CONFIG = 'bgp:/usr/share/sonic/templates/bgpd/bgpd.conf.j2' +DEFAULT_BGP_CONFIG = '/usr/share/sonic/templates/bgpd/bgpd.conf.j2' DUMP_FILE = 
"/tmp/bgp_monitor_dump.log" CUSTOM_DUMP_SCRIPT = "bgp/bgp_monitor_dump.py" CUSTOM_DUMP_SCRIPT_DEST = "/usr/share/exabgp/bgp_monitor_dump.py" @@ -30,22 +29,10 @@ def apply_bgp_config(duthost, template_name): duthost: DUT host object template_name: pathname of the bgp config on the DUT """ - duthost.shell('docker cp {} {}'.format(template_name, DEFAULT_BGP_CONFIG)) - restart_bgp(duthost) - - -def restart_bgp(duthost, asic_index=DEFAULT_ASIC_ID): - """ - Restart bgp services on the DUT - - Args: - duthost: DUT host object - """ - duthost.asic_instance(asic_index).reset_service("bgp") - duthost.asic_instance(asic_index).restart_service("bgp") - docker_name = duthost.asic_instance(asic_index).get_docker_name("bgp") - pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, docker_name), "BGP not started.") - + duthost.docker_copy_to_all_asics('bgp', template_name, DEFAULT_BGP_CONFIG) + duthost.restart_service("bgp") + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started_per_asic_or_host, "bgp"), "BGP not started.") + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started_per_asic_or_host, "swss"), "SWSS not started.") def define_config(duthost, template_src_path, template_dst_path): """ @@ -58,7 +45,7 @@ def define_config(duthost, template_src_path, template_dst_path): """ duthost.shell("mkdir -p {}".format(DUT_TMP_DIR)) duthost.copy(src=template_src_path, dest=template_dst_path) - + def get_no_export_output(vm_host): """ @@ -81,12 +68,13 @@ def apply_default_bgp_config(duthost, copy=False): """ bgp_config_backup = os.path.join(DUT_TMP_DIR, BGP_CONFIG_BACKUP) if copy: - duthost.shell('docker cp {} {}'.format(DEFAULT_BGP_CONFIG, bgp_config_backup)) + duthost.docker_copy_from_asic('bgp', DEFAULT_BGP_CONFIG, bgp_config_backup) else: - duthost.shell('docker cp {} {}'.format(bgp_config_backup, DEFAULT_BGP_CONFIG)) + duthost.docker_copy_to_all_asics('bgp', bgp_config_backup, DEFAULT_BGP_CONFIG) # Skip 'start-limit-hit' threshold - 
duthost.shell('systemctl reset-failed bgp') - restart_bgp(duthost) + duthost.reset_service("bgp") + duthost.restart_service("bgp") + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started_per_asic_or_host, "bgp"), "BGP not started.") def parse_exabgp_dump(host): """ @@ -149,7 +137,8 @@ def remove_bgp_neighbors(duthost, asic_index): duthost.shell(cmd) # Restart BGP instance on that asic - restart_bgp(duthost, asic_index) + duthost.restart_service_on_asic("bgp", asic_index) + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started_per_asic_or_host, "bgp"), "BGP not started.") return bgp_neighbors @@ -166,4 +155,5 @@ def restore_bgp_neighbors(duthost, asic_index, bgp_neighbors): duthost.shell("sudo sonic-cfggen {} -a '{}' --write-to-db".format(namespace_prefix, bgp_neigh_json)) # Restart BGP instance on that asic - restart_bgp(duthost, asic_index) + duthost.restart_service_on_asic("bgp", asic_index) + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started_per_asic_or_host, "bgp"), "BGP not started.") diff --git a/tests/bgp/conftest.py b/tests/bgp/conftest.py index f932e6a1160..bdad783bfd0 100644 --- a/tests/bgp/conftest.py +++ b/tests/bgp/conftest.py @@ -64,7 +64,7 @@ def check_results(results): @pytest.fixture(scope='module') -def setup_bgp_graceful_restart(duthosts, rand_one_dut_hostname, nbrhosts, tbinfo): +def setup_bgp_graceful_restart(duthosts, rand_one_dut_hostname, nbrhosts, tbinfo, cct=24): duthost = duthosts[rand_one_dut_hostname] config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] @@ -132,7 +132,7 @@ def restore_nbr_gr(node=None, results=None): ) results[node['host'].hostname] = node_results - results = parallel_run(configure_nbr_gr, (), {}, nbrhosts.values(), timeout=120) + results = parallel_run(configure_nbr_gr, (), {}, nbrhosts.values(), timeout=120, concurrent_tasks=cct) check_results(results) @@ -150,12 +150,12 @@ def restore_nbr_gr(node=None, results=None): if not 
res: # Disable graceful restart in case of failure - parallel_run(restore_nbr_gr, (), {}, nbrhosts.values(), timeout=120) + parallel_run(restore_nbr_gr, (), {}, nbrhosts.values(), timeout=120, concurrent_tasks=cct) pytest.fail(err_msg) yield - results = parallel_run(restore_nbr_gr, (), {}, nbrhosts.values(), timeout=120) + results = parallel_run(restore_nbr_gr, (), {}, nbrhosts.values(), timeout=120, concurrent_tasks=cct) check_results(results) @@ -164,7 +164,7 @@ def restore_nbr_gr(node=None, results=None): @pytest.fixture(scope="module") -def setup_interfaces(duthosts, rand_one_dut_hostname, ptfhost, request, tbinfo): +def setup_interfaces(duthosts, rand_one_dut_hostname, ptfhost, request, tbinfo, topo_scenario): """Setup interfaces for the new BGP peers on PTF.""" def _is_ipv4_address(ip_addr): @@ -183,7 +183,7 @@ def _duthost_cleanup_ip(duthost, namespace, ip): intf_name = fields[-1] duthost.shell("config interface %s ip remove %s %s" % (namespace, intf_name, ip)) - ip_intfs = duthost.show_and_parse('show ip {} interface'.format(namespace)) + ip_intfs = duthost.show_and_parse('show ip interface {}'.format(namespace)) # For interface that has two IP configured, the output looks like: # admin@vlab-03:~$ show ip int @@ -256,7 +256,11 @@ def _setup_interfaces_dualtor(mg_facts, peer_count): { "local_intf": loopback_intf["name"], "local_addr": "%s/%s" % (loopback_intf_addr, loopback_intf_prefixlen), - "neighbor_intf": "eth%s" % mg_facts["minigraph_port_indices"][local_interface], + # Note: Config same subnets on PTF will generate two connect routes on PTF. + # This may lead to different IPs having the same FDB entry on DUT even when they are on different + # interfaces and cause layer3 packet drops on PTF, so here we use the same interface for different + # neighbors. 
+ "neighbor_intf": "eth%s" % mg_facts["minigraph_port_indices"][local_interfaces[0]], "neighbor_addr": "%s/%s" % (mux_configs[local_interface]["server_ipv4"].split("/")[0], vlan_intf_prefixlen) } ) @@ -264,8 +268,8 @@ def _setup_interfaces_dualtor(mg_facts, peer_count): ptfhost.remove_ip_addresses() for conn in connections: - ptfhost.shell("ifconfig %s %s" % (conn["neighbor_intf"], - conn["neighbor_addr"])) + ptfhost.shell("ip address add %s dev %s" % (conn["neighbor_addr"], conn["neighbor_intf"])) + ptfhost.shell("ip route add %s via %s" % (loopback_intf_addr, vlan_intf_addr)) yield connections @@ -337,6 +341,7 @@ def _setup_interfaces_t1_or_t2(mg_facts, peer_count): used_subnets.add(ipaddress.ip_network(intf["subnet"])) ipv4_lag_interfaces = [] + asic_idx = 0 if mg_facts["minigraph_portchannel_interfaces"]: for pt in mg_facts["minigraph_portchannel_interfaces"]: if _is_ipv4_address(pt["addr"]): @@ -344,7 +349,17 @@ def _setup_interfaces_t1_or_t2(mg_facts, peer_count): # Only use LAG with 1 member for bgpmon session between PTF, # It's because exabgp on PTF is bind to single interface if len(pt_members) == 1: - ipv4_lag_interfaces.append(pt["attachto"]) + # If first time, we record the asic index + if not ipv4_lag_interfaces: + ipv4_lag_interfaces.append(pt["attachto"]) + asic_idx = duthost.get_asic_index_for_portchannel(pt["attachto"]) + # Not first time, only append the portchannel that belongs to the same asic in current list + else: + asic = duthost.get_asic_index_for_portchannel(pt["attachto"]) + if asic != asic_idx: + continue + else: + ipv4_lag_interfaces.append(pt["attachto"]) used_subnets.add(ipaddress.ip_network(pt["subnet"])) vlan_sub_interfaces = [] @@ -355,8 +370,9 @@ def _setup_interfaces_t1_or_t2(mg_facts, peer_count): used_subnets.add(ipaddress.ip_network(intf["subnet"])) subnet_prefixlen = list(used_subnets)[0].prefixlen - _subnets = ipaddress.ip_network(u"10.0.0.0/24").subnets(new_prefix=subnet_prefixlen) - subnets = (_ for _ in _subnets if _ not 
in used_subnets) + # Use a subnet which doesn't conflict with other subnets used in minigraph + subnets = ipaddress.ip_network(u"20.0.0.0/24").subnets(new_prefix=subnet_prefixlen) + + loopback_ip = None for intf in mg_facts["minigraph_lo_interfaces"]: @@ -367,23 +383,30 @@ def _setup_interfaces_t1_or_t2(mg_facts, peer_count): pytest.fail("ipv4 lo interface not found") for intf, subnet in zip(random.sample(ipv4_interfaces + ipv4_lag_interfaces + vlan_sub_interfaces, peer_count), subnets): + def _get_namespace(minigraph_config, intf): + namespace = DEFAULT_NAMESPACE + if intf in minigraph_config and 'namespace' in minigraph_config[intf] and \ + minigraph_config[intf]['namespace']: + namespace = minigraph_config[intf]['namespace'] + return namespace conn = {} local_addr, neighbor_addr = [_ for _ in subnet][:2] conn["local_intf"] = "%s" % intf conn["local_addr"] = "%s/%s" % (local_addr, subnet_prefixlen) conn["neighbor_addr"] = "%s/%s" % (neighbor_addr, subnet_prefixlen) conn["loopback_ip"] = loopback_ip - conn["namespace"] = DEFAULT_NAMESPACE + conn["namespace"] = _get_namespace(mg_facts['minigraph_neighbors'], intf) + if intf.startswith("PortChannel"): member_intf = mg_facts["minigraph_portchannels"][intf]["members"][0] - conn["neighbor_intf"] = "eth%s" % mg_facts["minigraph_port_indices"][member_intf] - conn["namespace"] = mg_facts["minigraph_portchannels"][intf]["namespace"] + conn["neighbor_intf"] = "eth%s" % mg_facts["minigraph_ptf_indices"][member_intf] + conn["namespace"] = _get_namespace(mg_facts["minigraph_portchannels"], intf) elif constants.VLAN_SUB_INTERFACE_SEPARATOR in intf: orig_intf, vlan_id = intf.split(constants.VLAN_SUB_INTERFACE_SEPARATOR) ptf_port_index = str(mg_facts["minigraph_port_indices"][orig_intf]) conn["neighbor_intf"] = "eth" + ptf_port_index + constants.VLAN_SUB_INTERFACE_SEPARATOR + vlan_id else: - conn["neighbor_intf"] = "eth%s" % 
mg_facts["minigraph_ptf_indices"][intf] connections.append(conn) ptfhost.remove_ip_addresses() # In case other case did not cleanup IP address configured on PTF interface @@ -428,10 +451,15 @@ def _setup_interfaces_t1_or_t2(mg_facts, peer_count): peer_count = getattr(request.module, "PEER_COUNT", 1) if "dualtor" in tbinfo["topo"]["name"]: setup_func = _setup_interfaces_dualtor - elif tbinfo["topo"]["type"] == "t0": + elif tbinfo["topo"]["type"] in ["t0"]: setup_func = _setup_interfaces_t0 elif tbinfo["topo"]["type"] in set(["t1", "t2"]): setup_func = _setup_interfaces_t1_or_t2 + elif tbinfo["topo"]["type"] == "m0": + if topo_scenario == "m0_l3_scenario": + setup_func = _setup_interfaces_t1_or_t2 + else: + setup_func = _setup_interfaces_t0 else: raise TypeError("Unsupported topology: %s" % tbinfo["topo"]["type"]) diff --git a/tests/bgp/templates/bgp_no_export.j2 b/tests/bgp/templates/bgp_no_export.j2 index 643db06165b..cbf4ae2dc24 100644 --- a/tests/bgp/templates/bgp_no_export.j2 +++ b/tests/bgp/templates/bgp_no_export.j2 @@ -20,7 +20,7 @@ enable password zebra ! bgp multiple-instance ! {% if 'LeafRouter' in DEVICE_METADATA['localhost']['type'] %} -route-map TO_TIER0_V4 permit 30 +route-map TO_TIER0_V4 permit 40 set community no-export additive ! 
{% endif %} diff --git a/tests/bgp/test_bgp_bbr.py b/tests/bgp/test_bgp_bbr.py index d82dd436cb2..d34679f043b 100644 --- a/tests/bgp/test_bgp_bbr.py +++ b/tests/bgp/test_bgp_bbr.py @@ -18,7 +18,7 @@ from tests.common.helpers.constants import DEFAULT_NAMESPACE from tests.common.helpers.parallel import reset_ansible_local_tmp from tests.common.helpers.parallel import parallel_run -from tests.common.utilities import wait_until +from tests.common.utilities import wait_until, delete_running_config pytestmark = [ @@ -48,6 +48,10 @@ def prepare_bbr_config_files(duthosts, rand_one_dut_hostname): duthost.copy(content=bgp_bbr_config.render(BGP_BBR_STATUS='disabled'), dest='/tmp/disable_bbr.json') duthost.copy(content=bgp_bbr_config.render(BGP_BBR_STATUS='enabled'), dest='/tmp/enable_bbr.json') + yield + + del_bbr_json = [{"BGP_BBR": {}}] + delete_running_config(del_bbr_json, duthost) @pytest.fixture(scope='module') def bbr_default_state(setup): @@ -111,9 +115,7 @@ def setup(duthosts, rand_one_dut_hostname, tbinfo, nbrhosts): mg_facts = duthost.get_extended_minigraph_facts(tbinfo) tor_neighbors = natsorted([neighbor for neighbor in nbrhosts.keys() if neighbor.endswith('T0')]) - t2_neighbors = [neighbor for neighbor in nbrhosts.keys() if neighbor.endswith('T2')] tor1 = tor_neighbors[0] - other_vms = tor_neighbors[1:] + t2_neighbors neigh_peer_map = defaultdict(dict) for bgp_neigh in mg_facts['minigraph_bgp']: @@ -129,7 +131,15 @@ def setup(duthosts, rand_one_dut_hostname, tbinfo, nbrhosts): if tor1 == neigh['name']: tor1_namespace = neigh['namespace'] break - + + # Modifying other_vms for multi-asic, check bgps on asic of tor1_namespace + other_vms = [] + for dut_port, neigh in mg_facts['minigraph_neighbors'].items(): + if neigh['name'] == tor1: + continue + if neigh['namespace'] == tor1_namespace: + other_vms.append(neigh['name']) + # Announce route to one of the T0 VM tor1_offset = tbinfo['topo']['properties']['topology']['VMs'][tor1]['vm_offset'] tor1_exabgp_port = 
EXABGP_BASE_PORT + tor1_offset @@ -246,7 +256,11 @@ def check_dut(duthost, other_vms, bgp_neighbors, setup, route, accepted=True): if 'advertisedTo' not in dut_route: logging.warn("DUT didn't advertise the route") return False - advertised_to = set([bgp_neighbors[_]['name'] for _ in dut_route['advertisedTo']]) + advertised_to = set() + for _ in dut_route['advertisedTo']: + # For multi-asic dut, dut_route included duthost which is not a BGP neighbor + if _ in bgp_neighbors.keys(): + advertised_to.add(bgp_neighbors[_]['name']) for vm in other_vms: if vm not in advertised_to: logging.warn("DUT didn't advertise route to neighbor %s" % vm) @@ -297,7 +311,7 @@ def check_other_vms(nbrhosts, setup, route, accepted=True, node=None, results=No # check DUT pytest_assert(wait_until(5, 1, 0, check_dut, duthost, other_vms, bgp_neighbors, setup, route, accepted=accepted), 'DUT check failed') - results = parallel_run(check_other_vms, (nbrhosts, setup, route), {'accepted': accepted}, other_vms, timeout=120) + results = parallel_run(check_other_vms, (nbrhosts, setup, route), {'accepted': accepted}, other_vms, timeout=120, concurrent_tasks=6) failed_results = {} for node, result in results.items(): diff --git a/tests/bgp/test_bgp_gr_helper.py b/tests/bgp/test_bgp_gr_helper.py index de8c1879b25..2409b080f51 100644 --- a/tests/bgp/test_bgp_gr_helper.py +++ b/tests/bgp/test_bgp_gr_helper.py @@ -17,7 +17,8 @@ logger = logging.getLogger(__name__) -def test_bgp_gr_helper_routes_perserved(duthosts, rand_one_dut_hostname, nbrhosts, setup_bgp_graceful_restart, tbinfo): +def test_bgp_gr_helper_routes_perserved(duthosts, rand_one_dut_hostname, nbrhosts, + setup_bgp_graceful_restart, tbinfo, cct=8): """Verify that routes received from one neighbor are all preserved during peer graceful restart.""" def _find_test_bgp_neighbors(test_neighbor_name, bgp_neighbors): diff --git a/tests/bgp/test_bgp_slb.py b/tests/bgp/test_bgp_slb.py index f4d408fd90b..f468037c4cb 100644 --- a/tests/bgp/test_bgp_slb.py 
+++ b/tests/bgp/test_bgp_slb.py @@ -2,9 +2,8 @@ from tests.common import reboot from tests.common.helpers.bgp import BGPNeighbor -from tests.common.dualtor.mux_simulator_control import mux_server_url # lgtm[py/unused-import] -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor # lgtm[py/unused-import] -from tests.common.utilities import wait_until +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor # noqa F401 +from tests.common.utilities import wait_until, delete_running_config pytestmark = [ @@ -16,6 +15,13 @@ NEIGHBOR_EXABGP_PORT = 11000 +@pytest.fixture(scope="module", autouse=True) +def skip_on_backend(tbinfo): + """Skip over storage backend topologies.""" + if "backend" in tbinfo["topo"]["name"]: + pytest.skip("Skipping test_bgp_slb, unsupported topology %s." % tbinfo["topo"]["name"]) + + @pytest.fixture(params=["warm", "fast"]) def reboot_type(request): return request.param @@ -58,6 +64,7 @@ def bgp_slb_neighbor(duthosts, rand_one_dut_hostname, setup_interfaces, ptfhost, return bgp_neighbor +@pytest.mark.disable_loganalyzer def test_bgp_slb_neighbor_persistence_across_advanced_reboot( duthosts, rand_one_dut_hostname, bgp_slb_neighbor, toggle_all_simulator_ports_to_rand_selected_tor, reboot_type, localhost @@ -80,3 +87,5 @@ def verify_bgp_session(duthost, bgp_neighbor): pytest.fail("dynamic BGP session is not established after %s" % reboot_type) finally: neighbor.stop_session() + delete_slb_json = [{"WARM_RESTART": {}}] + delete_running_config(delete_slb_json, duthost) diff --git a/tests/bgp/test_bgp_speaker.py b/tests/bgp/test_bgp_speaker.py index 88cd76fd2d3..25681e40256 100644 --- a/tests/bgp/test_bgp_speaker.py +++ b/tests/bgp/test_bgp_speaker.py @@ -18,7 +18,7 @@ pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.device_type('vs') ] @@ -257,6 +257,7 @@ def 
bgp_speaker_announce_routes_common(common_setup_teardown, """ ptfip, mg_facts, interface_facts, vlan_ips, _, vlan_if_name, speaker_ips, port_num, http_ready = common_setup_teardown assert http_ready + asic_type = duthost.facts["asic_type"] logger.info("announce route") peer_range = mg_facts['minigraph_bgp_peers_with_range'][0]['ip_range'][0] @@ -318,6 +319,7 @@ def bgp_speaker_announce_routes_common(common_setup_teardown, "ipv4": ipv4, "ipv6": ipv6, "testbed_mtu": mtu, + "asic_type": asic_type, "test_balancing": False}, log_file="/tmp/bgp_speaker_test.FibTest.log", socket_recv_size=16384) diff --git a/tests/bgp/test_bgp_update_timer.py b/tests/bgp/test_bgp_update_timer.py index cab541b7895..85ce793646d 100644 --- a/tests/bgp/test_bgp_update_timer.py +++ b/tests/bgp/test_bgp_update_timer.py @@ -9,11 +9,11 @@ from scapy.all import sniff, IP from scapy.contrib import bgp from tests.common.helpers.bgp import BGPNeighbor +from tests.common.utilities import wait_until - -from tests.common.dualtor.mux_simulator_control import mux_server_url -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m - +from tests.common.helpers.assertions import pytest_assert +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor +from tests.common.helpers.constants import DEFAULT_NAMESPACE pytestmark = [ pytest.mark.topology("any"), @@ -32,10 +32,19 @@ NEIGHBOR_ASN1 = 61001 NEIGHBOR_PORT0 = 11000 NEIGHBOR_PORT1 = 11001 +WAIT_TIMEOUT = 120 +TCPDUMP_WAIT_TIMEOUT = 20 + + +def is_tcpdump_running(duthost, cmd): + check_cmd = "ps u -C tcpdump | grep '%s'" % cmd + if cmd in duthost.shell(check_cmd)['stdout']: + return True + return False @contextlib.contextmanager -def log_bgp_updates(duthost, iface, save_path): +def log_bgp_updates(duthost, iface, save_path, ns): """Capture bgp packets to file.""" if iface == "any": # Scapy doesn't support LINUX_SLL2 (Linux cooked v2), and tcpdump on Bullseye @@ 
-44,9 +53,16 @@ def log_bgp_updates(duthost, iface, save_path): start_pcap = "tcpdump -y LINUX_SLL -i %s -w %s port 179" % (iface, save_path) else: start_pcap = "tcpdump -i %s -w %s port 179" % (iface, save_path) - stop_pcap = "pkill -f '%s'" % start_pcap - start_pcap = "nohup %s &" % start_pcap - duthost.shell(start_pcap) + # for multi-asic dut, add 'ip netns exec asicx' to the beginning of tcpdump cmd + stop_pcap = "sudo pkill -f '%s%s'" % (duthost.asic_instance_from_namespace(ns).ns_arg, start_pcap) + start_pcap_cmd = "nohup {}{} &".format(duthost.asic_instance_from_namespace(ns).ns_arg, start_pcap) + duthost.shell(start_pcap_cmd) + # wait until tcpdump process created + if not wait_until(WAIT_TIMEOUT, 5, 1, lambda: is_tcpdump_running(duthost, start_pcap),): + pytest.fail("Could not start tcpdump") + # sleep and wait for tcpdump ready to sniff packets + time.sleep(TCPDUMP_WAIT_TIMEOUT) + try: yield finally: @@ -57,7 +73,7 @@ def is_quagga(duthosts, rand_one_dut_hostname): """Return True if current bgp is using Quagga.""" duthost = duthosts[rand_one_dut_hostname] - show_res = duthost.shell("vtysh -c 'show version'") + show_res = duthost.asic_instance().run_vtysh("-c 'show version'") return "Quagga" in show_res["stdout"] @@ -67,10 +83,14 @@ def is_dualtor(tbinfo): @pytest.fixture -def common_setup_teardown(duthosts, rand_one_dut_hostname, is_dualtor, is_quagga, ptfhost, setup_interfaces): +def common_setup_teardown(duthosts, rand_one_dut_hostname, is_dualtor, is_quagga, ptfhost, setup_interfaces, tbinfo): duthost = duthosts[rand_one_dut_hostname] - mg_facts = duthost.minigraph_facts(host=duthost.hostname)["ansible_facts"] + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) conn0, conn1 = setup_interfaces + conn0_ns = DEFAULT_NAMESPACE if "namespace" not in conn0.keys() else conn0["namespace"] + conn1_ns = DEFAULT_NAMESPACE if "namespace" not in conn1.keys() else conn1["namespace"] + pytest_assert(conn0_ns == 
conn1_ns, "Test fail for conn0 on {} and conn1 on {} started on different asics!".format(conn0_ns, conn1_ns)) + dut_asn = mg_facts["minigraph_bgp_asn"] dut_type = '' @@ -94,7 +114,9 @@ def common_setup_teardown(duthosts, rand_one_dut_hostname, is_dualtor, is_quagga dut_asn, NEIGHBOR_PORT0, neigh_type, - is_multihop=is_quagga or is_dualtor + conn0_ns, + is_multihop=is_quagga or is_dualtor, + is_passive=False ), BGPNeighbor( duthost, @@ -106,7 +128,9 @@ def common_setup_teardown(duthosts, rand_one_dut_hostname, is_dualtor, is_quagga dut_asn, NEIGHBOR_PORT1, neigh_type, - is_multihop=is_quagga or is_dualtor + conn1_ns, + is_multihop=is_quagga or is_dualtor, + is_passive=False ) ) @@ -135,9 +159,24 @@ class _C(object): ) return _constants +def is_neighbor_sessions_established(duthost, neighbors): + is_established = True + + # handle both multi-asic and single-asic + bgp_facts = duthost.bgp_facts(num_npus=duthost.sonichost.num_asics())[ + "ansible_facts" + ] + for neighbor in neighbors: + is_established &= ( + neighbor.ip in bgp_facts["bgp_neighbors"] + and bgp_facts["bgp_neighbors"][neighbor.ip]["state"] == "established" + ) + + return is_established + def test_bgp_update_timer(common_setup_teardown, constants, duthosts, rand_one_dut_hostname, - toggle_all_simulator_ports_to_rand_selected_tor_m): + toggle_all_simulator_ports_to_rand_selected_tor): def bgp_update_packets(pcap_file): """Get bgp update packets from pcap file.""" @@ -197,21 +236,26 @@ def match_bgp_update(packet, src_ip, dst_ip, action, route): n0.start_session() n1.start_session() - # sleep till new sessions are steady - time.sleep(30) + # ensure new sessions are ready + if not wait_until( + WAIT_TIMEOUT, + 5, + 20, + lambda: is_neighbor_sessions_established(duthost, (n0, n1)), + ): + pytest.fail("Could not establish bgp sessions") # ensure new sessions are ready - bgp_facts = duthost.bgp_facts()["ansible_facts"] + # handle both multi-asic and single-asic + bgp_facts = 
duthost.bgp_facts(num_npus=duthost.sonichost.num_asics())["ansible_facts"] assert n0.ip in bgp_facts["bgp_neighbors"] assert n1.ip in bgp_facts["bgp_neighbors"] - assert bgp_facts["bgp_neighbors"][n0.ip]["state"] == "established" - assert bgp_facts["bgp_neighbors"][n1.ip]["state"] == "established" announce_intervals = [] withdraw_intervals = [] for i, route in enumerate(constants.routes): bgp_pcap = BGP_LOG_TMPL % i - with log_bgp_updates(duthost, "any", bgp_pcap): + with log_bgp_updates(duthost, "any", bgp_pcap, n0.namespace): n0.announce_route(route) time.sleep(constants.sleep_interval) n0.withdraw_route(route) diff --git a/tests/bgp/test_traffic_shift.py b/tests/bgp/test_traffic_shift.py index c0db6e93cc1..05f0122dc63 100644 --- a/tests/bgp/test_traffic_shift.py +++ b/tests/bgp/test_traffic_shift.py @@ -201,21 +201,8 @@ def test_TSA_B_C_with_no_neighbors(duthost, bgpmon_setup_teardown): # Remove the Neighbors for the particular BGP instance bgp_neighbors = remove_bgp_neighbors(duthost, asic_index) - # Issue TSA on DUT - output = duthost.shell("TSA")['stdout_lines'] - - # Set the DUT in maintenance state - # Verify ASIC0 has no neighbors message. - pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC is not having no neighbors") - - # Recover to Normal state - duthost.shell("TSB")['stdout_lines'] - - # Verify DUT is in Normal state, and ASIC0 has no neighbors message. - pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC is not having no neighbors") - # Check the traffic state - duthost.shell("TSC")['stdout_lines'] + output = duthost.shell("TSC")['stdout_lines'] # Verify DUT is in Normal state, and ASIC0 has no neighbors message. 
pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC is not having no neighbors") diff --git a/tests/cacl/test_cacl_application.py b/tests/cacl/test_cacl_application.py index 8fa6b6a2ec2..aaa4ad4b444 100644 --- a/tests/cacl/test_cacl_application.py +++ b/tests/cacl/test_cacl_application.py @@ -5,7 +5,8 @@ from tests.common.config_reload import config_reload from tests.common.utilities import wait_until - +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_upper_tor # lgtm[py/unused-import] +from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host # lgtm[py/unused-import] from tests.common.helpers.assertions import pytest_assert logger = logging.getLogger(__name__) @@ -17,7 +18,6 @@ ignored_iptable_rules = [] - @pytest.fixture(scope="module", autouse=True) def ignore_hardcoded_cacl_rule_on_dualtor(tbinfo): global ignored_iptable_rules @@ -30,6 +30,35 @@ def ignore_hardcoded_cacl_rule_on_dualtor(tbinfo): ] ignored_iptable_rules += rules_to_ignore +@pytest.fixture(scope="function", params=["active_tor", "standby_tor"]) +def duthost_dualtor(request, upper_tor_host, lower_tor_host): + which_tor = request.param + + # Add expected DHCP mark iptable rules for standby tor, not for active tor. 
+ if which_tor == 'standby_tor': + dut = lower_tor_host + logger.info("Select lower tor...") + else: + logger.info("Select upper tor...") + dut = upper_tor_host + return dut + +@pytest.fixture +def expected_dhcp_rules_for_standby(duthost_dualtor): + expected_dhcp_rules = [] + mux_cable_int_keys = duthost_dualtor.shell('/usr/bin/redis-cli -n 6 --raw keys "MUX_CABLE_TABLE*"', module_ignore_errors=True)['stdout'] + mux_cable_int_keys = mux_cable_int_keys.split("\n") + for mux_cable_int in mux_cable_int_keys: + interface_name = mux_cable_int.split("|")[1] + mux_status = duthost_dualtor.shell('/usr/bin/redis-cli -n 6 --raw hget "{}" "state"'.format(mux_cable_int), module_ignore_errors=False)['stdout'] + if not mux_status: + continue + if mux_status == 'standby': + mark = duthost_dualtor.shell('/usr/bin/redis-cli -n 6 --raw hget "DHCP_PACKET_MARK|{}" "mark"'.format(interface_name), module_ignore_errors=False)['stdout'] + rule = "-A DHCP -m mark --mark {} -j DROP".format(mark) + expected_dhcp_rules.append(rule) + logger.info("Generated expected dhcp rules for standby interfaces: {}".format(expected_dhcp_rules)) + return expected_dhcp_rules @pytest.fixture(scope="module") def docker_network(duthost): @@ -106,7 +135,7 @@ def clean_scale_rules(duthosts, rand_one_dut_hostname, collect_ignored_rules): # delete the tmp file duthost.file(path=SCALE_ACL_FILE, state='absent') logger.info("Reload config to recover configuration.") - config_reload(duthost) + config_reload(duthost, safe_reload=True) def is_acl_rule_empty(duthost): """ @@ -299,7 +328,7 @@ def generate_and_append_block_ip2me_traffic_rules(duthost, iptables_rules, ip6ta pytest.fail("Unrecognized IP address type on interface '{}': {}".format(iface_name, ip_ntwrk)) -def generate_expected_rules(duthost, docker_network, asic_index): +def generate_expected_rules(duthost, tbinfo, docker_network, asic_index, expected_dhcp_rules_for_standby): iptables_rules = [] ip6tables_rules = [] @@ -364,6 +393,12 @@ def 
generate_expected_rules(duthost, docker_network, asic_index): iptables_rules.append("-A INPUT -p udp -m udp --dport 546:547 -j ACCEPT") ip6tables_rules.append("-A INPUT -p udp -m udp --dport 546:547 -j ACCEPT") + # On standby tor, it has expected dhcp mark iptables rules. + if expected_dhcp_rules_for_standby: + pytest_assert(isinstance(expected_dhcp_rules_for_standby, list), + "expected_dhcp_rules_for_standby should be list! current type is {}".format(type(expected_dhcp_rules_for_standby))) + iptables_rules.extend(expected_dhcp_rules_for_standby) + # Allow all incoming BGP traffic iptables_rules.append("-A INPUT -p tcp -m tcp --dport 179 -j ACCEPT") ip6tables_rules.append("-A INPUT -p tcp -m tcp --dport 179 -j ACCEPT") @@ -371,6 +406,18 @@ def generate_expected_rules(duthost, docker_network, asic_index): iptables_rules.append("-A INPUT -p tcp -m tcp --sport 179 -j ACCEPT") ip6tables_rules.append("-A INPUT -p tcp -m tcp --sport 179 -j ACCEPT") + # Allow LDP traffic + if ("wan" in tbinfo['topo']['name']): + wan_default_rules = [ + "-A INPUT -p tcp -m tcp --dport 646 -j ACCEPT", + "-A INPUT -p tcp -m tcp --sport 646 -j ACCEPT", + "-A INPUT -p udp -m udp --dport 646 -j ACCEPT", + "-A INPUT -p udp -m udp --sport 646 -j ACCEPT", + "-A INPUT -p tcp -m tcp --sport 179 -j ACCEPT" + ] + iptables_rules += wan_default_rules + ip6tables_rules += wan_default_rules + # Generate control plane rules from device config rules_applied_from_config = 0 @@ -682,16 +729,14 @@ def generate_scale_rules(duthost, ip_type): # to call check_iptable_rules every 10s to keep ssh session alive, it just calls # duthost.command to active ssh connection. # In this way, we can active ssh connection and wait as long as we want. - if duthost.is_multi_asic: - # For multi-asic, it has to wait enough long - wait_until(200, 10, 2, check_iptable_rules, duthost) - else: - wait_until(30, 10, 2, check_iptable_rules, duthost) + + # It has to wait cacl rules to be effective. 
+ wait_until(200, 10, 2, check_iptable_rules, duthost) # add ACCEPT rule for SSH to make sure testbed access duthost.command("iptables -I INPUT 3 -p tcp -m tcp --dport 22 -j ACCEPT") -def verify_cacl(duthost, localhost, creds, docker_network, asic_index = None): - expected_iptables_rules, expected_ip6tables_rules = generate_expected_rules(duthost, docker_network, asic_index) +def verify_cacl(duthost, tbinfo, localhost, creds, docker_network, expected_dhcp_rules_for_standby = None, asic_index = None): + expected_iptables_rules, expected_ip6tables_rules = generate_expected_rules(duthost, tbinfo, docker_network, asic_index, expected_dhcp_rules_for_standby) stdout = duthost.get_asic_or_sonic_host(asic_index).command("iptables -S")["stdout"] actual_iptables_rules = stdout.strip().split("\n") @@ -758,7 +803,7 @@ def verify_nat_cacl(duthost, localhost, creds, docker_network, asic_index): unexpected_ip6tables_rules = set(actual_ip6tables_rules) - set(expected_ip6tables_rules) pytest_assert(len(unexpected_ip6tables_rules) == 0, "Unexpected ip6tables nat rules: {}".format(repr(unexpected_ip6tables_rules))) -def test_cacl_application(duthosts, rand_one_dut_hostname, localhost, creds, docker_network): +def test_cacl_application_nondualtor(duthosts, tbinfo, rand_one_dut_hostname, localhost, creds, docker_network): """ Test case to ensure caclmgrd is applying control plane ACLs properly @@ -767,15 +812,24 @@ def test_cacl_application(duthosts, rand_one_dut_hostname, localhost, creds, doc actual iptables/ip6tables rules on the DuT. 
""" duthost = duthosts[rand_one_dut_hostname] - verify_cacl(duthost, localhost, creds, docker_network) + verify_cacl(duthost, tbinfo, localhost, creds, docker_network) -def test_multiasic_cacl_application(duthosts, rand_one_dut_hostname, localhost, creds,docker_network, enum_frontend_asic_index): +def test_cacl_application_dualtor(duthost_dualtor, tbinfo, localhost, creds, docker_network, expected_dhcp_rules_for_standby): + """ + Test case to ensure caclmgrd is applying control plane ACLs properly on dualtor. - if enum_frontend_asic_index is None: - pytest.skip("Not Multi-asic platform. Skipping !!") + This is done by generating our own set of expected iptables and ip6tables + rules based on the DuT's configuration and comparing them against the + actual iptables/ip6tables rules on the DuT. + """ + verify_cacl(duthost_dualtor, tbinfo, localhost, creds, docker_network, expected_dhcp_rules_for_standby) +def test_multiasic_cacl_application(duthosts, tbinfo, rand_one_dut_hostname, localhost, creds, docker_network, enum_frontend_asic_index): + """ + Test case to ensure caclmgrd is applying control plane ACLs properly on multi-ASIC platform. 
+ """ duthost = duthosts[rand_one_dut_hostname] - verify_cacl(duthost, localhost, creds, docker_network, enum_frontend_asic_index) + verify_cacl(duthost, tbinfo, localhost, creds, docker_network, None, enum_frontend_asic_index) verify_nat_cacl(duthost, localhost, creds, docker_network, enum_frontend_asic_index) def test_cacl_scale_rules_ipv4(duthosts, rand_one_dut_hostname, collect_ignored_rules, clean_scale_rules): diff --git a/tests/cacl/test_cacl_function.py b/tests/cacl/test_cacl_function.py index d03e111ee7c..cff82543c08 100644 --- a/tests/cacl/test_cacl_function.py +++ b/tests/cacl/test_cacl_function.py @@ -2,6 +2,7 @@ import logging from tests.common.helpers.assertions import pytest_assert from tests.common.helpers.snmp_helpers import get_snmp_facts +from tests.common.utilities import get_data_acl, recover_acl_rule try: import ntplib @@ -25,6 +26,7 @@ def test_cacl_function(duthosts, rand_one_dut_hostname, localhost, creds): """Test control plane ACL functionality on a SONiC device""" duthost = duthosts[rand_one_dut_hostname] + data_acl = get_data_acl(duthost) dut_mgmt_ip = duthost.mgmt_ip # Start an NTP client @@ -48,76 +50,79 @@ def test_cacl_function(duthosts, rand_one_dut_hostname, localhost, creds): ntp_client.request(dut_mgmt_ip) except ntplib.NTPException: pytest.fail("NTP did timed out when expected to succeed!") - - # Copy config_service_acls.sh to the DuT (this also implicitly verifies we can successfully SSH to the DuT) - duthost.copy(src="scripts/config_service_acls.sh", dest="/tmp/config_service_acls.sh", mode="0755") - - # We run the config_service_acls.sh script in the background because it - # will install ACL rules which will only allow control plane traffic - # to an unused IP range. 
Thus, if it works properly, it will sever our - # SSH session, but we don't want the script itself to get killed, - # because it is also responsible for resetting the control plane ACLs - # back to their previous, working state - duthost.shell("nohup /tmp/config_service_acls.sh < /dev/null > /dev/null 2>&1 &") - - # Wait until we are unable to SSH into the DuT - res = localhost.wait_for(host=dut_mgmt_ip, - port=SONIC_SSH_PORT, - state='stopped', - search_regex=SONIC_SSH_REGEX, - delay=30, - timeout=40, - module_ignore_errors=True) - - pytest_assert(not res.is_failed, "SSH port did not stop. {}".format(res.get('msg', ''))) - - # Try to SSH back into the DuT, it should time out - res = localhost.wait_for(host=dut_mgmt_ip, - port=SONIC_SSH_PORT, - state='started', - search_regex=SONIC_SSH_REGEX, - delay=0, - timeout=10, - module_ignore_errors=True) - - pytest_assert(res.is_failed, "SSH did not timeout when expected. {}".format(res.get('msg', ''))) - - # Ensure we CANNOT gather basic SNMP facts from the device - res = get_snmp_facts(localhost, host=dut_mgmt_ip, version='v2c', community=creds['snmp_rocommunity'], - module_ignore_errors=True) - - pytest_assert('ansible_facts' not in res and "No SNMP response received before timeout" in res.get('msg', '')) - - # Ensure we cannot send an NTP request to the DUT - if NTPLIB_INSTALLED: - try: - ntp_client.request(dut_mgmt_ip) - pytest.fail("NTP did not time out when expected") - except ntplib.NTPException: - pass - - # Wait until the original service ACLs are reinstated and the SSH port on the - # DUT is open to us once again. Note that the timeout here should be set sufficiently - # long enough to allow config_service_acls.sh to reset the ACLs to their original - # configuration. 
- res = localhost.wait_for(host=dut_mgmt_ip, - port=SONIC_SSH_PORT, - state='started', - search_regex=SONIC_SSH_REGEX, - delay=0, - timeout=90, + try: + # Copy config_service_acls.sh to the DuT (this also implicitly verifies we can successfully SSH to the DuT) + duthost.copy(src="scripts/config_service_acls.sh", dest="/tmp/config_service_acls.sh", mode="0755") + + # We run the config_service_acls.sh script in the background because it + # will install ACL rules which will only allow control plane traffic + # to an unused IP range. Thus, if it works properly, it will sever our + # SSH session, but we don't want the script itself to get killed, + # because it is also responsible for resetting the control plane ACLs + # back to their previous, working state + duthost.shell("nohup /tmp/config_service_acls.sh < /dev/null > /dev/null 2>&1 &") + + # Wait until we are unable to SSH into the DuT + res = localhost.wait_for(host=dut_mgmt_ip, + port=SONIC_SSH_PORT, + state='stopped', + search_regex=SONIC_SSH_REGEX, + delay=30, + timeout=40, + module_ignore_errors=True) + + pytest_assert(not res.is_failed, "SSH port did not stop. {}".format(res.get('msg', ''))) + + # Try to SSH back into the DuT, it should time out + res = localhost.wait_for(host=dut_mgmt_ip, + port=SONIC_SSH_PORT, + state='started', + search_regex=SONIC_SSH_REGEX, + delay=0, + timeout=10, + module_ignore_errors=True) + + pytest_assert(res.is_failed, "SSH did not timeout when expected. {}".format(res.get('msg', ''))) + + # Ensure we CANNOT gather basic SNMP facts from the device + res = get_snmp_facts(localhost, host=dut_mgmt_ip, version='v2c', community=creds['snmp_rocommunity'], module_ignore_errors=True) - pytest_assert(not res.is_failed, "SSH did not start working when expected. {}".format(res.get('msg', ''))) - - # Delete config_service_acls.sh from the DuT - duthost.file(path="/tmp/config_service_acls.sh", state="absent") - - # Ensure we can gather basic SNMP facts from the device once again. 
Should fail on timeout - get_snmp_facts(localhost, - host=dut_mgmt_ip, - version="v2c", - community=creds['snmp_rocommunity'], - wait=True, - timeout = 20, - interval=20) + pytest_assert('ansible_facts' not in res and "No SNMP response received before timeout" in res.get('msg', '')) + + # Ensure we cannot send an NTP request to the DUT + if NTPLIB_INSTALLED: + try: + ntp_client.request(dut_mgmt_ip) + pytest.fail("NTP did not time out when expected") + except ntplib.NTPException: + pass + + # Wait until the original service ACLs are reinstated and the SSH port on the + # DUT is open to us once again. Note that the timeout here should be set sufficiently + # long enough to allow config_service_acls.sh to reset the ACLs to their original + # configuration. + res = localhost.wait_for(host=dut_mgmt_ip, + port=SONIC_SSH_PORT, + state='started', + search_regex=SONIC_SSH_REGEX, + delay=0, + timeout=90, + module_ignore_errors=True) + + pytest_assert(not res.is_failed, "SSH did not start working when expected. {}".format(res.get('msg', ''))) + + # Delete config_service_acls.sh from the DuT + duthost.file(path="/tmp/config_service_acls.sh", state="absent") + + # Ensure we can gather basic SNMP facts from the device once again. 
Should fail on timeout + get_snmp_facts(localhost, + host=dut_mgmt_ip, + version="v2c", + community=creds['snmp_rocommunity'], + wait=True, + timeout = 20, + interval=20) + finally: + if data_acl: + recover_acl_rule(duthost, data_acl) diff --git a/tests/cacl/test_ebtables_application.py b/tests/cacl/test_ebtables_application.py index 8a588843b52..3825f9a973e 100644 --- a/tests/cacl/test_ebtables_application.py +++ b/tests/cacl/test_ebtables_application.py @@ -1,3 +1,4 @@ +import logging import pytest from tests.common.helpers.assertions import pytest_assert diff --git a/tests/common/cisco_data.py b/tests/common/cisco_data.py index a9b6e6f9bda..f70ee742a42 100644 --- a/tests/common/cisco_data.py +++ b/tests/common/cisco_data.py @@ -1,2 +1,61 @@ +import json +import re +from tests.common.reboot import reboot + def is_cisco_device(dut): return dut.facts["asic_type"] == "cisco-8000" + +def get_markings_config_file(duthost): + """ + Get the config file where the ECN markings are enabled or disabled. + """ + platform = duthost.facts['platform'] + if platform != 'x86_64-8102_64h_o-r0': + raise RuntimeError("This is applicable only to cisco platforms.") + + hwsku = duthost.facts['hwsku'] + match = re.search("\-([^-_]+)_", platform) + if match: + model = match.group(1) + else: + raise RuntimeError("Couldn't get the model from platform:{}".format(platform)) + config_file = "/usr/share/sonic/device/{}/{}/{}.json".format(platform, hwsku, model) + return config_file + +def get_markings_dut(duthost, key_list=['ecn_dequeue_marking', 'ecn_latency_marking', 'voq_allocation_mode']): + """ + Get the ecn marking values from the duthost. + """ + config_file = get_markings_config_file(duthost) + dest_file = "/tmp/" + contents = duthost.fetch(src=config_file, dest = dest_file) + local_file = contents['dest'] + with open(local_file) as fd: + json_contents = json.load(fd) + markings_dict = {} + # Getting markings from first device. 
+ device = json_contents['devices'][0] + for key in key_list: + markings_dict[key] = device['device_property'][key] + return markings_dict + +def setup_markings_dut(duthost, localhost, **kwargs): + """ + Setup dequeue or latency depending on arguments. + Applicable to cisco-8000 Platforms only. + """ + config_file = get_markings_config_file(duthost) + dest_file = "/tmp/" + contents = duthost.fetch(src=config_file, dest = dest_file) + local_file = contents['dest'] + with open(local_file) as fd: + json_contents = json.load(fd) + reboot_required = False + for device in json_contents['devices']: + for k,v in kwargs.iteritems(): + if device['device_property'][k] != v: + reboot_required = True + device['device_property'][k] = v + if reboot_required: + duthost.copy(content=json.dumps(json_contents, sort_keys=True, indent=4), dest=config_file) + reboot(duthost, localhost) diff --git a/tests/common/config_reload.py b/tests/common/config_reload.py index 85b7e725926..f68afd7ce09 100644 --- a/tests/common/config_reload.py +++ b/tests/common/config_reload.py @@ -1,9 +1,15 @@ import time import logging +from tests.common.helpers.assertions import pytest_assert +from tests.common.plugins.loganalyzer.utils import ignore_loganalyzer +from tests.common.platform.processes_utils import wait_critical_processes +from tests.common.utilities import wait_until +from tests.configlet.util.common import chk_for_pfc_wd + logger = logging.getLogger(__name__) -config_sources = ['config_db', 'minigraph'] +config_sources = ['config_db', 'minigraph', 'running_golden_config'] def config_system_checks_passed(duthost): logging.info("Checking if system is running") @@ -36,11 +42,13 @@ def config_force_option_supported(duthost): return True return False -def config_reload(duthost, config_source='config_db', wait=120, start_bgp=True, start_dynamic_buffer=True): + +@ignore_loganalyzer +def config_reload(duthost, config_source='config_db', wait=120, start_bgp=True, start_dynamic_buffer=True, 
safe_reload=False): """ reload SONiC configuration :param duthost: DUT host object - :param config_source: configuration source either 'config_db' or 'minigraph' + :param config_source: configuration source is 'config_db', 'minigraph' or 'running_golden_config' :param wait: wait timeout for DUT to initialize after configuration reload :return: """ @@ -51,10 +59,6 @@ def config_reload(duthost, config_source='config_db', wait=120, start_bgp=True, ' or '.join(['"{}"'.format(src) for src in config_sources]) )) - cmd = 'config reload -y &>/dev/null' - if config_force_option_supported(duthost): - cmd = 'config reload -y -f &>/dev/null' - logger.info('reloading {}'.format(config_source)) if config_source == 'minigraph': @@ -71,10 +75,31 @@ def config_reload(duthost, config_source='config_db', wait=120, start_bgp=True, duthost.shell('enable-dynamic-buffer.py') duthost.shell('config save -y') - if config_source == 'config_db': + elif config_source == 'config_db': + cmd = 'config reload -y &>/dev/null' + if config_force_option_supported(duthost): + cmd = 'config reload -y -f &>/dev/null' + duthost.shell(cmd, executable="/bin/bash") + + elif config_source == 'running_golden_config': + cmd = 'config reload -y -l /etc/sonic/running_golden_config.json &>/dev/null' + if config_force_option_supported(duthost): + cmd = 'config reload -y -f -l /etc/sonic/running_golden_config.json &>/dev/null' duthost.shell(cmd, executable="/bin/bash") modular_chassis = duthost.get_facts().get("modular_chassis") wait = max(wait, 240) if modular_chassis else wait - time.sleep(wait) + if safe_reload: + # The wait time passed in might not be guaranteed to cover the actual + # time it takes for containers to come back up. Therefore, add 5 + # minutes to the maximum wait time. If it's ready sooner, then the + # function will return sooner. 
+ pytest_assert(wait_until(wait + 300, 20, 0, duthost.critical_services_fully_started), + "All critical services should be fully started!") + wait_critical_processes(duthost) + if config_source == 'minigraph': + pytest_assert(wait_until(300, 20, 0, chk_for_pfc_wd, duthost), + "PFC_WD is missing in CONFIG-DB") + else: + time.sleep(wait) diff --git a/tests/common/devices/fanout.py b/tests/common/devices/fanout.py index 4acfe0fecca..c61c7909788 100644 --- a/tests/common/devices/fanout.py +++ b/tests/common/devices/fanout.py @@ -16,16 +16,17 @@ class FanoutHost(object): For running ansible module on the Fanout switch """ - def __init__(self, ansible_adhoc, os, hostname, device_type, user, passwd, shell_user=None, shell_passwd=None): + def __init__(self, ansible_adhoc, os, hostname, device_type, user, passwd, eos_shell_user=None, eos_shell_passwd=None): self.hostname = hostname self.type = device_type self.host_to_fanout_port_map = {} self.fanout_to_host_port_map = {} if os == 'sonic': self.os = os + self.fanout_port_alias_to_name = {} self.host = SonicHost(ansible_adhoc, hostname, - shell_user=shell_user, - shell_passwd=shell_passwd) + ssh_user=user, + ssh_passwd=passwd) elif os == 'onyx': self.os = os self.host = OnyxHost(ansible_adhoc, hostname, user, passwd) @@ -39,7 +40,7 @@ def __init__(self, ansible_adhoc, os, hostname, device_type, user, passwd, shell else: # Use eos host if the os type is unknown self.os = 'eos' - self.host = EosHost(ansible_adhoc, hostname, user, passwd, shell_user=shell_user, shell_passwd=shell_passwd) + self.host = EosHost(ansible_adhoc, hostname, user, passwd, shell_user=eos_shell_user, shell_passwd=eos_shell_passwd) def __getattr__(self, module_name): return getattr(self.host, module_name) @@ -66,6 +67,9 @@ def shutdown(self, interface_name): raise AttributeError("Host of type {} does not contain a" "'shutdown_multiple' method" .format(type(self.host))) + if self.os == 'sonic': + if interface_name in self.fanout_port_alias_to_name.keys(): 
+ return self.host.shutdown(self.fanout_port_alias_to_name[interface_name]) return self.host.shutdown(interface_name) @@ -86,6 +90,10 @@ def no_shutdown(self, interface_name): "'no_shutdown_multiple' method" .format(type(self.host))) + if self.os == 'sonic': + if interface_name in self.fanout_port_alias_to_name.keys(): + return self.host.no_shutdown(self.fanout_port_alias_to_name[interface_name]) + return self.host.no_shutdown(interface_name) def __str__(self): diff --git a/tests/common/devices/multi_asic.py b/tests/common/devices/multi_asic.py index 530681c3fe4..9174822aa86 100644 --- a/tests/common/devices/multi_asic.py +++ b/tests/common/devices/multi_asic.py @@ -268,7 +268,14 @@ def stop_service(self, service): for asic in self.asics: asic.stop_service(service) + + def reset_service(self, service): + if service in self._DEFAULT_SERVICES: + return self.sonichost.reset_service(service, service) + for asic in self.asics: + asic.reset_service(service) + def restart_service(self, service): if service in self._DEFAULT_SERVICES: return self.sonichost.restart_service(service, service) @@ -438,6 +445,26 @@ def get_bgp_neighbors(self): return bgp_neigh + def get_bgp_neighbors_per_asic(self, state="established"): + """ + Get a diction of BGP neighbor states + + Args: + state: BGP session state, return neighbor IP of sessions that match this state + Returns: dictionary {namespace: { (neighbor_ip : info_dict)* }} + + """ + bgp_neigh = {} + for asic in self.asics: + bgp_neigh[asic.namespace] = {} + bgp_info = asic.bgp_facts()["ansible_facts"]["bgp_neighbors"] + for k, v in bgp_info.items(): + if v["state"] != state: + bgp_info.pop(k) + bgp_neigh[asic.namespace].update(bgp_info) + + return bgp_neigh + def check_bgp_session_state(self, neigh_ips, state="established"): """ @summary: check if current bgp session equals to the target state @@ -461,6 +488,20 @@ def check_bgp_session_state(self, neigh_ips, state="established"): return False + def 
check_bgp_session_state_all_asics(self, bgp_neighbors, state="established"): + """ + @summary: check if current bgp session equals to the target state in each namespace + + @param bgp_neighbors: dictionary {namespace: { (neighbor_ip : info_dict)* }} + @param state: target state + """ + for asic in self.asics: + if asic.namespace in bgp_neighbors: + neigh_ips = [ k.lower() for k, v in bgp_neighbors[asic.namespace].items() if v["state"] == state ] + if not asic.check_bgp_session_state(neigh_ips, state): + return False + return True + def get_bgp_route(self, *args, **kwargs): """ @summary: return BGP routes information from BGP docker. On @@ -546,3 +587,109 @@ def update_ip_route(self, ip, nexthop, op="", namespace=DEFAULT_NAMESPACE): else: for dutasic in self.asics: dutasic.run_vtysh(vty_cmd_args) + + def get_internal_bgp_peers(self): + """ + Get Internal BGP peers. API iterates through frontend ASIC + index to get the BGP internal peers from running configuration + + Returns: + Dict of {BGP peer: Peer Info} + """ + if not self.sonichost.is_multi_asic: + return {} + bgp_internal_neighbors = {} + for asic in self.frontend_asics: + config_facts = self.config_facts( + host=self.hostname, source="running", + namespace=asic.namespace + )['ansible_facts'] + bgp_internal_neighbors.update( + config_facts.get("BGP_INTERNAL_NEIGHBOR", {}) + ) + return bgp_internal_neighbors + + def docker_cmds_on_all_asics(self, cmd, container_name): + """This function iterate for ALL asics and execute cmds""" + duthost = self.sonichost + if duthost.is_multi_asic: + for n in range(duthost.facts['num_asic']): + container = container_name + str(n) + self.shell(argv=["docker", "exec", container, "bash", "-c", cmd]) + else: + self.shell(argv=["docker", "exec", container_name, "bash", "-c", cmd]) + + def docker_copy_to_all_asics(self, container_name, src, dst): + """This function copy from host to ALL asics""" + duthost = self.sonichost + if duthost.is_multi_asic: + for n in 
range(duthost.facts['num_asic']): + container = container_name + str(n) + self.shell("sudo docker cp {} {}:{}".format(src, container, dst)) + else: + self.shell("sudo docker cp {} {}:{}".format(src, container_name, dst)) + + def docker_copy_from_asic(self, container_name, src, dst, asic_id = 0): + """This function copy from one asic to host""" + duthost = self.sonichost + if duthost.is_multi_asic: + container_name += str(asic_id) + self.shell("sudo docker cp {}:{} {}".format(container_name, src, dst)) + + def is_service_fully_started_per_asic_or_host(self, service): + """This function tell if service is fully started base on multi-asic/single-asic""" + duthost = self.sonichost + if duthost.is_multi_asic: + for asic_index in range(duthost.facts["num_asic"]): + docker_name = self.asic_instance(asic_index).get_docker_name(service) + if not duthost.is_service_fully_started(docker_name): + return False + return True + else: + return duthost.is_service_fully_started(service) + + def restart_service_on_asic(self, service, asic_index=DEFAULT_ASIC_ID): + """Restart service on an asic passed or None(DEFAULT_ASIC_ID)""" + self.asic_instance(asic_index).restart_service(service) + + def docker_exec_swssconfig(self, json_name, container_name, asic_idx): + if self.sonichost.is_multi_asic: + container = container_name + str(asic_idx) + return self.shell('docker exec -i {} swssconfig {}'.format(container, json_name), + module_ignore_errors=True) + else: + return self.shell('docker exec -i {} swssconfig {}'.format(container_name, json_name), + module_ignore_errors=True) + + def get_bgp_name_to_ns_mapping(self): + """ This function returns mapping of bgp name -- namespace + e.g. 
{'ARISTAT2': 'asic0', ...} + """ + mg_facts = self.sonichost.minigraph_facts( + host = self.sonichost.hostname + )['ansible_facts'] + neighbors = mg_facts['minigraph_neighbors'] + mapping = dict() + for neigh in neighbors.values(): + mapping[neigh['name']] = neigh['namespace'] + return mapping + + def get_default_route_from_app_db(self, af='ipv4'): + default_routes = dict() + if self.sonichost.is_multi_asic: + for front_asic in self.frontend_asics: + default_routes[front_asic.namespace] = front_asic.get_default_route_from_app_db(af) + else: + default_routes = self.asic_instance(0).get_default_route_from_app_db(af) + return default_routes + + def is_default_route_removed_from_app_db(self, uplink_asics = DEFAULT_NAMESPACE): + if self.sonichost.is_multi_asic: + for ns in uplink_asics: + if not self.asic_instance_from_namespace(ns).is_default_route_removed_from_app_db(): + return False + else: + if not self.asic_instance(0).is_default_route_removed_from_app_db(): + return False + return True + diff --git a/tests/common/devices/onyx.py b/tests/common/devices/onyx.py index 2d8d64b9304..d2319b557e0 100644 --- a/tests/common/devices/onyx.py +++ b/tests/common/devices/onyx.py @@ -56,6 +56,10 @@ def command(self, cmd): out = self.host.onyx_command(commands=[cmd]) return out + def config(self, cmd): + out = self.host.onyx_config(commands=[cmd]) + return out + def set_interface_lacp_rate_mode(self, interface_name, mode): out = self.host.onyx_config( lines=['lacp rate %s' % mode], diff --git a/tests/common/devices/ptf.py b/tests/common/devices/ptf.py index 6d92178f38f..15436d087b0 100644 --- a/tests/common/devices/ptf.py +++ b/tests/common/devices/ptf.py @@ -2,7 +2,7 @@ CHANGE_MAC_ADDRESS_SCRIPT = "scripts/change_mac.sh" REMOVE_IP_ADDRESS_SCRIPT = "scripts/remove_ip.sh" - +RESTART_INTERFACE_SCRIPT = "scripts/restart_interface.sh" class PTFHost(AnsibleHostBase): @@ -20,4 +20,66 @@ def change_mac_addresses(self): def remove_ip_addresses(self): 
self.script(REMOVE_IP_ADDRESS_SCRIPT) + def restart_interfaces(self): + self.script(RESTART_INTERFACE_SCRIPT) + + def add_ip_to_dev(self, dev, ip): + """ + @summary: add ip to dev + + @param dev: device name + @param ip: ip to be added + """ + self.command("ip addr add {} dev {}".format(ip, dev)) + + def create_lag(self, lag_name, lag_ip, lag_mode): + """ + @summary: create a lag as intf, only if after running add_intf_to_lag and startup_lag the lag can work + + @param lag_name: name of lag + @param lag_ip: ip of lag + @param lag_mode: mode of lag + """ + self.command("ip link add {} type bond".format(lag_name)) + self.command("ip link set {} type bond miimon 100 mode {}".format(lag_name, lag_mode)) + self.add_ip_to_dev(lag_name, lag_ip) + + def add_intf_to_lag(self, lag_name, intf_name): + """ + @summary: set interface down and master lag + + @param lag_name: name of lag + @param intf_name: mode of interface + """ + self.set_dev_up_or_down(intf_name, False) + self.command("ip link set {} master {}".format(intf_name, lag_name)) + + def startup_lag(self, lag_name): + """ + @summary: startup lag + + @param lag_name: name of lag + """ + self.set_dev_up_or_down(lag_name, True) + + def set_dev_up_or_down(self, dev_name, is_up): + """ + @summary: set device up or down + + @param dev_name: name of devices + @param is_up: True -> set device up, False -> set device down + """ + self.command("ip link set {} {}".format(dev_name, "up" if is_up else "down")) + + def set_dev_no_master(self, dev_name): + """ + @summary: set device no master + + @param dev_name: name of device + """ + self.command("ip link set {} nomaster".format(dev_name)) + + def ptf_nn_agent(self): + self.command("supervisorctl restart ptf_nn_agent") + # TODO: Add a method for running PTF script diff --git a/tests/common/devices/sonic.py b/tests/common/devices/sonic.py index 3a4a945b9f3..9bf9e191bd7 100644 --- a/tests/common/devices/sonic.py +++ b/tests/common/devices/sonic.py @@ -34,6 +34,9 @@ class 
SonicHost(AnsibleHostBase): DEFAULT_ASIC_SERVICES = ["bgp", "database", "lldp", "swss", "syncd", "teamd"] + """ + setting either one of shell_user/shell_pw or ssh_user/ssh_passwd pair should yield the same result. + """ def __init__(self, ansible_adhoc, hostname, shell_user=None, shell_passwd=None, ssh_user=None, ssh_passwd=None): @@ -549,11 +552,11 @@ def critical_process_status(self, service): @param service: Name of the SONiC service """ - result = {'status': True} - result['exited_critical_process'] = [] - result['running_critical_process'] = [] - critical_group_list = [] - critical_process_list = [] + result = { + 'status': True, + 'exited_critical_process': [], + 'running_critical_process': [] + } # return false if the service is not started service_status = self.is_service_fully_started(service) @@ -571,17 +574,11 @@ def critical_process_status(self, service): output = self.command("docker exec {} supervisorctl status".format(service), module_ignore_errors=True) logging.info("====== supervisor process status for service {} ======".format(service)) - for l in output['stdout_lines']: - (pname, status, info) = re.split("\s+", l, 2) - if status != "RUNNING": - if pname in critical_group_list or pname in critical_process_list: - result['exited_critical_process'].append(pname) - result['status'] = False - else: - if pname in critical_group_list or pname in critical_process_list: - result['running_critical_process'].append(pname) - - return result + return self.parse_service_status_and_critical_process( + service_result=output, + critical_group_list=critical_group_list, + critical_process_list=critical_process_list + ) def all_critical_process_status(self): """ @@ -616,26 +613,96 @@ def all_critical_process_status(self): all_critical_process[service] = service_critical_process continue - service_group_process = group_process_results[service] - - service_result = service_results[service] - for line in service_result['stdout_lines']: - pname, status, _ = 
re.split('\s+', line, 2) - if status != 'RUNNING': - if pname in service_group_process['groups'] or pname in service_group_process['processes']: - service_critical_process['exited_critical_process'].append(pname) - service_critical_process['status'] = False - else: - if pname in service_group_process['groups'] or pname in service_group_process['processes']: - service_critical_process['running_critical_process'].append(pname) - all_critical_process[service] = service_critical_process + all_critical_process[service] = self.parse_service_status_and_critical_process( + service_result=service_results[service], + critical_group_list=group_process_results[service]['groups'], + critical_process_list=group_process_results[service]['processes'] + ) return all_critical_process - def get_crm_resources(self): + def parse_service_status_and_critical_process(self, service_result, critical_group_list, + critical_process_list): + """ + Parse the result of command "docker exec supervisorctl status" + and get service container status and critical processes + """ + service_critical_process = { + 'status': True, + 'exited_critical_process': [], + 'running_critical_process': [] + } + # If container is not running, stdout_lines is empty + # In this situation, service container status should be false + if not service_result['stdout_lines']: + service_critical_process['status'] = False + for line in service_result['stdout_lines']: + pname, status, _ = re.split('\\s+', line, 2) + # 1. Check status is valid + # Sometimes, stdout_lines may be error messages but not emtpy + # In this situation, service container status should be false + # We can check status is valid or not + # You can just add valid status str in this tuple if meet later + if status not in ('RUNNING', 'EXITED', 'STOPPED', 'FATAL', 'BACKOFF', 'STARTING'): + service_critical_process['status'] = False + # 2. Check status is not running + elif status != 'RUNNING': + # 3. 
Check process is critical + if pname in critical_group_list or pname in critical_process_list: + service_critical_process['exited_critical_process'].append(pname) + service_critical_process['status'] = False + else: + if pname in critical_group_list or pname in critical_process_list: + service_critical_process['running_critical_process'].append(pname) + + return service_critical_process + + def get_crm_resources_for_masic(self, namespace = DEFAULT_NAMESPACE): + """ + @summary: Run the "crm show resources all" command on multi-asic dut and parse its output + """ + # Construct mapping of {'ASIC0' : {"main_resources": {}, "acl_resources": [], "table_resources": []}, ...} + # Here we leave value as empty and overwrite it at the end of each ASIC table + multi_result = dict() + for n in range(self.num_asics()): + ns = "asic" + str(n) + multi_result[ns] = {"main_resources": {}, "acl_resources": [], "table_resources": []} + + output = self.command("crm show resources all")["stdout_lines"] + current_table = 0 # Totally 3 tables in the command output + asic = None + for line in output: + if len(line.strip()) == 0 or "---" in line: + continue + if "ASIC" in line: + asic = line.lower() + # Switch table type when 'ASIC0' comes again + if "ASIC0" in line: + current_table += 1 + continue + if current_table == 1: # content of first table, main resources + fields = line.split() + if len(fields) == 3: + multi_result[asic]["main_resources"][fields[0]] = {"used": int(fields[1]), "available": int(fields[2])} + if current_table == 2: # content of the second table, acl resources + fields = line.split() + if len(fields) == 5: + multi_result[asic]["acl_resources"].append({"stage": fields[0], "bind_point": fields[1], + "resource_name": fields[2], "used_count": int(fields[3]), "available_count": int(fields[4])}) + if current_table == 3: # content of the third table, table resources + fields = line.split() + if len(fields) == 4: + multi_result[asic]["table_resources"].append({"table_id": 
fields[0], "resource_name": fields[1], + "used_count": int(fields[2]), "available_count": int(fields[3])}) + return multi_result[namespace] + + + def get_crm_resources(self, namespace = DEFAULT_NAMESPACE): """ @summary: Run the "crm show resources all" command and parse its output """ + if self.is_multi_asic: + return self.get_crm_resources_for_masic(namespace) result = {"main_resources": {}, "acl_resources": [], "table_resources": []} output = self.command("crm show resources all")["stdout_lines"] current_table = 0 # Totally 3 tables in the command output @@ -1167,7 +1234,7 @@ def _parse_route_summary(self, output): ret[key] = val return ret - def get_ip_route_summary(self): + def get_ip_route_summary(self, skip_kernel_tunnel=False): """ @summary: issue "show ip[v6] route summary" and parse output into dicitionary. Going forward, this show command should use tabular output so that @@ -1175,8 +1242,38 @@ def get_ip_route_summary(self): """ ipv4_output = self.shell("show ip route sum")["stdout_lines"] ipv4_summary = self._parse_route_summary(ipv4_output) + + if skip_kernel_tunnel == True: + ipv4_route_kernel_output = self.shell("show ip route kernel")["stdout_lines"] + ipv4_route_kernel_count = 0 + for string in ipv4_route_kernel_output: + if re.search('tun', string): + ipv4_route_kernel_count += 1 + logging.debug("IPv4 kernel tun route {}, {}".format(ipv4_route_kernel_count, ipv4_route_kernel_output)) + + if ipv4_route_kernel_count > 0: + ipv4_summary['kernel']['routes'] -= ipv4_route_kernel_count + ipv4_summary['kernel']['FIB'] -= ipv4_route_kernel_count + ipv4_summary['Totals']['routes'] -= ipv4_route_kernel_count + ipv4_summary['Totals']['FIB'] -= ipv4_route_kernel_count + ipv6_output = self.shell("show ipv6 route sum")["stdout_lines"] ipv6_summary = self._parse_route_summary(ipv6_output) + + if skip_kernel_tunnel == True: + ipv6_route_kernel_output = self.shell("show ipv6 route kernel")["stdout_lines"] + ipv6_route_kernel_count = 0 + for string in 
ipv6_route_kernel_output: + if re.search('tun', string): + ipv6_route_kernel_count += 1 + logging.debug("IPv6 kernel tun route {}, {}".format(ipv6_route_kernel_count, ipv6_route_kernel_output)) + + if ipv6_route_kernel_count > 0: + ipv6_summary['kernel']['routes'] -= ipv6_route_kernel_count + ipv6_summary['kernel']['FIB'] -= ipv6_route_kernel_count + ipv6_summary['Totals']['routes'] -= ipv6_route_kernel_count + ipv6_summary['Totals']['FIB'] -= ipv6_route_kernel_count + return ipv4_summary, ipv6_summary def get_dut_iface_mac(self, iface_name): @@ -1429,7 +1526,8 @@ def get_asic_name(self): elif ("Broadcom Limited Device b850" in output or "Broadcom Limited Broadcom BCM56850" in output): asic = "td2" - elif "Broadcom Limited Device b870" in output: + elif ("Broadcom Limited Device b870" in output or + "Broadcom Inc. and subsidiaries Device b870" in output): asic = "td3" elif "Broadcom Limited Device b980" in output: asic = "th3" @@ -1455,6 +1553,42 @@ def get_vlan_intfs(self): return vlan_intfs + def get_interfaces_status(self): + ''' + Get intnerfaces status by running 'show interfaces status' on the DUT, and parse the result into a dict. 
+ + Example output: + { + "Ethernet0": { + "oper": "down", + "lanes": "25,26,27,28", + "fec": "N/A", + "asym pfc": "off", + "admin": "down", + "type": "N/A", + "vlan": "routed", + "mtu": "9100", + "alias": "fortyGigE0/0", + "interface": "Ethernet0", + "speed": "40G" + }, + "PortChannel101": { + "oper": "up", + "lanes": "N/A", + "fec": "N/A", + "asym pfc": "N/A", + "admin": "up", + "type": "N/A", + "vlan": "routed", + "mtu": "9100", + "alias": "N/A", + "interface": "PortChannel101", + "speed": "40G" + } + } + ''' + return {x.get('interface'): x for x in self.show_and_parse('show interfaces status')} + def get_crm_facts(self): """Run various 'crm show' commands and parse their output to gather CRM facts @@ -1831,3 +1965,77 @@ def active_ip_interfaces(self, ip_ifs, tbinfo, ns_arg=DEFAULT_NAMESPACE): } return ip_ifaces + + def remove_acl_table(self, acl_table): + """ + Remove acl table + + Args: + acl_table: name of acl table to be removed + """ + self.command("config acl remove table {}".format(acl_table)) + + def del_member_from_vlan(self, vlan_id, member_name): + """ + Del vlan member + + Args: + vlan_id: id of vlan + member_name: interface deled from vlan + """ + self.command("config vlan member del {} {}".format(vlan_id, member_name)) + + def add_member_to_vlan(self, vlan_id, member_name, is_tagged=True): + """ + Add vlan member + + Args: + vlan_id: id of vlan + member_name: interface added to vlan + is_tagged: True - add tagged member. False - add untagged member. + """ + self.command("config vlan member add {} {} {}".format("" if is_tagged else "-u", vlan_id, member_name)) + + def remove_ip_from_port(self, port, ip=None): + """ + Remove ip addresses from port. 
If get ip from running config successfully, ignore arg ip provided + + Args: + port: port name + ip: IP address + """ + ip_addresses = self.config_facts(host=self.hostname, source="running")["ansible_facts"].get("INTERFACE", {}).get(port, {}) + if ip_addresses: + for ip in ip_addresses: + self.command("config interface ip remove {} {}".format(port, ip)) + elif ip: + self.command("config interface ip remove {} {}".format(port, ip)) + + def get_port_channel_status(self, port_channel_name): + """ + Collect port channel information by command docker teamdctl + + Args: + port_channel_name: name of port channel + + Returns: + port channel status, key information example: + { + "ports": { + "Ethernet28": { + "runner": { + "selected": True, + "state": "current" + }, + "link": { + "duplex": "full", + "speed": 10, + "up": True + } + } + } + } + """ + commond_output = self.command("docker exec -i teamd teamdctl {} state dump".format(port_channel_name)) + json_info = json.loads(commond_output["stdout"]) + return json_info diff --git a/tests/common/devices/sonic_asic.py b/tests/common/devices/sonic_asic.py index 769caaf8a47..31e6728a364 100644 --- a/tests/common/devices/sonic_asic.py +++ b/tests/common/devices/sonic_asic.py @@ -422,6 +422,22 @@ def run_redis_cmd(self, argv=[]): return result["stdout_lines"] + def run_ip_neigh_cmd(self, cmdstr): + """ + Add -n option with ASIC instance on multi ASIC + + Args: + cmdstr + Returns: + Output from the ansible command module + """ + if not self.sonichost.is_multi_asic: + return self.sonichost.command("sudo ip neigh {}".format(cmdstr)) + + cmdstr = "sudo ip -n asic{} neigh {}".format(self.asic_index, cmdstr) + return self.sonichost.command(cmdstr) + + def port_exists(self, port): """ Check if a given port exists in ASIC instance @@ -516,11 +532,22 @@ def port_on_asic(self, portname): return False def portchannel_on_asic(self, portchannel): - cmd = 'sudo sonic-cfggen -n {} -v "PORTCHANNEL.keys()" -d'.format(self.cli_ns_option) + cmd = 
'sudo sonic-cfggen {} -v "PORTCHANNEL.keys()" -d'.format(self.cli_ns_option) + # Need to compare every portchannel in pcs split by single quote, with the target portchannel + # And cannot do 'if portchannel in pcs', reason is that string/unicode comparison could be misleading + # e.g. 'Portchanne101 in ['portchannel1011']' -> returns True + # By split() function we are converting 'pcs' to list, and can do one by one comparison pcs = self.shell(cmd)["stdout_lines"][0].decode("utf-8") - if pcs is not None and portchannel in pcs: - return True + if pcs is not None: + pcs_list = pcs.split("'") + for pc in pcs_list: + if portchannel == pc: + return True return False + + def write_to_config_db(self, dst_path): + cmd = 'sonic-cfggen {} -j {} --write-to-db'.format(self.cli_ns_option, dst_path) + return self.shell(cmd) def get_portchannel_and_members_in_ns(self, tbinfo): """ @@ -571,6 +598,10 @@ def get_bgp_statistic(self, stat): def check_bgp_statistic(self, stat, value): val = self.get_bgp_statistic(stat) return val == value + + def get_router_mac(self): + return (self.sonichost.command("sonic-cfggen -d -v 'DEVICE_METADATA.localhost.mac' {}".format(self.cli_ns_option))["stdout_lines"][0].encode() + .decode("utf-8").lower()) def get_default_route_from_app_db(self, af='ipv4'): def_rt_json = None @@ -591,5 +622,47 @@ def is_default_route_removed_from_app_db(self): for af in af_list: def_rt_json = self.get_default_route_from_app_db(af) if def_rt_json: + # For multi-asic duts, when bgps are down, docker bridge will come up, which we should ignore here + if self.sonichost.is_multi_asic and def_rt_json.values()[0]['value']['ifname'] == 'eth0': + continue return False return True + + def check_bgp_session_state(self, neigh_ips, state="established"): + """ + @summary: check if current bgp session equals to the target state + + @param neigh_ips: bgp neighbor IPs + @param state: target state + """ + bgp_facts = self.bgp_facts()['ansible_facts'] + neigh_ok = [] + for k, v in 
bgp_facts['bgp_neighbors'].items(): + if v['state'] == state: + if k.lower() in neigh_ips: + neigh_ok.append(k) + logging.info("bgp neighbors that match the state: {} on namespace {}".format(neigh_ok, self.namespace)) + + if len(neigh_ips) == len(neigh_ok): + return True + + return False + + def count_crm_resources(self, resource_type, route_tag, count_type): + mapping = self.sonichost.get_crm_resources(self.namespace) + return mapping.get(resource_type).get(route_tag, {}).get(count_type) + + def count_routes(self, ROUTE_TABLE_NAME): + ns_prefix = "" + if self.sonichost.is_multi_asic: + ns_prefix = '-n' + str(self.namespace) + return int(self.shell( + 'sonic-db-cli {} ASIC_DB eval "return #redis.call(\'keys\', \'{}*\')" 0'.format(ns_prefix, ROUTE_TABLE_NAME), + module_ignore_errors=True, verbose=True)['stdout']) + + def get_route_key(self, ROUTE_TABLE_NAME): + ns_prefix = "" + if self.sonichost.is_multi_asic: + ns_prefix = '-n' + str(self.namespace) + return self.shell('sonic-db-cli {} ASIC_DB eval "return redis.call(\'keys\', \'{}*\')" 0'.format(ns_prefix, ROUTE_TABLE_NAME), + verbose=False)['stdout_lines'] diff --git a/tests/common/dualtor/control_plane_utils.py b/tests/common/dualtor/control_plane_utils.py index 3b41cd10745..49dae63ed56 100644 --- a/tests/common/dualtor/control_plane_utils.py +++ b/tests/common/dualtor/control_plane_utils.py @@ -115,7 +115,9 @@ def get_mismatched_ports(self, db): for intf_name in mux_intfs: table_key = '{}{}{}'.format(table, separator, intf_name) - if db_dump[table_key]['value'][field] != target_value: + if table_key not in db_dump: + mismatch_ports[table_key] = {} + elif db_dump[table_key]['value'][field] != target_value: mismatch_ports[table_key] = db_dump[table_key]['value'] self.mismatch_ports = mismatch_ports diff --git a/tests/common/dualtor/data_plane_utils.py b/tests/common/dualtor/data_plane_utils.py index 932713c9bc7..6082b25f672 100644 --- a/tests/common/dualtor/data_plane_utils.py +++ 
b/tests/common/dualtor/data_plane_utils.py @@ -152,7 +152,15 @@ def run_test(duthosts, activehost, ptfhost, ptfadapter, action, logger.info("Sender and sniffer threads started, ready to execute the "\ "callback action") time.sleep(15) - action() + + try: + action() + except Exception as error: + logging.error("Caught exception %s during action.", repr(error)) + tor_IO.stop_early = True + send_and_sniff.join() + raise + # do not time-wait the test, if early stop is not requested (when stop_after=None) if stop_after is not None: wait_until(timeout=stop_after, interval=0.5, delay=0, condition=\ diff --git a/tests/common/dualtor/dual_tor_mock.py b/tests/common/dualtor/dual_tor_mock.py index 342d5f258fe..463482991a9 100644 --- a/tests/common/dualtor/dual_tor_mock.py +++ b/tests/common/dualtor/dual_tor_mock.py @@ -8,10 +8,10 @@ ip_address, IPv4Address from tests.common import config_reload from tests.common.dualtor.dual_tor_utils import tor_mux_intfs -from tests.common.helpers.assertions import pytest_require, pytest_assert +from tests.common.helpers.assertions import pytest_assert +from tests.common.platform.processes_utils import wait_critical_processes __all__ = [ - 'require_mocked_dualtor', 'apply_active_state_to_orchagent', 'apply_dual_tor_neigh_entries', 'apply_dual_tor_peer_switch_route', @@ -138,12 +138,6 @@ def is_mocked_dualtor(tbinfo): return 'dualtor' not in tbinfo['topo']['name'] -@pytest.fixture -def require_mocked_dualtor(tbinfo): - pytest_require(is_t0_mocked_dualtor(tbinfo), "This testcase is designed for " - "single tor testbed with mock dualtor config. 
Skip this testcase on real dualtor testbed") - - def set_mux_state(dut, tbinfo, state, itfs, toggle_all_simulator_ports): if is_mocked_dualtor(tbinfo): set_dual_tor_state_to_orchagent(dut, state, itfs) @@ -339,31 +333,27 @@ def apply_peer_switch_table_to_dut(cleanup_mocked_configs, rand_selected_dut, mo peer_switch_hostname = 'switch_hostname' peer_switch_key = 'PEER_SWITCH|{}'.format(peer_switch_hostname) device_meta_key = 'DEVICE_METADATA|localhost' + restart_swss = False + if dut.get_asic_name() in ['th2', 'td3']: + restart_swss = True + cmd = 'redis-cli -n 4 HSET "{}" "{}" "{}"'.format(device_meta_key, 'subtype', 'DualToR') + dut.shell(cmd=cmd) + if restart_swss: + # Restart swss on TH2 or TD3 platform to trigger syncd restart to regenerate config.bcm + # We actually need to restart syncd only, but restarting syncd will also trigger swss + # being restarted, and it costs more time than restarting swss + logger.info("Restarting swss service to regenerate config.bcm") + dut.shell('systemctl reset-failed swss; systemctl restart swss') + wait_critical_processes(dut) cmds = ['redis-cli -n 4 HSET "{}" "address_ipv4" "{}"'.format(peer_switch_key, mock_peer_switch_loopback_ip.ip), - 'redis-cli -n 4 HSET "{}" "{}" "{}"'.format(device_meta_key, 'subtype', 'DualToR'), 'redis-cli -n 4 HSET "{}" "{}" "{}"'.format(device_meta_key, 'peer_switch', peer_switch_hostname)] dut.shell_cmds(cmds=cmds) - if dut.get_asic_name() == 'th2': - # Restart swss on TH2 platform - logger.info("Restarting swss service") - dut.shell('systemctl restart swss') - time.sleep(120) - - yield - logger.info("Removing peer switch table") - - cmds=['redis-cli -n 4 DEL "{}"'.format(peer_switch_key), - 'redis-cli -n 4 HDEL"{}" "{}" "{}"'.format(device_meta_key, 'subtype', 'DualToR'), - 'redis-cli -n 4 HDEL "{}" "{}" "{}"'.format(device_meta_key, 'peer_switch', peer_switch_hostname)] - dut.shell_cmds(cmds=cmds) - if dut.get_asic_name() == 'th2': - # Restart swss on TH2 platform + if restart_swss: + # Restart 
swss on TH2 or TD3 platform to apply changes logger.info("Restarting swss service") - dut.shell('systemctl restart swss') - time.sleep(120) - - return + dut.shell('systemctl reset-failed swss; systemctl restart swss') + wait_critical_processes(dut) @pytest.fixture(scope='module') @@ -458,4 +448,4 @@ def cleanup_mocked_configs(duthost, tbinfo): if is_t0_mocked_dualtor(tbinfo): logger.info("Load minigraph to reset the DUT %s", duthost.hostname) - config_reload(duthost, config_source="minigraph") + config_reload(duthost, config_source="minigraph", safe_reload=True) diff --git a/tests/common/dualtor/dual_tor_utils.py b/tests/common/dualtor/dual_tor_utils.py index 2faee23f206..31366048e1e 100644 --- a/tests/common/dualtor/dual_tor_utils.py +++ b/tests/common/dualtor/dual_tor_utils.py @@ -1,21 +1,26 @@ import contextlib +import ipaddress import logging import itertools import pytest import random import time import json +import os import ptf import re import string -from scapy.all import Ether, IP, TCP, IPv6 -import scapy.all as scapyall -from datetime import datetime -from tests.ptf_runner import ptf_runner - from collections import defaultdict +from datetime import datetime from natsort import natsorted +from ptf import mask +from ptf import testutils +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, TCP +from scapy.layers.inet6 import IPv6 + +from tests.common import constants from tests.common.config_reload import config_reload from tests.common.helpers.assertions import pytest_assert as pt_assert from tests.common.helpers.dut_ports import encode_dut_port_name @@ -27,13 +32,17 @@ from ptf import testutils from scapy.all import Ether, IP from tests.common.helpers.generators import generate_ip_through_default_route -from tests.common import constants +from tests.common.utilities import dump_scapy_packet_show_output, get_intf_by_sub_intf, is_ipv4_address +from tests.ptf_runner import ptf_runner __all__ = ['tor_mux_intf', 'tor_mux_intfs', 
'ptf_server_intf', 't1_upper_tor_intfs', 't1_lower_tor_intfs', 'upper_tor_host', 'lower_tor_host', 'force_active_tor'] logger = logging.getLogger(__name__) +ARP_RESPONDER_PY = "arp_responder.py" +SCRIPTS_SRC_DIR = "scripts/" +OPT_DIR = "/opt" def get_tor_mux_intfs(duthost): return sorted(duthost.get_vlan_intfs(), key=lambda intf: int(intf.replace('Ethernet', ''))) @@ -342,6 +351,9 @@ def lower_tor_fanouthosts(lower_tor_host, fanouthosts): return _get_tor_fanouthosts(lower_tor_host, fanouthosts) +fanout_intfs_to_recover = defaultdict(list) + + def _shutdown_fanout_tor_intfs(tor_host, tor_fanouthosts, tbinfo, dut_intfs=None): """Helper function for shutting down fanout interfaces that are connected to specified DUT interfaces. @@ -392,6 +404,7 @@ def _shutdown_fanout_tor_intfs(tor_host, tor_fanouthosts, tbinfo, dut_intfs=None for fanout_host, intf_list in fanout_shut_intfs.items(): fanout_host.shutdown(intf_list) + fanout_intfs_to_recover[fanout_host].extend(intf_list) return fanout_shut_intfs @@ -410,6 +423,7 @@ def shutdown_fanout_upper_tor_intfs(upper_tor_host, upper_tor_fanouthosts, tbinf function: A function for shutting down fanout interfaces connected to specified upper_tor interfaces """ shut_fanouts = [] + fanout_intfs_to_recover.clear() def shutdown(dut_intfs=None): logger.info('Shutdown fanout ports connected to upper_tor') @@ -419,9 +433,9 @@ def shutdown(dut_intfs=None): logger.info('Recover fanout ports connected to upper_tor') - for instance in shut_fanouts: - for fanout_host, intf_list in instance.items(): - fanout_host.no_shutdown(intf_list) + for fanout_host, intf_list in fanout_intfs_to_recover.items(): + fanout_host.no_shutdown(intf_list) + fanout_intfs_to_recover.clear() @pytest.fixture @@ -438,6 +452,7 @@ def shutdown_fanout_lower_tor_intfs(lower_tor_host, lower_tor_fanouthosts, tbinf function: A function for shutting down fanout interfaces connected to specified lower_tor interfaces """ shut_fanouts = [] + fanout_intfs_to_recover.clear() def 
shutdown(dut_intfs=None): logger.info('Shutdown fanout ports connected to lower_tor') @@ -447,9 +462,9 @@ def shutdown(dut_intfs=None): logger.info('Recover fanout ports connected to lower_tor') - for instance in shut_fanouts: - for fanout_host, intf_list in instance.items(): - fanout_host.no_shutdown(intf_list) + for fanout_host, intf_list in fanout_intfs_to_recover.items(): + fanout_host.no_shutdown(intf_list) + fanout_intfs_to_recover.clear() @pytest.fixture @@ -467,6 +482,7 @@ def shutdown_fanout_tor_intfs(upper_tor_host, upper_tor_fanouthosts, lower_tor_h function: A function for shutting down fanout interfaces connected to specified lower_tor interfaces """ down_intfs = [] + fanout_intfs_to_recover.clear() def shutdown(dut_intfs=None, upper=False, lower=False): if not upper and not lower: @@ -484,8 +500,9 @@ def shutdown(dut_intfs=None, upper=False, lower=False): yield shutdown logger.info('Recover fanout ports connected to tor') - for fanout_host, fanout_intf in down_intfs: - fanout_host.no_shutdown(fanout_intf) + for fanout_host, intf_list in fanout_intfs_to_recover.items(): + fanout_host.no_shutdown(intf_list) + fanout_intfs_to_recover.clear() def _shutdown_t1_tor_intfs(tor_host, nbrhosts, tbinfo, vm_names=None): @@ -620,7 +637,7 @@ def _shutdown_tor_downlink_intfs(tor_host, dut_intfs=None): Args: tor_host (object): Host object for the ToR DUT. - dut_intfs (list, optional): List of DUT interface names, for example: ['Ethernet0', 'Ethernet4']. All + dut_intfs (list, optional): List of DUT interface names, for example: ['Ethernet0', 'Ethernet4']. All downlink interfaces on DUT will be shutdown. If dut_intfs is not specified, the function will shutdown all DUT downlink interfaces. Defaults to None. 
@@ -705,7 +722,8 @@ def mux_cable_server_ip(dut): return json.loads(mux_cable_config) -def check_tunnel_balance(ptfhost, standby_tor_mac, vlan_mac, active_tor_ip, standby_tor_ip, selected_port, target_server_ip, target_server_ipv6, target_server_port, ptf_portchannel_indices, check_ipv6=False): +def check_tunnel_balance(ptfhost, standby_tor_mac, vlan_mac, active_tor_ip, standby_tor_ip, selected_port, target_server_ip, + target_server_ipv6, target_server_port, ptf_portchannel_indices, completeness_level, check_ipv6=False): """ Function for testing traffic distribution among all avtive T1. A test script will be running on ptf to generate traffic to standby interface, and the traffic will be forwarded to @@ -723,6 +741,7 @@ def check_tunnel_balance(ptfhost, standby_tor_mac, vlan_mac, active_tor_ip, stan Returns: None. """ + HASH_KEYS = ["src-port", "dst-port", "src-ip"] params = { "server_ip": target_server_ip, @@ -732,7 +751,8 @@ def check_tunnel_balance(ptfhost, standby_tor_mac, vlan_mac, active_tor_ip, stan "active_tor_ip": active_tor_ip, "standby_tor_ip": standby_tor_ip, "ptf_portchannel_indices": ptf_portchannel_indices, - "hash_key_list": HASH_KEYS + "hash_key_list": HASH_KEYS, + "completeness_level": completeness_level } if check_ipv6: params["server_ip"] = target_server_ipv6 @@ -776,10 +796,10 @@ def _generate_hashed_ipv4_packet(src_mac, dst_mac, dst_ip, hash_key): ip_ttl=64 ) exp_pkt = mask.Mask(send_pkt) - exp_pkt.set_do_not_care_scapy(scapyall.Ether, 'dst') - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_pkt.set_do_not_care_scapy(scapyall.IP, "ttl") - exp_pkt.set_do_not_care_scapy(scapyall.IP, "chksum") + exp_pkt.set_do_not_care_scapy(Ether, 'dst') + exp_pkt.set_do_not_care_scapy(Ether, "src") + exp_pkt.set_do_not_care_scapy(IP, "ttl") + exp_pkt.set_do_not_care_scapy(IP, "chksum") inner_packet = send_pkt[IP] inner_packet.ttl = inner_packet.ttl - 1 @@ -793,11 +813,11 @@ def _generate_hashed_ipv4_packet(src_mac, dst_mac, dst_ip, hash_key): 
send_pkt.ttl = 64 exp_tunnel_pkt[TCP] = inner_packet[TCP] exp_tunnel_pkt = mask.Mask(exp_tunnel_pkt) - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.Ether, "dst") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "id") # since src and dst changed, ID would change too - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "ttl") # ttl in outer packet is set to 255 - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "chksum") # checksum would differ as the IP header is not the same + exp_tunnel_pkt.set_do_not_care_scapy(Ether, "dst") + exp_tunnel_pkt.set_do_not_care_scapy(Ether, "src") + exp_tunnel_pkt.set_do_not_care_scapy(IP, "id") # since src and dst changed, ID would change too + exp_tunnel_pkt.set_do_not_care_scapy(IP, "ttl") # ttl in outer packet is set to 255 + exp_tunnel_pkt.set_do_not_care_scapy(IP, "chksum") # checksum would differ as the IP header is not the same return send_pkt, exp_pkt, exp_tunnel_pkt @@ -818,9 +838,9 @@ def _generate_hashed_ipv6_packet(src_mac, dst_mac, dst_ip, hash_key): tcp_dport=dport ) exp_pkt = mask.Mask(send_pkt) - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "dst") - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_pkt.set_do_not_care_scapy(scapyall.IPv6, "hlim") + exp_pkt.set_do_not_care_scapy(Ether, "dst") + exp_pkt.set_do_not_care_scapy(Ether, "src") + exp_pkt.set_do_not_care_scapy(IPv6, "hlim") inner_packet = send_pkt[IPv6] inner_packet[IPv6].hlim -= 1 @@ -834,11 +854,11 @@ def _generate_hashed_ipv6_packet(src_mac, dst_mac, dst_ip, hash_key): send_pkt.hlim = 64 exp_tunnel_pkt[TCP] = inner_packet[TCP] exp_tunnel_pkt = mask.Mask(exp_tunnel_pkt) - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.Ether, "dst") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "id") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "ttl") - exp_tunnel_pkt.set_do_not_care_scapy(scapyall.IP, "chksum") + 
exp_tunnel_pkt.set_do_not_care_scapy(Ether, "dst") + exp_tunnel_pkt.set_do_not_care_scapy(Ether, "src") + exp_tunnel_pkt.set_do_not_care_scapy(IP, "id") + exp_tunnel_pkt.set_do_not_care_scapy(IP, "ttl") + exp_tunnel_pkt.set_do_not_care_scapy(IP, "chksum") return send_pkt, exp_pkt, exp_tunnel_pkt @@ -1042,7 +1062,7 @@ def get_crm_nexthop_counter(host): return crm_facts['resources']['ipv4_nexthop']['used'] -def dualtor_info(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo): +def dualtor_info(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, get_function_completeness_level=None): """ @summary: A helper function for collecting info of dualtor testbed. @param ptfhost: The ptf host fixture @@ -1070,7 +1090,7 @@ def _get_iface_ip(mg_facts, ifacename): if 't0' in tbinfo["topo"]["name"]: # For mocked dualtor res['active_tor_ip'] = str(ipaddress.ip_address(res['standby_tor_ip']) + 1) - # For mocked dualtor, routes to peer switch is static + # For mocked dualtor, routes to peer switch is static res['ptf_portchannel_indices'] = get_t1_active_ptf_ports(standby_tor, tbinfo) else: active_tor_mg_facts = active_tor.get_extended_minigraph_facts(tbinfo) @@ -1085,6 +1105,9 @@ def _get_iface_ip(mg_facts, ifacename): res['target_server_ipv6'] = servers[random_server_iface]['server_ipv6'].split('/')[0] res['target_server_port'] = standby_tor_mg_facts['minigraph_ptf_indices'][random_server_iface] + normalize_level = get_function_completeness_level if get_function_completeness_level else 'thorough' + res['completeness_level'] = normalize_level + logger.debug("dualtor info is generated {}".format(res)) return res @@ -1108,12 +1131,27 @@ def flush_neighbor(duthost, neighbor, restore=True): logging.info("remove neighbor entry for %s", neighbor) duthost.shell("ip neighbor del %s dev %s" % (neighbor, neighbor_details['dev'])) try: - yield + yield neighbor_details finally: if restore: logging.info("restore neighbor entry for %s", neighbor) duthost.shell("ip neighbor replace %s 
lladdr %s dev %s" % (neighbor, neighbor_details['lladdr'], neighbor_details['dev'])) +def delete_neighbor(duthost, neighbor): + """Delete neighbor entry for server in duthost, ignore it if doesn't exist.""" + neighbor_details = get_neighbor(duthost, neighbor) + if neighbor_details: + logging.info("neighbor details for %s: %s", neighbor, neighbor_details) + logging.info("remove neighbor entry for %s", neighbor) + duthost.shell("ip neighbor del %s dev %s" % (neighbor, neighbor_details['dev'])) + else: + logging.info("Neighbor entry %s doesn't exist", neighbor) + return True + + neighbor_details = get_neighbor(duthost, neighbor) + if neighbor_details: + return False + return True @pytest.fixture(scope="function") def rand_selected_interface(rand_selected_dut): @@ -1133,7 +1171,7 @@ def show_muxcable_status(duthost): output = json.loads(duthost.shell(command)["stdout"]) ret = {} - for port, muxcable in output['MUX_CABLE'].items(): + for port, muxcable in output['MUX_CABLE'].items(): ret[port] = {'status': muxcable['STATUS'], 'health': muxcable['HEALTH']} return ret @@ -1157,11 +1195,11 @@ def build_ipv4_packet_to_server(duthost, ptfadapter, target_server_ip): dump_scapy_packet_show_output(pkt) ) exp_pkt = mask.Mask(pkt) - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "dst") - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_pkt.set_do_not_care_scapy(scapyall.IP, "tos") - exp_pkt.set_do_not_care_scapy(scapyall.IP, "ttl") - exp_pkt.set_do_not_care_scapy(scapyall.IP, "chksum") + exp_pkt.set_do_not_care_scapy(Ether, "dst") + exp_pkt.set_do_not_care_scapy(Ether, "src") + exp_pkt.set_do_not_care_scapy(IP, "tos") + exp_pkt.set_do_not_care_scapy(IP, "ttl") + exp_pkt.set_do_not_care_scapy(IP, "chksum") return pkt, exp_pkt @@ -1180,9 +1218,9 @@ def build_ipv6_packet_to_server(duthost, ptfadapter, target_server_ip): dump_scapy_packet_show_output(pkt) ) exp_pkt = mask.Mask(pkt) - exp_pkt.set_do_not_care_scapy(scapyall.Ether, "dst") - 
exp_pkt.set_do_not_care_scapy(scapyall.Ether, "src") - exp_pkt.set_do_not_care_scapy(scapyall.IPv6, "hlim") + exp_pkt.set_do_not_care_scapy(Ether, "dst") + exp_pkt.set_do_not_care_scapy(Ether, "src") + exp_pkt.set_do_not_care_scapy(IPv6, "hlim") return pkt, exp_pkt @@ -1195,17 +1233,18 @@ def build_packet_to_server(duthost, ptfadapter, target_server_ip): @contextlib.contextmanager -def crm_neighbor_checker(duthost): +def crm_neighbor_checker(duthost, ip_version="ipv4", expect_change=False): + resource_name = "{}_neighbor".format(ip_version) crm_facts_before = duthost.get_crm_facts() - ipv4_neighbor_before = crm_facts_before["resources"]["ipv4_neighbor"]["used"] - logging.info("ipv4 neighbor before test: %s", ipv4_neighbor_before) + neighbor_before = crm_facts_before["resources"][resource_name]["used"] + logging.info("{} neighbor before test: {}".format(ip_version, neighbor_before)) yield time.sleep(crm_facts_before["polling_interval"]) crm_facts_after = duthost.get_crm_facts() - ipv4_neighbor_after = crm_facts_after["resources"]["ipv4_neighbor"]["used"] - logging.info("ipv4 neighbor after test: %s", ipv4_neighbor_after) - if ipv4_neighbor_after != ipv4_neighbor_before: - raise ValueError("ipv4 neighbor differs, before %s, after %s", ipv4_neighbor_before, ipv4_neighbor_after) + neighbor_after = crm_facts_after["resources"][resource_name]["used"] + logging.info("{} neighbor after test: {}".format(ip_version, neighbor_after)) + if neighbor_after != neighbor_before and not expect_change: + raise ValueError("{} neighbor differs, before {}, after {}".format(ip_version, neighbor_before, neighbor_after)) def get_ptf_server_intf_index(tor, tbinfo, iface): @@ -1253,12 +1292,15 @@ def add_nexthop_routes(standby_tor, route_dst, nexthops=None): logging.info("Route added to {}: {}".format(standby_tor.hostname, route_cmd)) -def remove_static_routes(standby_tor, active_tor_loopback_ip): +def remove_static_routes(duthost, route_dst): """ - Remove static routes for active tor + 
Remove static routes for duthost """ - logger.info("Removing dual ToR peer switch static route") - standby_tor.shell('ip route del {}/32'.format(active_tor_loopback_ip), module_ignore_errors=True) + route_dst = ipaddress.ip_address(route_dst.decode()) + subnet_mask_len = 32 if route_dst.version == 4 else 128 + + logger.info("Removing dual ToR peer switch static route: {}/{}".format(str(route_dst), subnet_mask_len)) + duthost.shell('ip route del {}/{}'.format(str(route_dst), subnet_mask_len), module_ignore_errors=True) def increase_linkmgrd_probe_interval(duthosts, tbinfo): @@ -1277,3 +1319,120 @@ def increase_linkmgrd_probe_interval(duthosts, tbinfo): .format(probe_interval_ms)) cmds.append("config save -y") duthosts.shell_cmds(cmds=cmds) + + +def update_linkmgrd_probe_interval(duthosts, tbinfo, probe_interval_ms): + ''' + Temporarily modify linkmgrd probe interval + ''' + if 'dualtor' not in tbinfo['topo']['name']: + return + + logger.info("Increase linkmgrd probe interval on {} to {}ms".format(duthosts, probe_interval_ms)) + cmds = [] + cmds.append('sonic-db-cli CONFIG_DB HSET "MUX_LINKMGR|LINK_PROBER" "interval_v4" "{}"'.format(probe_interval_ms)) + duthosts.shell_cmds(cmds=cmds) + + +@pytest.fixture(scope='module') +def dualtor_ports(request, duthosts, rand_one_dut_hostname, enum_frontend_asic_index): + # Fetch dual ToR ports + logger.info("Starting fetching dual ToR info") + + fetch_dual_tor_ports_script = "\ + local remap_enabled = redis.call('HGET', 'SYSTEM_DEFAULTS|tunnel_qos_remap', 'status')\ + if remap_enabled ~= 'enabled' then\ + return {}\ + end\ + local type = redis.call('HGET', 'DEVICE_METADATA|localhost', 'type')\ + local expected_neighbor_type\ + local expected_neighbor_suffix\ + if type == 'LeafRouter' then\ + expected_neighbor_type = 'ToRRouter'\ + expected_neighbor_suffix = 'T0'\ + else\ + if type == 'ToRRouter' then\ + local subtype = redis.call('HGET', 'DEVICE_METADATA|localhost', 'subtype')\ + if subtype == 'DualToR' then\ + 
expected_neighbor_type = 'LeafRouter'\ + expected_neighbor_suffix = 'T1'\ + end\ + end\ + end\ + if expected_neighbor_type == nil then\ + return {}\ + end\ + local result = {}\ + local all_ports_with_neighbor = redis.call('KEYS', 'DEVICE_NEIGHBOR|*')\ + for i = 1, #all_ports_with_neighbor, 1 do\ + local neighbor = redis.call('HGET', all_ports_with_neighbor[i], 'name')\ + if neighbor ~= nil and string.sub(neighbor, -2, -1) == expected_neighbor_suffix then\ + local peer_type = redis.call('HGET', 'DEVICE_NEIGHBOR_METADATA|' .. neighbor, 'type')\ + if peer_type == expected_neighbor_type then\ + table.insert(result, string.sub(all_ports_with_neighbor[i], 17, -1))\ + end\ + end\ + end\ + return result\ + " + + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + dualtor_ports_str = dut_asic.run_redis_cmd(argv = ["sonic-db-cli", "CONFIG_DB", "eval", fetch_dual_tor_ports_script, "0"]) + if dualtor_ports_str: + dualtor_ports_set = set(dualtor_ports_str) + else: + dualtor_ports_set = set({}) + + logger.info("Finish fetching dual ToR info {}".format(dualtor_ports_set)) + + return dualtor_ports_set + +def is_tunnel_qos_remap_enabled(duthost): + """ + Check whether tunnel_qos_remap is enabled or not + """ + try: + tunnel_qos_remap_status = duthost.shell('sonic-cfggen -d -v \'SYSTEM_DEFAULTS.tunnel_qos_remap.status\'', module_ignore_errors=True)["stdout_lines"][0].decode("utf-8") + except IndexError: + return False + return "enabled" == tunnel_qos_remap_status + +@pytest.fixture(scope="session") +def mux_config(duthosts, tbinfo): + if 'dualtor' not in tbinfo['topo']['name']: + return {} + + # NOTE: assume both ToRs have the same mux config + duthost = duthosts[0] + cmd = 'show mux config --json' + return json.loads(duthost.shell(cmd)['stdout'])["MUX_CABLE"]["PORTS"] + +@pytest.fixture(scope="session") +def config_dualtor_arp_responder(tbinfo, duthost, mux_config, ptfhost): + """ + Apply standard ARP responder for dualtor 
testbeds + + In this case, ARP responder will reply to ARP requests and NA messages for the + server IPs configured in the ToR's config DB MUX_CABLE table + """ + ptfhost.copy(src=os.path.join(SCRIPTS_SRC_DIR, ARP_RESPONDER_PY), dest=OPT_DIR) + arp_responder_conf = {} + tor_to_ptf_intf_map = duthost.get_extended_minigraph_facts(tbinfo)['minigraph_ptf_indices'] + + for tor_intf, config_vals in mux_config.items(): + ptf_intf = "eth{}".format(tor_to_ptf_intf_map[tor_intf]) + arp_responder_conf[ptf_intf] = [ + str(ipaddress.ip_interface(config_vals["SERVER"]["IPv4"]).ip), + str(ipaddress.ip_interface(config_vals["SERVER"]["IPv6"]).ip)] + + ptfhost.copy(content=json.dumps(arp_responder_conf, indent=4, sort_keys=True), dest="/tmp/from_t1.json") + ptfhost.host.options["variable_manager"].extra_vars.update({"arp_responder_args": ""}) + ptfhost.template(src="templates/arp_responder.conf.j2", dest="/etc/supervisor/conf.d/arp_responder.conf") + + supervisor_cmd = "supervisorctl reread && supervisorctl update && supervisorctl restart arp_responder" + ptfhost.shell(supervisor_cmd) + + yield + + ptfhost.shell("supervisorctl stop arp_responder") diff --git a/tests/common/dualtor/mux_simulator_control.py b/tests/common/dualtor/mux_simulator_control.py index 0db8103bdc2..d1a374181c0 100644 --- a/tests/common/dualtor/mux_simulator_control.py +++ b/tests/common/dualtor/mux_simulator_control.py @@ -28,6 +28,8 @@ 'toggle_simulator_port_to_upper_tor', 'toggle_simulator_port_to_lower_tor', 'toggle_all_simulator_ports', + 'check_mux_status', + 'validate_check_result', ] logger = logging.getLogger(__name__) @@ -162,7 +164,7 @@ def _post(server_url, data): server_url = '{}?reqId={}'.format(server_url, uuid.uuid4()) # Add query string param reqId for debugging logger.debug('POST {} with {}'.format(server_url, data)) # lgtm [py/clear-text-logging-sensitive-data] headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} - resp = requests.post(server_url, json=data, 
headers=headers) + resp = requests.post(server_url, json=data, headers=headers, timeout=10) logger.debug('Received response {}/{} with content {}'.format(resp.status_code, resp.reason, resp.text)) return resp.status_code == 200 except Exception as e: @@ -363,62 +365,50 @@ def toggle_all_simulator_ports_to_lower_tor(mux_server_url, tbinfo): _toggle_all_simulator_ports(mux_server_url, LOWER_TOR, tbinfo) -def _are_muxcables_active(duthost): - """Check if all the muxcables are active on the duthost. - - Example output of "show muxcable status --json" - { - "MUX_CABLE": { - "Ethernet0": { - "STATUS": "active", - "HEALTH": "unhealthy" - }, - "Ethernet4": { - "STATUS": "active", - "HEALTH": "unhealthy" - }, - "Ethernet8": { - "STATUS": "active", - "HEALTH": "unhealthy" - }, - ... - } +def _probe_mux_ports(duthosts, ports): + """Probe the mux from the duthost.""" + probe_cmd = "sonic-db-cli APPL_DB hset MUX_CABLE_COMMAND_TABLE:%s command probe" + cmds = [probe_cmd % port for port in ports] + for duthost in duthosts: + duthost.shell_cmds(cmds=cmds) - Args: - duthost (ojb): Object for interacting with DUT. - Returns: - bool: True if all mux cables are active on DUT. False if not. 
- """ +def _get_mux_ports(duthost, target_status=None, exclude_status=None): + """Get mux ports that has expected mux status.""" + def _check_status(mux_status): + return ((target_status is None or target_status == mux_status) and (exclude_status is None or exclude_status != mux_status)) + muxcables = json.loads(duthost.shell("show muxcable status --json")['stdout']) - inactive_muxcables = [intf for intf, muxcable in muxcables['MUX_CABLE'].items() if muxcable['STATUS'] != 'active'] - if len(inactive_muxcables) > 0: - logger.info('Found muxcables not active on {}: {}'.format(duthost.hostname, json.dumps(inactive_muxcables))) - return False - else: - return True + return {port:mux_status for port, mux_status in muxcables['MUX_CABLE'].items() if _check_status(mux_status["STATUS"])} -@pytest.fixture -def toggle_all_simulator_ports_to_rand_selected_tor(duthosts, mux_server_url, tbinfo, rand_one_dut_hostname): - """ - A function level fixture to toggle all ports to randomly selected tor +def _toggle_all_simulator_ports_to_target_dut(target_dut_hostname, duthosts, mux_server_url, tbinfo): + """Helper function to toggle all ports to active on the target DUT.""" - For this fixture to work properly, ICMP responder must be running. Please ensure that fixture run_icmp_responder - is imported in test script. 
The run_icmp_responder fixture is defined in tests.common.fixtures.ptfhost_utils - """ - # Skip on non dualtor testbed - if 'dualtor' not in tbinfo['topo']['name']: - return - logger.info("Toggling mux cable to {}".format(rand_one_dut_hostname)) - duthost = duthosts[rand_one_dut_hostname] - dut_index = tbinfo['duts'].index(rand_one_dut_hostname) + def _check_toggle_done(duthosts, target_dut_hostname, probe=False): + duthost = duthosts[target_dut_hostname] + inactive_ports = _get_mux_ports(duthost, exclude_status="active") + if not inactive_ports: + return True + + # NOTE: if ICMP responder is not running, linkmgrd is stuck in waiting for heartbeats and + # the mux probe interval is backed off. Adding a probe here to notify linkmgrd to shorten + # the wait for linkmgrd's sync with the mux. + if probe: + _probe_mux_ports(duthosts, list(inactive_ports.keys())) + + logger.info('Found muxcables not active on {}: {}'.format(duthost.hostname, json.dumps(list(inactive_ports.keys())))) + return False + + logging.info("Toggling mux cable to {}".format(target_dut_hostname)) + dut_index = tbinfo['duts'].index(target_dut_hostname) if dut_index == 0: data = {"active_side": UPPER_TOR} else: data = {"active_side": LOWER_TOR} # Allow retry for mux cable toggling + is_toggle_done = False for attempt in range(1, 4): logger.info('attempt={}, toggle active side of all muxcables to {} from mux simulator'.format( attempt, @@ -426,14 +416,31 @@ def toggle_all_simulator_ports_to_rand_selected_tor(duthosts, mux_server_url, tb )) _post(mux_server_url, data) time.sleep(5) - if _are_muxcables_active(duthost): + if _check_toggle_done(duthosts, target_dut_hostname): + is_toggle_done = True break - else: - pytest_assert(False, "Failed to toggle all ports to {} from mux simulator".format(rand_one_dut_hostname)) + + if not is_toggle_done and not utilities.wait_until(120, 10, 0, _check_toggle_done, duthosts, target_dut_hostname, probe=True): + pytest_assert(False, "Failed to toggle all ports to {} 
from mux simulator".format(target_dut_hostname)) @pytest.fixture -def toggle_all_simulator_ports_to_rand_unselected_tor(mux_server_url, tbinfo, rand_one_dut_hostname): +def toggle_all_simulator_ports_to_rand_selected_tor(duthosts, mux_server_url, tbinfo, rand_one_dut_hostname): + """ + A function level fixture to toggle all ports to randomly selected tor + + For this fixture to work properly, ICMP responder must be running. Please ensure that fixture run_icmp_responder + is imported in test script. The run_icmp_responder fixture is defined in tests.common.fixtures.ptfhost_utils + """ + # Skip on non dualtor testbed + if 'dualtor' not in tbinfo['topo']['name']: + return + + _toggle_all_simulator_ports_to_target_dut(rand_one_dut_hostname, duthosts, mux_server_url, tbinfo) + + +@pytest.fixture +def toggle_all_simulator_ports_to_rand_unselected_tor(duthosts, rand_unselected_dut, mux_server_url, tbinfo): """ A function level fixture to toggle all ports to randomly unselected tor @@ -443,13 +450,8 @@ def toggle_all_simulator_ports_to_rand_unselected_tor(mux_server_url, tbinfo, ra # Skip on non dualtor testbed if 'dualtor' not in tbinfo['topo']['name']: return - dut_index = tbinfo['duts'].index(rand_one_dut_hostname) - if dut_index == 0: - data = {"active_side": LOWER_TOR} - else: - data = {"active_side": UPPER_TOR} - pytest_assert(_post(mux_server_url, data), "Failed to toggle all ports to the randomly unselected tor, the counterpart of {}".format(rand_one_dut_hostname)) + _toggle_all_simulator_ports_to_target_dut(rand_unselected_dut.hostname, duthosts, mux_server_url, tbinfo) @pytest.fixture @@ -481,26 +483,7 @@ def toggle_all_simulator_ports_to_rand_selected_tor_m(duthosts, mux_server_url, logger.info('Set all muxcable to manual mode on all ToRs') duthosts.shell('config muxcable mode manual all') - logger.info("Toggling mux cable to {}".format(rand_one_dut_hostname)) - duthost = duthosts[rand_one_dut_hostname] - dut_index = tbinfo['duts'].index(rand_one_dut_hostname) - 
if dut_index == 0: - data = {"active_side": UPPER_TOR} - else: - data = {"active_side": LOWER_TOR} - - # Allow retry for mux cable toggling - for attempt in range(1, 4): - logger.info('attempt={}, toggle active side of all muxcables to {} from mux simulator'.format( - attempt, - data['active_side'] - )) - _post(mux_server_url, data) - utilities.wait(5, 'Wait for DUT muxcable status to update after toggled from mux simulator') - if _are_muxcables_active(duthost): - break - else: - pytest_assert(False, "Failed to toggle all ports to {} from mux simulator".format(rand_one_dut_hostname)) + _toggle_all_simulator_ports_to_target_dut(rand_one_dut_hostname, duthosts, mux_server_url, tbinfo) yield @@ -538,7 +521,8 @@ def _check_mux_status_consistency(): return False # get mapping from port indices to mux status - simulator_port_mux_status = {int(k.split('-')[-1]):v for k,v in simulator_mux_status.items()} + simulator_port_mux_status = {int(k.split('-')[-1]): v for k, v in simulator_mux_status.items()} + inconsistent_intfs = [] for intf in upper_tor_mux_status['MUX_CABLE']: intf_index = port_indices[intf] if intf_index not in simulator_port_mux_status: @@ -558,6 +542,13 @@ def _check_mux_status_consistency(): intf, upper_tor_status, lower_tor_status, simulator_status ) logging.warn("Inconsistent mux status for interface %s", intf) + inconsistent_intfs.append(intf) + + # NOTE: if ICMP responder is not running, linkmgrd is stuck in waiting for heartbeats and + # the mux probe interval is backed off. Adding a probe here to notify linkmgrd to shorten + # the wait for linkmgrd's sync with the mux. 
+ if inconsistent_intfs: + _probe_mux_ports(duthosts, inconsistent_intfs) return False return True @@ -569,7 +560,7 @@ def _check_mux_status_consistency(): mg_facts = upper_tor_host.get_extended_minigraph_facts(tbinfo) port_indices = mg_facts['minigraph_port_indices'] pytest_assert( - utilities.wait_until(30, 5, 0, _check_mux_status_consistency), + utilities.wait_until(120, 10, 10, _check_mux_status_consistency), "Mux status is inconsistent between the DUTs and mux simulator after toggle" ) @@ -658,3 +649,63 @@ def _get_mux_status(interface_name=None): return _get(url(interface_name=interface_name)) return _get_mux_status + +def check_mux_status(duthosts, active_side): + """Verify that status of muxcables are expected + This function runs "show muxcable status --json" on both ToRs. Before call this function, active side of all + mux cables must be toggled to one side of the ToR. Active side ToR should be indicated in argument "active_side". + This function will ensure that on one ToR, all the mux cables are active. On the other ToR, all the mux cable + should be standby. + Args: + duthosts (list): List of duthost objects + active_side (str): Active side of all mux cables, either UPPER_TOR or LOWER_TOR + Returns: + bool: True if check passed. Otherwise, return False. 
+ """ + if active_side == UPPER_TOR: + mux_active_dut = duthosts[0] + mux_standby_dut = duthosts[1] + else: + mux_active_dut = duthosts[1] + mux_standby_dut = duthosts[0] + + active_side_muxstatus = json.loads(mux_active_dut.shell("show muxcable status --json")['stdout']) + standby_side_muxstatus = json.loads(mux_standby_dut.shell("show muxcable status --json")['stdout']) + + active_side_active_muxcables = [intf for intf, muxcable in active_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'active'] + active_side_standby_muxcables = [intf for intf, muxcable in active_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'standby'] + + standby_side_active_muxcables = [intf for intf, muxcable in standby_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'active'] + standby_side_standby_muxcables = [intf for intf, muxcable in standby_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'standby'] + + if len(active_side_active_muxcables) > 0 and \ + len(active_side_standby_muxcables) == 0 and \ + len(standby_side_active_muxcables) == 0 and \ + len(standby_side_standby_muxcables) > 0 and \ + set(active_side_active_muxcables) == set(standby_side_standby_muxcables): + logger.info('Check mux status on DUTs passed') + return True + else: + logger.info('Unexpected mux status. active_side={}'.format(active_side)) + logger.info('Active side active muxcables: {}'.format(active_side_active_muxcables)) + logger.info('Active side standby muxcables: {}'.format(active_side_standby_muxcables)) + logger.info('Standby side active muxcables: {}'.format(standby_side_active_muxcables)) + logger.info('Standby side standby muxcables: {}'.format(standby_side_standby_muxcables)) + logger.info('Check mux status on DUTs failed') + return False + +def validate_check_result(check_result, duthosts, get_mux_status): + """If check_result is False, collect some log and fail the test. 
+ Args: + check_result (bool): Check result + duthosts (list): List of duthost objects. + """ + if not check_result: + duthosts.shell('show muxcable config') + duthosts.shell('show muxcable status') + simulator_muxstatus = get_mux_status() + if simulator_muxstatus is not None: + logger.info('Mux status from mux simulator: {}'.format(json.dumps(simulator_muxstatus))) + else: + logger.error('Failed to get mux status from mux simulator') + pytest.fail('Toggle mux from simulator test failed') diff --git a/tests/common/dualtor/tor_failure_utils.py b/tests/common/dualtor/tor_failure_utils.py index 85ea755d391..c3e9f7d31f5 100644 --- a/tests/common/dualtor/tor_failure_utils.py +++ b/tests/common/dualtor/tor_failure_utils.py @@ -11,6 +11,7 @@ import pytest import logging import time +import contextlib logger = logging.getLogger(__name__) @@ -139,3 +140,28 @@ def wait_for_device_reachable(duthost, timeout=300): logger.info("SSH started on {}".format((duthost.hostname))) return wait_for_device_reachable + +@contextlib.contextmanager +def shutdown_bgp_sessions_on_duthost(): + """Shutdown all BGP sessions on a device""" + duthosts = [] + + def _shutdown_bgp_sessions_on_duthost(duthost): + duthosts.append(duthost) + logger.info("Shutdown all BGP sessions on {}".format(duthost.hostname)) + duthost.shell("config bgp shutdown all") + + try: + yield _shutdown_bgp_sessions_on_duthost + finally: + time.sleep(1) + for duthost in duthosts: + logger.info("Startup all BGP sessions on {}".format(duthost.hostname)) + duthost.shell("config bgp startup all") + + +@pytest.fixture +def shutdown_bgp_sessions(): + """Shutdown all bgp sessions on a device.""" + with shutdown_bgp_sessions_on_duthost() as shutdown_util: + yield shutdown_util \ No newline at end of file diff --git a/tests/common/dualtor/tunnel_traffic_utils.py b/tests/common/dualtor/tunnel_traffic_utils.py index 51a89f9b8be..4e82d7d4e50 100644 --- a/tests/common/dualtor/tunnel_traffic_utils.py +++ 
b/tests/common/dualtor/tunnel_traffic_utils.py @@ -4,54 +4,116 @@ import operator import pytest import re +import json from ptf import mask, testutils from scapy.all import IP, IPv6, Ether from tests.common.dualtor import dual_tor_utils from tests.common.utilities import dump_scapy_packet_show_output from tests.common.utilities import wait_until -from tests.common.helpers.assertions import pytest_assert - - -def derive_queue_id_from_dscp(dscp): - """ Derive queue id form DSCP using following mapping - DSCP -> Queue mapping - 8 0 - 5 2 - 3 3 - 4 4 - 46 5 - 48 6 - Rest 1 - """ +from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled - dscp_to_queue = { 8 : 0, 5 : 2, 3 : 3, 4 : 4, 46 : 5, 48 : 6} - return dscp_to_queue.get(dscp, 1) +def dut_dscp_tc_queue_maps(duthost): + """ + A module level fixture to get QoS map from DUT host. + Return a dict + { + "dscp_to_tc_map": { + "AZURE": { + "0": "1", + ... + }, + ... + }, + "tc_to_queue_map": { + "AZURE": { + "0": "0", + ... + }, + ... + }, + "tc_to_dscp_map": { + "AZURE_TUNNEL": { + "0": "8", + ... + } + } + } + or an empty dict if failed to parse the output + """ + maps = {} + try: + # dscp_to_tc_map + maps['dscp_to_tc_map'] = json.loads(duthost.shell("sonic-cfggen -d --var-json 'DSCP_TO_TC_MAP'")['stdout']) + # tc_to_queue_map + maps['tc_to_queue_map'] = json.loads(duthost.shell("sonic-cfggen -d --var-json 'TC_TO_QUEUE_MAP'")['stdout']) + # tc_to_dscp_map + maps['tc_to_dscp_map'] = json.loads(duthost.shell("sonic-cfggen -d --var-json 'TC_TO_DSCP_MAP'")['stdout']) + except Exception as e: + logging.error("Failed to retrieve map on {}, exception {}".format(duthost.hostname, repr(e))) + return maps + +def derive_queue_id_from_dscp(duthost, dscp, is_tunnel): + """ + Helper function to find Queue ID for a DSCP ID. 
+ """ + if is_tunnel_qos_remap_enabled(duthost) and is_tunnel: + dscp_to_tc_map_name = "AZURE" + tc_to_queue_map_name = "AZURE_TUNNEL" + logging.info("Enable pcbb") + else: + dscp_to_tc_map_name = "AZURE" + tc_to_queue_map_name = "AZURE" + try: + map = dut_dscp_tc_queue_maps(duthost) + # Load dscp_to_tc_map + tc_id = map['dscp_to_tc_map'][dscp_to_tc_map_name][str(dscp)] + # Load tc_to_queue_map + queue_id = map['tc_to_queue_map'][tc_to_queue_map_name][str(tc_id)] + except Exception as e: + logging.error("Failed to retrieve queue id for dscp {} on {}, exception {}".format(dscp, duthost.hostname, repr(e))) + return + return int(queue_id) + + +def derive_out_dscp_from_inner_dscp(duthost, inner_dscp): + """ + Helper function to find outer DSCP ID for a inner DSCP ID. + """ + if is_tunnel_qos_remap_enabled(duthost): + tc_to_dscp_map_name = "AZURE_TUNNEL" + map = dut_dscp_tc_queue_maps(duthost) + # Load tc_to_dscp_map + dscp_id = map['tc_to_dscp_map'][tc_to_dscp_map_name][str(inner_dscp)] + return int(dscp_id) + else: + return inner_dscp -def queue_stats_check(dut, exp_queue): +def queue_stats_check(dut, exp_queue, packet_count): queue_counter = dut.shell('show queue counters | grep "UC"')['stdout'] logging.debug('queue_counter:\n{}'.format(queue_counter)) + # In case of other noise packets + DIFF = 0.1 """ regex search will look for following pattern in queue_counter outpute ----------------------------------------------------------------------------_--- Port TxQ Counter/pkts Counter/bytes Drop/pkts Drop/bytes ----------- ----- -------------- --------------- ----------- -------------- - Ethernet124 UC1 10 1000 0 0 + Ethernet124 UC1 100 12,400 0 0 """ - result = re.findall(r'\S+\s+UC%d\s+10+\s+\S+\s+\S+\s+\S+' % exp_queue, queue_counter) + result = re.findall(r'\S+\s+UC%d\s+(\d+)+\s+\S+\s+\S+\s+\S+' % exp_queue, queue_counter) if result: - for line in result: - rec_queue = int(line.split()[1][2]) - if rec_queue != exp_queue: - logging.debug("the expected Queue : {} not 
matching with received Queue : {}".format(exp_queue, rec_queue)) - else: - logging.info("the expected Queue : {} matching with received Queue : {}".format(exp_queue, rec_queue)) + for number in result: + if int(number) <= packet_count * (1 + DIFF) and int(number) >= packet_count: + logging.info("the expected Queue : {} received expected numbers of packet {}".format(exp_queue, number)) return True + logging.debug("the expected Queue : {} did not receive expected numbers of packet : {}".format(exp_queue, packet_count)) + return False else: - logging.debug("Could not find queue counter matches.") + logging.debug("Could not find expected queue counter matches.") return False @@ -122,8 +184,7 @@ def _check_ttl(packet): return "outer packet's TTL expected TTL 255, actual %s" % outer_ttl return "" - @staticmethod - def _check_tos(packet): + def _check_tos(self, packet): """Check ToS field in the packet.""" def _disassemble_ip_tos(tos): @@ -142,13 +203,13 @@ def _disassemble_ip_tos(tos): logging.info("Outer packet DSCP: {0:06b}, inner packet DSCP: {1:06b}".format(outer_dscp, inner_dscp)) logging.info("Outer packet ECN: {0:02b}, inner packet ECN: {0:02b}".format(outer_ecn, inner_ecn)) check_res = [] - if outer_dscp != inner_dscp: - check_res.append("outer packet DSCP not same as inner packet DSCP") + expected_outer_dscp = derive_out_dscp_from_inner_dscp(self.standby_tor, inner_dscp) + if outer_dscp != expected_outer_dscp: + check_res.append("outer packet DSCP {0:06b} not same as expected packet DSCP {0:06b}".format(outer_dscp, expected_outer_dscp)) if outer_ecn != inner_ecn: - check_res.append("outer packet ECN not same as inner packet ECN") + check_res.append("outer packet ECN {0:02b} not same as inner packet ECN {0:02b}".format(outer_ecn, inner_ecn)) return " ,".join(check_res) - @staticmethod def _check_queue(self, packet): """Check queue for encap packet.""" @@ -163,18 +224,17 @@ def _disassemble_ip_tos(tos): else: return "Not a valid IPinIP or IPv6inIP tunnel packet" - 
outer_dscp, outer_ecn = _disassemble_ip_tos(outer_tos) - inner_dscp, inner_ecn = _disassemble_ip_tos(inner_tos) + outer_dscp, _ = _disassemble_ip_tos(outer_tos) + inner_dscp, _ = _disassemble_ip_tos(inner_tos) logging.info("Outer packet DSCP: {0:06b}, inner packet DSCP: {1:06b}".format(outer_dscp, inner_dscp)) check_res = [] - if outer_dscp != inner_dscp: - check_res.append("outer packet DSCP not same as inner packet DSCP") - exp_queue = derive_queue_id_from_dscp(outer_dscp) - - pytest_assert(wait_until(60, 5, 0, queue_stats_check, self.standby_tor, exp_queue)) - return check_res + exp_queue = derive_queue_id_from_dscp(self.standby_tor, inner_dscp, True) + logging.info("Expect queue: %s", exp_queue) + if not wait_until(60, 5, 0, queue_stats_check, self.standby_tor, exp_queue, self.packet_count): + check_res.append("no expect counter in the expected queue %s" % exp_queue) + return " ,".join(check_res) - def __init__(self, standby_tor, active_tor=None, existing=True, inner_packet=None): + def __init__(self, standby_tor, active_tor=None, existing=True, inner_packet=None, check_items=("ttl", "tos", "queue"), packet_count=10): """ Init the tunnel traffic monitor. 
@@ -185,6 +245,7 @@ def __init__(self, standby_tor, active_tor=None, existing=True, inner_packet=Non self.standby_tor = standby_tor self.listen_ports = sorted(self._get_t1_ptf_port_indexes(standby_tor, tbinfo)) self.ptfadapter = ptfadapter + self.packet_count = packet_count standby_tor_cfg_facts = self.standby_tor.config_facts( host=self.standby_tor.hostname, source="running" @@ -206,6 +267,7 @@ def __init__(self, standby_tor, active_tor=None, existing=True, inner_packet=Non self.inner_packet = inner_packet self.exp_pkt = self._build_tunnel_packet(self.standby_tor_lo_addr, self.active_tor_lo_addr, inner_packet=self.inner_packet) self.rec_pkt = None + self.check_items = check_items def __enter__(self): # clear queue counters before IO to ensure _check_queue could get more precise result @@ -235,17 +297,16 @@ def __exit__(self, *exc_info): logging.info("Encapsulated packet:\n%s", dump_scapy_packet_show_output(self.rec_pkt)) if not self.existing: raise RuntimeError("Detected tunnel traffic from host %s." 
% self.standby_tor.hostname) - ttl_check_res = self._check_ttl(self.rec_pkt) - tos_check_res = self._check_tos(self.rec_pkt) - queue_check_res = self._check_queue(self, self.rec_pkt) - check_res = [] - if ttl_check_res: - check_res.append(ttl_check_res) - if tos_check_res: - check_res.append(tos_check_res) - if queue_check_res: - check_res.append(queue_check_res) - if check_res: - raise ValueError(", ".join(check_res) + ".") + + check_result = [] + for check_item in self.check_items: + check_func = getattr(self, "_check_%s" % check_item, None) + if check_func is not None: + result = check_func(self.rec_pkt) + if result: + check_result.append(result) + + if check_result: + raise ValueError(", ".join(check_result) + ".") return TunnelTrafficMonitor diff --git a/tests/common/fixtures/advanced_reboot.py b/tests/common/fixtures/advanced_reboot.py index f76edfa3b66..0494735e113 100644 --- a/tests/common/fixtures/advanced_reboot.py +++ b/tests/common/fixtures/advanced_reboot.py @@ -6,6 +6,7 @@ import pytest import time import os +import traceback from tests.common.mellanox_data import is_mellanox_device as isMellanoxDevice from tests.common.platform.ssh_utils import prepare_testbed_ssh_keys as prepareTestbedSshKeys @@ -13,6 +14,8 @@ from tests.common.helpers.sad_path import SadOperation from tests.ptf_runner import ptf_runner from tests.common.helpers.assertions import pytest_assert +from tests.common.utilities import InterruptableThread +from tests.common.fixtures.duthost_utils import check_bgp_router_id logger = logging.getLogger(__name__) @@ -23,15 +26,16 @@ REBOOT_CASE_TIMEOUT = 1800 class AdvancedReboot: - ''' + """ AdvancedReboot is used to perform reboot dut while running preboot/inboot operations This class collects information about the current testbed. This information is used by test cases to build - inboot/preboot list. The class transfers number of configuration files to the dut/ptf in preparation for reboot test. + inboot/preboot list. 
The class transfers number of config files to the dut/ptf in preparation for reboot test. Test cases can trigger test start utilizing runRebootTestcase API. - ''' - def __init__(self, request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs): - ''' + """ + + def __init__(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds, **kwargs): + """ Class constructor. @param request: pytest request object @param duthost: AnsibleHost instance of DUT @@ -39,10 +43,10 @@ def __init__(self, request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs @param localhost: Localhost for interacting with localhost through ansible @param tbinfo: fixture provides information about testbed @param kwargs: extra parameters including reboot type - ''' - assert 'rebootType' in kwargs and kwargs['rebootType'] in ['fast-reboot', 'warm-reboot', 'warm-reboot -f'], ( + """ + assert 'rebootType' in kwargs and ('warm-reboot' in kwargs['rebootType'] or + 'fast-reboot' in kwargs['rebootType']), \ "Please set rebootType var." - ) if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0': # Fast and Warm-reboot procedure now test if "docker exec" works. 
@@ -70,6 +74,7 @@ def __init__(self, request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs self.kvmTest = False self.request = request + self.duthosts = duthosts self.duthost = duthost self.ptfhost = ptfhost self.localhost = localhost @@ -77,7 +82,8 @@ def __init__(self, request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs self.creds = creds self.moduleIgnoreErrors = kwargs["allow_fail"] if "allow_fail" in kwargs else False self.allowMacJump = kwargs["allow_mac_jumping"] if "allow_mac_jumping" in kwargs else False - self.advanceboot_loganalyzer = kwargs["advanceboot_loganalyzer"] if "advanceboot_loganalyzer" in kwargs else None + self.advanceboot_loganalyzer = kwargs["advanceboot_loganalyzer"] if "advanceboot_loganalyzer"\ + in kwargs else None self.__dict__.update(kwargs) self.__extractTestParam() self.rebootData = {} @@ -89,9 +95,9 @@ def __init__(self, request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs self.__buildTestbedData(tbinfo) def __extractTestParam(self): - ''' + """ Extract test parameters from pytest request object. Note that all the parameters have default values. 
- ''' + """ self.vnet = self.request.config.getoption("--vnet") self.vnetPkts = self.request.config.getoption("--vnet_pkts") self.rebootLimit = self.request.config.getoption("--reboot_limit") @@ -108,47 +114,47 @@ def __extractTestParam(self): # Set default reboot limit if it is not given if self.rebootLimit is None: if self.kvmTest: - self.rebootLimit = 200 # Default reboot limit for kvm + self.rebootLimit = 200 # Default reboot limit for kvm elif 'warm-reboot' in self.rebootType: self.rebootLimit = 0 else: - self.rebootLimit = 30 # Default reboot limit for physical devices + self.rebootLimit = 30 # Default reboot limit for physical devices def getHostMaxLen(self): - ''' + """ Accessor method for hostMaxLen - ''' + """ # Number of VMS - 1 return self.hostMaxLen def getlagMemberCnt(self): - ''' + """ Accessor method for lagMemberCnt - ''' + """ return self.lagMemberCnt def getVlanMaxCnt(self): - ''' + """ Accessor method for vlanMaxCnt - ''' + """ return self.vlanMaxCnt def getHostMaxCnt(self): - ''' + """ Accessor method for hostMaxCnt - ''' + """ return self.hostMaxCnt def getTestbedType(self): - ''' + """ Accessor method for testbed's topology name - ''' + """ return self.tbinfo['topo']['name'] def __buildTestbedData(self, tbinfo): - ''' + """ Build testbed data that are needed by ptf advanced-reboot.ReloadTest class - ''' + """ self.mgFacts = self.duthost.get_extended_minigraph_facts(tbinfo) @@ -162,6 +168,16 @@ def __buildTestbedData(self, tbinfo): self.rebootData['dut_hostname'] = self.mgFacts['minigraph_mgmt_interface']['addr'] self.rebootData['dut_mac'] = self.duthost.facts['router_mac'] + vlan_mac = self.rebootData['dut_mac'] + config_facts = self.duthost.get_running_config_facts() + vlan_table = config_facts.get('VLAN', None) + if vlan_table: + vlan_name = list(vlan_table.keys())[0] + vlan_mac = vlan_table[vlan_name].get('mac', self.rebootData['dut_mac']) + self.rebootData['vlan_mac'] = vlan_mac + self.rebootData['lo_prefix'] = "%s/%s" % 
(self.mgFacts['minigraph_lo_interfaces'][0]['addr'], + self.mgFacts['minigraph_lo_interfaces'][0]['prefixlen']) + vlan_ip_range = dict() for vlan in self.mgFacts['minigraph_vlan_interfaces']: if type(ipaddress.ip_network(vlan['subnet'])) is ipaddress.IPv4Network: @@ -173,7 +189,8 @@ def __buildTestbedData(self, tbinfo): # Change network of the dest IP addresses (used by VM servers) to be different from Vlan network prefixLen = self.mgFacts['minigraph_vlan_interfaces'][0]['prefixlen'] - 3 - testNetwork = ipaddress.ip_address(self.mgFacts['minigraph_vlan_interfaces'][0]['addr']) + (1 << (32 - prefixLen)) + testNetwork = ipaddress.ip_address(self.mgFacts['minigraph_vlan_interfaces'][0]['addr']) + \ + (1 << (32 - prefixLen)) self.rebootData['default_ip_range'] = str( ipaddress.ip_interface(unicode(str(testNetwork) + '/{0}'.format(prefixLen))).network ) @@ -183,9 +200,9 @@ def __buildTestbedData(self, tbinfo): break def __updateNextHopIps(self): - ''' + """ Update next hop IPs - ''' + """ if self.inbootList is not None: self.rebootData['nexthop_ips'] = [ self.tbinfo['topo']['properties']['configuration_properties']['common']['nhipv4'], @@ -195,9 +212,9 @@ def __updateNextHopIps(self): self.rebootData['nexthop_ips'] = None def __validateAndBuildSadList(self): - ''' + """ Validate sad list (preboot/inboot lists) member data - ''' + """ prebootList = [] if self.prebootList is None else self.prebootList inbootList = [] if self.inbootList is None else self.inbootList sadList = [item for item in itertools.chain(prebootList, inbootList)] @@ -230,11 +247,11 @@ def __validateAndBuildSadList(self): self.rebootData['sadList'] = sadList if len(sadList) > 0 else [None] def __transferTestDataFiles(self, data, ansibleHost): - ''' + """ Convert data into json format and transfers json file to ansible host (ptfhost/duthost) @param data: map that includedata source and json file name @param ansibleHost: Ansible host that is receiving this data - ''' + """ for item in data: data_source = 
item['source'] filename = '/tmp/' + item['name'] + '.json' @@ -246,39 +263,39 @@ def __transferTestDataFiles(self, data, ansibleHost): self.rebootData[item['name'] + '_file'] = filename def __runScript(self, scripts, ansibleHost): - ''' + """ Run script on an Ansibl host @param scripts: list of script names to be run on Ansible host @param ansibleHost: Ansible host to run the scripts on - ''' + """ # this could be done using script API from ansible modules for script in scripts: logger.info('Running script {0} on {1}'.format(script, ansibleHost.hostname)) ansibleHost.script('scripts/' + script) def __prepareTestbedSshKeys(self): - ''' + """ Prepares testbed ssh keys by generating ssh key on ptf host and adding this key to known_hosts on duthost - ''' + """ prepareTestbedSshKeys(self.duthost, self.ptfhost, self.rebootData['dut_username']) def __handleMellanoxDut(self): - ''' + """ Handle Mellanox DUT reboot when upgrading from SONiC-OS-201803 to SONiC-OS-201811 - ''' + """ if self.newSonicImage is not None and \ - self.rebootType == 'fast-reboot' and \ - isMellanoxDevice(self.duthost): + self.rebootType == 'fast-reboot' and \ + isMellanoxDevice(self.duthost): logger.info('Handle Mellanox platform') nextImage = self.duthost.shell('sonic_installer list | grep Next | cut -f2 -d " "')['stdout'] if 'SONiC-OS-201803' in self.currentImage and 'SONiC-OS-201811' in nextImage: self.__runScript(['upgrade_mlnx_fw.sh'], self.duthost) def __updateAndRestartArpResponder(self, item=None): - ''' + """ Update ARP responder configuration data based on the inboot/preboot operation (item) @param item: inboot/preboot operation - ''' + """ arp_responder_args = '-e' if item is not None: arp_responder_args += ' -c /tmp/from_t1_{0}.json'.format(item) @@ -291,9 +308,9 @@ def __updateAndRestartArpResponder(self, item=None): self.ptfhost.shell('supervisorctl reread && supervisorctl update') def __handleRebootImage(self): - ''' + """ Download and install new image to DUT - ''' + """ if 
self.newSonicImage is None: self.newImage = False return @@ -325,9 +342,9 @@ def __handleRebootImage(self): self.duthost.shell('rm -f {}'.format(tempfile)) def __setupTestbed(self): - ''' + """ Sets testbed up. It tranfers test data files, ARP responder, and runs script to update IPs and MAC addresses. - ''' + """ self.__runScript(['remove_ip.sh'], self.ptfhost) self.__prepareTestbedSshKeys() @@ -341,9 +358,9 @@ def __setupTestbed(self): self.duthost.copy(src='scripts/fast-reboot', dest='/usr/bin/') def __clearArpAndFdbTables(self): - ''' + """ Clears ARP and FDB entries - ''' + """ logger.info('Clearing arp entries on DUT {}'.format(self.duthost.hostname)) self.duthost.shell('sonic-clear arp') @@ -351,31 +368,35 @@ def __clearArpAndFdbTables(self): self.duthost.shell('sonic-clear fdb all') def __fetchTestLogs(self, rebootOper=None): - ''' + """ Fetch test logs from duthost and ptfhost after individual test run - ''' + """ if rebootOper: dir_name = "{}_{}".format(self.request.node.name, rebootOper) else: dir_name = self.request.node.name - report_file_dir = os.path.realpath((os.path.join(os.path.dirname(__file__),\ - "../../logs/platform_tests/"))) + report_file_dir = os.path.realpath((os.path.join(os.path.dirname(__file__), "../../logs/platform_tests/"))) log_dir = os.path.join(report_file_dir, dir_name) if not os.path.exists(log_dir): os.makedirs(log_dir) log_dir = log_dir + "/" + if "warm" in self.rebootType: + # normalize "warm-reboot -f", "warm-reboot -c" to "warm-reboot" for report collection + reboot_file_prefix = "warm-reboot" + else: + reboot_file_prefix = self.rebootType if rebootOper is None: - rebootLog = '/tmp/{0}.log'.format(self.rebootType) - rebootReport = '/tmp/{0}-report.json'.format(self.rebootType) + rebootLog = '/tmp/{0}.log'.format(reboot_file_prefix) + rebootReport = '/tmp/{0}-report.json'.format(reboot_file_prefix) capturePcap = '/tmp/capture.pcap' filterPcap = '/tmp/capture_filtered.pcap' syslogFile = '/tmp/syslog' sairedisRec = 
'/tmp/sairedis.rec' swssRec = '/tmp/swss.rec' else: - rebootLog = '/tmp/{0}-{1}.log'.format(self.rebootType, rebootOper) - rebootReport = '/tmp/{0}-{1}-report.json'.format(self.rebootType, rebootOper) + rebootLog = '/tmp/{0}-{1}.log'.format(reboot_file_prefix, rebootOper) + rebootReport = '/tmp/{0}-{1}-report.json'.format(reboot_file_prefix, rebootOper) capturePcap = '/tmp/capture_{0}.pcap'.format(rebootOper) filterPcap = '/tmp/capture_filtered_{0}.pcap'.format(rebootOper) syslogFile = '/tmp/syslog_{0}'.format(rebootOper) @@ -384,9 +405,12 @@ def __fetchTestLogs(self, rebootOper=None): logger.info('Extract log files on dut host') dutLogFiles = [ - {'directory': '/var/log', 'file_prefix': 'syslog', 'start_string': 'Linux version', 'target_filename': syslogFile}, - {'directory': '/var/log/swss', 'file_prefix': 'sairedis.rec', 'start_string': 'recording on:', 'target_filename': sairedisRec}, - {'directory': '/var/log/swss', 'file_prefix': 'swss.rec', 'start_string': 'recording started', 'target_filename': swssRec}, + {'directory': '/var/log', 'file_prefix': 'syslog', 'start_string': 'Linux version', + 'target_filename': syslogFile}, + {'directory': '/var/log/swss', 'file_prefix': 'sairedis.rec', 'start_string': 'recording on:', + 'target_filename': sairedisRec}, + {'directory': '/var/log/swss', 'file_prefix': 'swss.rec', 'start_string': 'recording started', + 'target_filename': swssRec}, ] for logFile in dutLogFiles: self.duthost.extract_log(**logFile) @@ -411,12 +435,12 @@ def __fetchTestLogs(self, rebootOper=None): return log_dir def imageInstall(self, prebootList=None, inbootList=None, prebootFiles=None): - ''' + """ This method validates and prepares test bed for reboot test case. 
@param prebootList: list of operation to run before reboot process - @param inbootList: list of operation to run during reboot prcoess + @param inbootList: list of operation to run during reboot process @param prebootFiles: preboot files - ''' + """ self.prebootList = prebootList self.inbootList = inbootList self.prebootFiles = prebootFiles @@ -436,48 +460,114 @@ def imageInstall(self, prebootList=None, inbootList=None, prebootFiles=None): # Handle mellanox platform self.__handleMellanoxDut() + def move_logs_before_reboot(self): + source_dir = '/host/logs_before_reboot' + target_dir = '/var/log' + + command = "test -d {}".format(source_dir) + result = self.duthost.shell(command, module_ignore_errors=True) + + if result["rc"] == 0: + command = 'sudo mv ' + source_dir + ' ' + target_dir + result = self.duthost.shell(command, module_ignore_errors=True) + if result["rc"] == 0: + logger.info("Files under /host/logs_before_reboot copied successfully to {}.".format(target_dir)) + else: + logger.info("Failed to copy files under /host/logs_before_reboot successfully to {}.".format(target_dir)) + else: + logger.info("Directory {} does not exist.".format(source_dir)) + def runRebootTest(self): # Run advanced-reboot.ReloadTest for item in preboot/inboot list count = 0 result = True - failed_list = list() + test_results = dict() for rebootOper in self.rebootData['sadList']: count += 1 + test_case_name = str(self.request.node.name) + str(rebootOper) + test_results[test_case_name] = list() try: + if self.preboot_setup: + self.preboot_setup() if self.advanceboot_loganalyzer: pre_reboot_analysis, post_reboot_analysis = self.advanceboot_loganalyzer marker = pre_reboot_analysis() - self.__setupRebootOper(rebootOper) - result = self.__runPtfRunner(rebootOper) + event_counters = self.__setupRebootOper(rebootOper) + thread = InterruptableThread( + target=self.__runPtfRunner, + kwargs={"rebootOper": rebootOper}) + thread.daemon = True + thread.start() + # give the test 
REBOOT_CASE_TIMEOUT (1800s) to complete the reboot with IO, + # and then additional 300s to examine the pcap, logs and generate reports + ptf_timeout = REBOOT_CASE_TIMEOUT + 300 + thread.join(timeout=ptf_timeout, suppress_exception=True) + self.ptfhost.shell("pkill -f 'ptftests advanced-reboot.ReloadTest'", module_ignore_errors=True) + # the thread might still be running, and to catch any exceptions after pkill allow 10s to join + thread.join(timeout=10) self.__verifyRebootOper(rebootOper) + if self.duthost.num_asics() == 1 and not check_bgp_router_id(self.duthost, self.mgFacts): + test_results[test_case_name].append("Failed to verify BGP router identifier is Loopback0 on %s" % + self.duthost.hostname) + if self.postboot_setup: + self.postboot_setup() except Exception: - failed_list.append(rebootOper) + traceback_msg = traceback.format_exc() + logger.error("Exception caught while running advanced-reboot test on ptf: \n{}".format(traceback_msg)) + test_results[test_case_name].append("Exception caught while running advanced-reboot test on ptf") finally: - # always capture the test logs + # capture the test logs, and print all of them in case of failure, or a summary in case of success log_dir = self.__fetchTestLogs(rebootOper) + if self.advanceboot_loganalyzer: + self.move_logs_before_reboot() + verification_errors = post_reboot_analysis(marker, event_counters=event_counters, + reboot_oper=rebootOper, log_dir=log_dir) + if verification_errors: + logger.error("Post reboot verification failed. 
List of failures: {}" + .format('\n'.join(verification_errors))) + test_results[test_case_name].extend(verification_errors) self.__clearArpAndFdbTables() self.__revertRebootOper(rebootOper) - if self.advanceboot_loganalyzer: - post_reboot_analysis(marker, reboot_oper=rebootOper, log_dir=log_dir) - if len(self.rebootData['sadList']) > 1 and count != len(self.rebootData['sadList']): + if 1 < len(self.rebootData['sadList']) != count: time.sleep(TIME_BETWEEN_SUCCESSIVE_TEST_OPER) - pytest_assert(len(failed_list) == 0,\ - "Advanced-reboot failure. Failed test: {}, sub-cases: {}".format(self.request.node.name, failed_list)) + failed_list = [(testcase, failures) for testcase, failures in test_results.items() if len(failures) != 0] + pytest_assert(len(failed_list) == 0, "Advanced-reboot failure. Failed test: {}, " + "failure summary:\n{}".format(self.request.node.name, failed_list)) return result - def runRebootTestcase(self, prebootList=None, inbootList=None, - prebootFiles='peer_dev_info,neigh_port_info'): - ''' + def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles='peer_dev_info,neigh_port_info', + preboot_setup=None, postboot_setup=None): + """ This method validates and prepares test bed for reboot test case. 
It runs the reboot test case using provided test arguments @param prebootList: list of operation to run before reboot process @param inbootList: list of operation to run during reboot prcoess @param prebootFiles: preboot files - ''' + """ + self.preboot_setup = preboot_setup + self.postboot_setup = postboot_setup self.imageInstall(prebootList, inbootList, prebootFiles) return self.runRebootTest() def __setupRebootOper(self, rebootOper): + if "dualtor" in self.getTestbedType(): + for device in self.duthosts: + device.shell("config mux mode manual all") + + down_ports = 0 + if "dut_lag_member_down" in str(rebootOper) \ + or "neigh_lag_member_down" in str(rebootOper) \ + or "vlan_port_down" in str(rebootOper) \ + or "neigh_vlan_member_down" in str(rebootOper): + down_ports = int(str(rebootOper)[-1]) + + event_counters = { + "SAI_CREATE_SWITCH": 1, + "INIT_VIEW": 1, + "APPLY_VIEW": 1, + "LAG_READY": len(self.mgFacts["minigraph_portchannels"]), + "PORT_READY": len(self.mgFacts["minigraph_ports"]) - down_ports, + } testData = { 'portchannel_interfaces': copy.deepcopy(self.mgFacts['minigraph_portchannels']), 'vlan_interfaces': copy.deepcopy(self.mgFacts['minigraph_vlans']), @@ -498,6 +588,7 @@ def __setupRebootOper(self, rebootOper): testDataFiles = [{'source': source, 'name': name} for name, source in testData.items()] self.__transferTestDataFiles(testDataFiles, self.ptfhost) + return event_counters def __verifyRebootOper(self, rebootOper): if isinstance(rebootOper, SadOperation): @@ -505,37 +596,43 @@ def __verifyRebootOper(self, rebootOper): rebootOper.verify() def __revertRebootOper(self, rebootOper): + if "dualtor" in self.getTestbedType(): + for device in self.duthosts: + device.shell("config mux mode auto all") + if isinstance(rebootOper, SadOperation): logger.info('Running revert handler for reboot operation {}'.format(rebootOper)) rebootOper.revert() def __runPtfRunner(self, rebootOper=None): - ''' + """ Run single PTF advanced-reboot.ReloadTest @param 
rebootOper:Reboot operation to conduct before/during reboot process - ''' + """ logger.info("Running PTF runner on PTF host: {0}".format(self.ptfhost)) - params={ - "dut_username" : self.rebootData['dut_username'], - "dut_password" : self.rebootData['dut_password'], - "dut_hostname" : self.rebootData['dut_hostname'], - "reboot_limit_in_seconds" : self.rebootLimit, - "reboot_type" : self.rebootType, - "portchannel_ports_file" : self.rebootData['portchannel_interfaces_file'], - "vlan_ports_file" : self.rebootData['vlan_interfaces_file'], - "ports_file" : self.rebootData['ports_file'], - "dut_mac" : self.rebootData['dut_mac'], - "default_ip_range" : self.rebootData['default_ip_range'], - "vlan_ip_range" : self.rebootData['vlan_ip_range'], - "lo_v6_prefix" : self.rebootData['lo_v6_prefix'], - "arista_vms" : self.rebootData['arista_vms'], - "nexthop_ips" : self.rebootData['nexthop_ips'], - "allow_vlan_flooding" : self.allowVlanFlooding, - "sniff_time_incr" : self.sniffTimeIncr, - "setup_fdb_before_test" : True, - "vnet" : self.vnet, - "vnet_pkts" : self.vnetPkts, + params = { + "dut_username": self.rebootData['dut_username'], + "dut_password": self.rebootData['dut_password'], + "dut_hostname": self.rebootData['dut_hostname'], + "reboot_limit_in_seconds": self.rebootLimit, + "reboot_type": self.rebootType, + "portchannel_ports_file": self.rebootData['portchannel_interfaces_file'], + "vlan_ports_file": self.rebootData['vlan_interfaces_file'], + "ports_file": self.rebootData['ports_file'], + "dut_mac": self.rebootData['dut_mac'], + "vlan_mac": self.rebootData['vlan_mac'], + "lo_prefix": self.rebootData['lo_prefix'], + "default_ip_range": self.rebootData['default_ip_range'], + "vlan_ip_range": self.rebootData['vlan_ip_range'], + "lo_v6_prefix": self.rebootData['lo_v6_prefix'], + "arista_vms": self.rebootData['arista_vms'], + "nexthop_ips": self.rebootData['nexthop_ips'], + "allow_vlan_flooding": self.allowVlanFlooding, + "sniff_time_incr": self.sniffTimeIncr, + 
"setup_fdb_before_test": True, + "vnet": self.vnet, + "vnet_pkts": self.vnetPkts, "bgp_v4_v6_time_diff": self.bgpV4V6TimeDiff, "asic_type": self.duthost.facts["asic_type"], "allow_mac_jumping": self.allowMacJump, @@ -551,8 +648,8 @@ def __runPtfRunner(self, rebootOper=None): # presence of routing in reboot operation indicates it is during reboot operation (inboot) inbootOper = rebootOper if rebootOper is not None and 'routing' in rebootOper else None params.update({ - "preboot_oper" : prebootOper, - "inboot_oper" : inbootOper, + "preboot_oper": prebootOper, + "inboot_oper": inbootOper, }) else: params.update({'logfile_suffix': str(rebootOper)}) @@ -578,9 +675,9 @@ def __runPtfRunner(self, rebootOper=None): return result def __restorePrevImage(self): - ''' + """ Restore previous image and reboot DUT - ''' + """ currentImage = self.duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] if currentImage != self.currentImage: logger.info('Restore current image') @@ -594,9 +691,9 @@ def __restorePrevImage(self): ) def tearDown(self): - ''' + """ Tears down test case. It also verifies that config_db.json exists. 
- ''' + """ logger.info('Running test tear down') if 'warm-reboot' in self.rebootType and self.newSonicImage is not None: logger.info('Save configuration after warm rebooting into new image') @@ -615,24 +712,25 @@ def tearDown(self): self.__restorePrevImage() @pytest.fixture -def get_advanced_reboot(request, duthosts, rand_one_dut_hostname, ptfhost, localhost, tbinfo, creds): - ''' +def get_advanced_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, localhost, tbinfo, + creds): + """ Pytest test fixture that provides access to AdvancedReboot test fixture @param request: pytest request object - @param duthost: AnsibleHost instance of DUT + @param duthosts: AnsibleHost instance of DUT @param ptfhost: PTFHost for interacting with PTF through ansible @param localhost: Localhost for interacting with localhost through ansible @param tbinfo: fixture provides information about testbed - ''' - duthost = duthosts[rand_one_dut_hostname] + """ + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] instances = [] def get_advanced_reboot(**kwargs): - ''' + """ API that returns instances of AdvancedReboot class - ''' + """ assert len(instances) == 0, "Only one instance of reboot data is allowed" - advancedReboot = AdvancedReboot(request, duthost, ptfhost, localhost, tbinfo, creds, **kwargs) + advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, tbinfo, creds, **kwargs) instances.append(advancedReboot) return advancedReboot diff --git a/tests/common/fixtures/duthost_utils.py b/tests/common/fixtures/duthost_utils.py index 61969bb0aa0..762a2fbce72 100644 --- a/tests/common/fixtures/duthost_utils.py +++ b/tests/common/fixtures/duthost_utils.py @@ -4,6 +4,7 @@ import collections import ipaddress import time +import json from tests.common.helpers.assertions import pytest_assert from tests.common.utilities import wait_until from jinja2 import Template @@ -189,9 +190,13 @@ def check_ebgp_routes(num_v4_routes, num_v6_routes, 
duthost): MAX_DIFF = 5 sumv4, sumv6 = duthost.get_ip_route_summary() rtn_val = True - if 'ebgp' in sumv4 and 'routes' in sumv4['ebgp'] and abs(int(float(sumv4['ebgp']['routes'])) - int(float(num_v4_routes))) >= MAX_DIFF: + if 'ebgp' in sumv4 and 'routes' in sumv4['ebgp'] and \ + abs(int(float(sumv4['ebgp']['routes'])) - int(float(num_v4_routes))) >= MAX_DIFF: + logger.info("IPv4 ebgp routes: {}".format(float(sumv4['ebgp']['routes']))) rtn_val = False - if 'ebgp' in sumv6 and 'routes' in sumv6['ebgp'] and abs(int(float(sumv6['ebgp']['routes'])) - int(float(num_v6_routes))) >= MAX_DIFF: + if 'ebgp' in sumv6 and 'routes' in sumv6['ebgp'] and \ + abs(int(float(sumv6['ebgp']['routes'])) - int(float(num_v6_routes))) >= MAX_DIFF: + logger.info("IPv6 ebgp routes: {}".format(float(sumv6['ebgp']['routes']))) rtn_val = False return rtn_val @@ -209,7 +214,7 @@ def shutdown_ebgp(duthosts): # Shutdown all eBGP neighbors duthost.command("sudo config bgp shutdown all") # Verify that the total eBGP routes are 0. - pytest_assert(wait_until(30, 2, 0, check_ebgp_routes, 0, 0, duthost), + pytest_assert(wait_until(60, 2, 5, check_ebgp_routes, 0, 0, duthost), "eBGP routes are not 0 after shutting down all neighbors on {}".format(duthost)) pytest_assert(wait_until(60, 2, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold), "Orch CPU utilization {} > orch cpu threshold {} after shutdown all eBGP" @@ -412,3 +417,120 @@ def utils_create_test_vlans(duthost, cfg_facts, vlan_ports_list, vlan_intfs_dict )) logger.info("Commands: {}".format(cmds)) duthost.shell_cmds(cmds=cmds) + + +@pytest.fixture(scope='module') +def dut_qos_maps(rand_selected_dut): + """ + A module level fixture to get QoS map from DUT host. + Return a dict + { + "dscp_to_tc_map": { + "0":"1", + ... + }, + "tc_to_queue_map": { + "0":"0" + }, + ... 
+ } + or an empty dict if failed to parse the output + """ + maps = {} + try: + # port_qos_map + maps['port_qos_map'] = json.loads(rand_selected_dut.shell("sonic-cfggen -d --var-json 'PORT_QOS_MAP'")['stdout']) + # dscp_to_tc_map + maps['dscp_to_tc_map'] = json.loads(rand_selected_dut.shell("sonic-cfggen -d --var-json 'DSCP_TO_TC_MAP'")['stdout']) + # tc_to_queue_map + maps['tc_to_queue_map'] = json.loads(rand_selected_dut.shell("sonic-cfggen -d --var-json 'TC_TO_QUEUE_MAP'")['stdout']) + # tc_to_priority_group_map + maps['tc_to_priority_group_map'] = json.loads(rand_selected_dut.shell("sonic-cfggen -d --var-json 'TC_TO_PRIORITY_GROUP_MAP'")['stdout']) + # tc_to_dscp_map + maps['tc_to_dscp_map'] = json.loads(rand_selected_dut.shell("sonic-cfggen -d --var-json 'TC_TO_DSCP_MAP'")['stdout']) + except: + pass + return maps + + +def separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps): + """ + A helper function to check if separated DSCP_TO_TC_MAP is applied to + downlink/uplink ports. + """ + dscp_to_tc_map_names = set() + for port_name, qos_map in dut_qos_maps['port_qos_map'].iteritems(): + if port_name == "global": + continue + dscp_to_tc_map_names.add(qos_map.get("dscp_to_tc_map", "")) + if len(dscp_to_tc_map_names) > 1: + return True + return False + + +def load_dscp_to_pg_map(duthost, port, dut_qos_maps): + """ + Helper function to calculate DSCP to PG map for a port. 
+ The map is derived from DSCP_TO_TC_MAP + TC_TO_PG_MAP + return a dict like {0:0, 1:1...} + """ + try: + port_qos_map = dut_qos_maps['port_qos_map'] + dscp_to_tc_map_name = port_qos_map[port]['dscp_to_tc_map'].split('|')[-1].strip(']') + tc_to_pg_map_name = port_qos_map[port]['tc_to_pg_map'].split('|')[-1].strip(']') + # Load dscp_to_tc_map + dscp_to_tc_map = dut_qos_maps['dscp_to_tc_map'][dscp_to_tc_map_name] + # Load tc_to_pg_map + tc_to_pg_map = dut_qos_maps['tc_to_priority_group_map'][tc_to_pg_map_name] + # Calculate dscp to pg map + dscp_to_pg_map = {} + for dscp, tc in dscp_to_tc_map.items(): + dscp_to_pg_map[dscp] = tc_to_pg_map[tc] + return dscp_to_pg_map + except: + logger.error("Failed to retrieve dscp to pg map for port {} on {}".format(port, duthost.hostname)) + return {} + + +def load_dscp_to_queue_map(duthost, port, dut_qos_maps): + """ + Helper function to calculate DSCP to Queue map for a port. + The map is derived from DSCP_TO_TC_MAP + TC_TO_QUEUE_MAP + return a dict like {0:0, 1:1...} + """ + try: + port_qos_map = dut_qos_maps['port_qos_map'] + dscp_to_tc_map_name = port_qos_map[port]['dscp_to_tc_map'].split('|')[-1].strip(']') + tc_to_queue_map_name = port_qos_map[port]['tc_to_queue_map'].split('|')[-1].strip(']') + # Load dscp_to_tc_map + dscp_to_tc_map = dut_qos_maps['dscp_to_tc_map'][dscp_to_tc_map_name] + # Load tc_to_queue_map + tc_to_queue_map = dut_qos_maps['tc_to_queue_map'][tc_to_queue_map_name] + # Calculate dscp to queue map + dscp_to_queue_map = {} + for dscp, tc in dscp_to_tc_map.items(): + dscp_to_queue_map[dscp] = tc_to_queue_map[tc] + return dscp_to_queue_map + except: + logger.error("Failed to retrieve dscp to queue map for port {} on {}".format(port, duthost.hostname)) + return {} + + +def check_bgp_router_id(duthost, mgFacts): + """ + Check bgp router ID is same as Loopback0 + """ + check_bgp_router_id_cmd = r'vtysh -c "show ip bgp summary json"' + bgp_summary = duthost.shell(check_bgp_router_id_cmd, 
module_ignore_errors=True) + try: + bgp_summary_json = json.loads(bgp_summary['stdout']) + router_id = str(bgp_summary_json['ipv4Unicast']['routerId']) + loopback0 = str(mgFacts['minigraph_lo_interfaces'][0]['addr']) + if router_id == loopback0: + logger.info("BGP router identifier: %s == Loopback0 address %s" % (router_id, loopback0)) + return True + else: + logger.info("BGP router identifier %s != Loopback0 address %s" % (router_id, loopback0)) + return False + except Exception as e: + logger.error("Error loading BGP routerID - {}".format(e)) diff --git a/tests/common/fixtures/ptfhost_utils.py b/tests/common/fixtures/ptfhost_utils.py index 22b91ec2036..7926956f65d 100644 --- a/tests/common/fixtures/ptfhost_utils.py +++ b/tests/common/fixtures/ptfhost_utils.py @@ -8,7 +8,6 @@ from ipaddress import ip_interface from jinja2 import Template -from natsort import natsorted from tests.common import constants from tests.common.helpers.assertions import pytest_assert as pt_assert @@ -140,6 +139,8 @@ def remove_ip_addresses(ptfhost): logger.info("Remove IPs to restore ptfhost '{0}'".format(ptfhost.hostname)) ptfhost.remove_ip_addresses() + # Interfaces restart is required, otherwise the ipv6 link-addresses won't come back. 
+ ptfhost.restart_interfaces() @pytest.fixture(scope="session", autouse=True) @@ -161,45 +162,62 @@ def copy_arp_responder_py(ptfhost): ptfhost.file(path=os.path.join(OPT_DIR, ARP_RESPONDER_PY), state="absent") -@pytest.fixture(scope='class') -def ptf_portmap_file(duthosts, rand_one_dut_hostname, ptfhost): +def _ptf_portmap_file(duthost, ptfhost, tbinfo): """ Prepare and copys port map file to PTF host - Args: request (Fixture): pytest request object duthost (AnsibleHost): Device Under Test (DUT) ptfhost (AnsibleHost): Packet Test Framework (PTF) - Returns: filename (str): returns the filename copied to PTF host """ - duthost = duthosts[rand_one_dut_hostname] intfInfo = duthost.show_interface(command = "status")['ansible_facts']['int_status'] - portList = natsorted([port for port in intfInfo if port.startswith('Ethernet')]) + portList = [port for port in intfInfo if port.startswith('Ethernet') and intfInfo[port]['oper_state'] == 'up'] + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) portMapFile = "/tmp/default_interface_to_front_map.ini" with open(portMapFile, 'w') as file: file.write("# ptf host interface @ switch front port name\n") - file.writelines( - map( - lambda (index, port): "{0}@{1}\n".format(index, port), - enumerate(portList) - ) - ) + ptf_port_map = [] + for port in portList: + if "Ethernet-Rec" not in port or "Ethernet-IB" not in port: + index = mg_facts['minigraph_ptf_indices'][port] + ptf_port_map.append("{}@{}\n".format(index, port)) + file.writelines(ptf_port_map) ptfhost.copy(src=portMapFile, dest="/root/") - yield "/root/{}".format(portMapFile.split('/')[-1]) + return "/root/{}".format(portMapFile.split('/')[-1]) + + +@pytest.fixture(scope='class') +def ptf_portmap_file(rand_selected_dut, ptfhost, tbinfo): + """ + A class level fixture that calls _ptf_portmap_file + """ + yield _ptf_portmap_file(rand_selected_dut, ptfhost, tbinfo) + + +@pytest.fixture(scope='module') +def ptf_portmap_file_module(rand_selected_dut, ptfhost, tbinfo): + 
""" + A module level fixture that calls _ptf_portmap_file + """ + yield _ptf_portmap_file(rand_selected_dut, ptfhost, tbinfo) @pytest.fixture(scope="module", autouse=True) -def run_icmp_responder(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): +def run_icmp_responder(duthosts, rand_one_dut_hostname, ptfhost, tbinfo, request): """Run icmp_responder.py over ptfhost.""" # No vlan is avaliable on non-t0 testbed, so skip this fixture if 't0' not in tbinfo['topo']['type']: logger.info("Not running on a T0 testbed, not starting ICMP responder") yield return + elif 'dualtor' not in tbinfo['topo']['name'] and "test_advanced_reboot" in request.node.name: + logger.info("Skip ICMP responder for advanced-reboot test on non dualtor devices") + yield + return increase_linkmgrd_probe_interval(duthosts, tbinfo) @@ -229,20 +247,73 @@ def run_icmp_responder(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): ptfhost.shell("supervisorctl stop icmp_responder") +@pytest.fixture +def pause_garp_service(ptfhost): + """ + Temporarily pause GARP service on PTF for one test method + + `run_garp_service` is module scoped and autoused, + but some tests in modules where it is imported need it disabled + This fixture should only be used when garp_service is already running on the PTF + """ + needs_resume = False + res = ptfhost.shell("supervisorctl status garp_service", module_ignore_errors=True) + if res['rc'] != 0: + logger.warning("GARP service not present on PTF") + elif 'RUNNING' in res['stdout']: + needs_resume = True + ptfhost.shell("supervisorctl stop garp_service") + else: + logger.warning("GARP service already stopped on PTF") + + yield + + if needs_resume: + ptfhost.shell("supervisorctl start garp_service") + + @pytest.fixture(scope='module', autouse=True) def run_garp_service(duthost, ptfhost, tbinfo, change_mac_addresses, request): + config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] if tbinfo['topo']['type'] == 't0': garp_config = {} + 
vlans = config_facts['VLAN'] + vlan_intfs = config_facts['VLAN_INTERFACE'] + dut_mac = '' + for vlan_details in vlans.values(): + if 'dualtor' in tbinfo['topo']['name']: + dut_mac = vlan_details['mac'].lower() + else: + dut_mac = duthost.shell('sonic-cfggen -d -v \'DEVICE_METADATA.localhost.mac\'')["stdout_lines"][0].decode("utf-8") + break + + dst_ipv6 = '' + for intf_details in vlan_intfs.values(): + for key in intf_details.keys(): + try: + intf_ip = ip_interface(key) + if intf_ip.version == 6: + dst_ipv6 = intf_ip.ip + break + except ValueError: + continue + break ptf_indices = duthost.get_extended_minigraph_facts(tbinfo)["minigraph_ptf_indices"] if 'dualtor' not in tbinfo['topo']['name']: + if "test_advanced_reboot" in request.node.name: + logger.info("Skip GARP service for advanced-reboot test on non dualtor devices") + yield + return # For mocked dualtor testbed mux_cable_table = {} - server_ipv4_base_addr, _ = request.getfixturevalue('mock_server_base_ip_addr') + server_ipv4_base_addr, server_ipv6_base_addr = request.getfixturevalue('mock_server_base_ip_addr') for i, intf in enumerate(request.getfixturevalue('tor_mux_intfs')): server_ipv4 = str(server_ipv4_base_addr + i) + server_ipv6 = str(server_ipv6_base_addr + i) mux_cable_table[intf] = {} mux_cable_table[intf]['server_ipv4'] = unicode(server_ipv4) + mux_cable_table[intf]['server_ipv6'] = unicode(server_ipv6) else: # For physical dualtor testbed mux_cable_table = duthost.get_running_config_facts()['MUX_CABLE'] @@ -252,9 +323,13 @@ def run_garp_service(duthost, ptfhost, tbinfo, change_mac_addresses, request): for vlan_intf, config in mux_cable_table.items(): ptf_port_index = ptf_indices[vlan_intf] server_ip = ip_interface(config['server_ipv4']).ip + server_ipv6 = ip_interface(config['server_ipv6']).ip garp_config[ptf_port_index] = { - 'target_ip': '{}'.format(server_ip) + 'dut_mac': '{}'.format(dut_mac), + 'dst_ipv6': '{}'.format(dst_ipv6), + 'target_ip': '{}'.format(server_ip), + 'target_ipv6': 
'{}'.format(server_ipv6) } ptfhost.copy(src=os.path.join(SCRIPTS_SRC_DIR, GARP_SERVICE_PY), dest=OPT_DIR) diff --git a/tests/common/helpers/backend_acl.py b/tests/common/helpers/backend_acl.py new file mode 100644 index 00000000000..06bc85e38fa --- /dev/null +++ b/tests/common/helpers/backend_acl.py @@ -0,0 +1,35 @@ +import os + +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) +DUT_TMP_DIR = "/tmp" +TEMPLATE_DIR = os.path.join(BASE_DIR, '../templates') +ACL_TEMPLATE = 'backend_acl_update_config.j2' + +def apply_acl_rules(duthost, tbinfo, intf_list=None): + if "t0-backend" not in tbinfo["topo"]["name"]: + return + + dst_acl_template = os.path.join(DUT_TMP_DIR, ACL_TEMPLATE) + dst_acl_file = os.path.join(DUT_TMP_DIR, 'backend_new_acl.json') + add_var = '' + + if intf_list: + duthost.copy(src=os.path.join(TEMPLATE_DIR, ACL_TEMPLATE), dest=dst_acl_template) + intfs = ",".join(intf_list) + confvar = '{{"intf_list" : "{}"}}'.format(intfs) + add_var = "-a '{}' ".format(confvar) + else: + dst_acl_template = "/usr/share/sonic/templates/backend_acl.j2" + + duthost.shell("sonic-cfggen {}-d -t {} > {}".format(add_var, dst_acl_template, dst_acl_file)) + tmp = duthost.stat(path=dst_acl_file) + if tmp['stat']['exists']: + duthost.command("acl-loader update incremental {}".format(dst_acl_file)) + + +def bind_acl_table(duthost, tbinfo): + if "t0-backend" not in tbinfo["topo"]["name"]: + return + + vlan_intfs = duthost.get_vlan_intfs() + duthost.command("config acl add table DATAACL L3 -p {}".format(",".join(vlan_intfs))) diff --git a/tests/common/helpers/bgp.py b/tests/common/helpers/bgp.py index 053a8bacb49..97f3c845933 100644 --- a/tests/common/helpers/bgp.py +++ b/tests/common/helpers/bgp.py @@ -12,9 +12,10 @@ def _write_variable_from_j2_to_configdb(duthost, template_file, **kwargs): save_dest_path = kwargs.pop("save_dest_path", "/tmp/temp.j2") keep_dest_file = kwargs.pop("keep_dest_file", True) + namespace = kwargs.pop("namespace") config_template = 
jinja2.Template(open(template_file).read()) duthost.copy(content=config_template.render(**kwargs), dest=save_dest_path) - duthost.shell("sonic-cfggen -j %s --write-to-db" % save_dest_path) + duthost.asic_instance_from_namespace(namespace).write_to_config_db(save_dest_path) if not keep_dest_file: duthost.file(path=save_dest_path, state="absent") @@ -24,7 +25,7 @@ class BGPNeighbor(object): def __init__(self, duthost, ptfhost, name, neighbor_ip, neighbor_asn, dut_ip, dut_asn, port, neigh_type=None, - is_multihop=False, is_passive=False): + namespace=None, is_multihop=False, is_passive=False): self.duthost = duthost self.ptfhost = ptfhost self.ptfip = ptfhost.mgmt_ip @@ -35,6 +36,7 @@ def __init__(self, duthost, ptfhost, name, self.peer_asn = dut_asn self.port = port self.type = neigh_type + self.namespace = namespace self.is_passive = is_passive self.is_multihop = not is_passive and is_multihop @@ -46,6 +48,7 @@ def start_session(self): _write_variable_from_j2_to_configdb( self.duthost, "bgp/templates/neighbor_metadata_template.j2", + namespace=self.namespace, save_dest_path=NEIGHBOR_SAVE_DEST_TMPL % self.name, neighbor_name=self.name, neighbor_lo_addr=self.ip, @@ -57,6 +60,7 @@ def start_session(self): _write_variable_from_j2_to_configdb( self.duthost, "bgp/templates/bgp_template.j2", + namespace=self.namespace, save_dest_path=BGP_SAVE_DEST_TMPL % self.name, db_table_name="BGP_NEIGHBOR", peer_addr=self.ip, @@ -75,7 +79,7 @@ def start_session(self): peer_asn=self.peer_asn, port=self.port ) - if not wait_tcp_connection(self.ptfhost, self.ptfip, self.port): + if not wait_tcp_connection(self.ptfhost, self.ptfip, self.port, timeout_s=60): raise RuntimeError("Failed to start BGP neighbor %s" % self.name) if self.is_multihop: @@ -92,8 +96,9 @@ def stop_session(self): """Stop the BGP session.""" logging.debug("stop bgp session %s", self.name) if not self.is_passive: - self.duthost.shell("redis-cli -n 4 -c DEL 'BGP_NEIGHBOR|%s'" % self.ip) - self.duthost.shell("redis-cli -n 
4 -c DEL 'DEVICE_NEIGHBOR_METADATA|%s'" % self.name) + for asichost in self.duthost.asics: + asichost.run_sonic_db_cli_cmd("CONFIG_DB del 'BGP_NEIGHBOR|{}'".format(self.ip)) + asichost.run_sonic_db_cli_cmd("CONFIG_DB del 'DEVICE_NEIGHBOR_METADATA|{}'".format(self.name)) self.ptfhost.exabgp(name=self.name, state="absent") def announce_route(self, route): diff --git a/tests/common/helpers/constants.py b/tests/common/helpers/constants.py index dde09903a32..75ed11362e2 100644 --- a/tests/common/helpers/constants.py +++ b/tests/common/helpers/constants.py @@ -4,3 +4,22 @@ NAMESPACE_PREFIX = 'asic' ASIC_PARAM_TYPE_ALL = 'num_asics' ASIC_PARAM_TYPE_FRONTEND = 'frontend_asics' + +# Describe upstream neighbor of dut in different topos +UPSTREAM_NEIGHBOR_MAP = { + "t0": "t1", + "t1": "t2", + "m0": "m1", + "t2": "t3", + "m0_vlan": "m1", + "m0_l3": "m1" +} +# Describe downstream neighbor of dut in different topos +DOWNSTREAM_NEIGHBOR_MAP = { + "t0": "server", + "t1": "t0", + "m0": "mx", + "t2": "t1", + "m0_vlan": "server", + "m0_l3": "mx" +} diff --git a/tests/common/helpers/dut_ports.py b/tests/common/helpers/dut_ports.py index d9987946e38..83f4b735e91 100644 --- a/tests/common/helpers/dut_ports.py +++ b/tests/common/helpers/dut_ports.py @@ -1,3 +1,8 @@ +import logging + +logger = logging.getLogger(__name__) + + def encode_dut_port_name(dutname, portname): return dutname + '|' + portname @@ -15,3 +20,10 @@ def decode_dut_port_name(dut_portname): portname = None return dutname, portname + +def get_duthost_with_name(duthosts, dut_name): + for duthost in duthosts: + if dut_name in ['unknown', duthost.hostname]: + return duthost + logger.error("Can't find duthost with name {}.".format(dut_name)) + return diff --git a/tests/common/helpers/dut_utils.py b/tests/common/helpers/dut_utils.py index 17b3df8ce32..2e846d0659a 100644 --- a/tests/common/helpers/dut_utils.py +++ b/tests/common/helpers/dut_utils.py @@ -1,4 +1,5 @@ import logging +from collections import defaultdict from 
tests.common.helpers.assertions import pytest_assert from tests.common.utilities import get_host_visible_vars from tests.common.utilities import wait_until @@ -177,7 +178,7 @@ def get_disabled_container_list(duthost): pytest_assert(succeeded, "Failed to get status ('enabled'|'disabled') of containers. Exiting...") for container_name, status in container_status.items(): - if "disabled" in status: + if "disabled".encode('UTF-8') in status: disabled_containers.append(container_name) return disabled_containers @@ -266,12 +267,12 @@ def verify_orchagent_running_or_assert(duthost): """ Verifies that orchagent is running, asserts otherwise - Args: + Args: duthost: Device Under Test (DUT) """ - + def _orchagent_running(): - cmds = 'docker exec swss supervisorctl status orchagent' + cmds = 'docker exec swss supervisorctl status orchagent' output = duthost.shell(cmds, module_ignore_errors=True) pytest_assert(not output['rc'], "Unable to check orchagent status output") return 'RUNNING' in output['stdout'] diff --git a/tests/common/helpers/parallel.py b/tests/common/helpers/parallel.py index ad207fbe437..4c3d8b71194 100644 --- a/tests/common/helpers/parallel.py +++ b/tests/common/helpers/parallel.py @@ -51,7 +51,7 @@ def exception(self): def parallel_run( - target, args, kwargs, nodes_list, timeout=None, concurrent_tasks=24 + target, args, kwargs, nodes_list, timeout=None, concurrent_tasks=24, init_result=None ): """Run target function on nodes in parallel @@ -81,7 +81,7 @@ def on_terminate(worker): worker.name, worker.returncode) ) - def force_terminate(workers): + def force_terminate(workers, init_result): # Some processes cannot be terminated. Try to kill them and raise flag. running_processes = [worker for worker in workers if worker.is_alive()] if len(running_processes) > 0: @@ -91,7 +91,13 @@ def force_terminate(workers): ) ) for p in running_processes: - results[p.name] = [{'failed': True}] + # If sanity check process is killed, it still has init results. 
+ # set its failed to True. + if init_result: + init_result['failed'] = True + results[results.keys()[0]] = init_result + else: + results[p.name] = {'failed': True} try: os.kill(p.pid, signal.SIGKILL) except OSError as err: @@ -129,6 +135,10 @@ def force_terminate(workers): while len(nodes) and tasks_running < concurrent_tasks: node = nodes.pop(0) + # For sanity check process, initial results in case of timeout. + if init_result: + init_result["host"] = node.hostname + results[node.hostname] = init_result kwargs['node'] = node kwargs['results'] = results process_name = "{}--{}".format(target.__name__, node) @@ -154,7 +164,7 @@ def force_terminate(workers): logger.debug("all processes have timedout") tasks_running -= len(workers) tasks_done += len(workers) - force_terminate(workers) + force_terminate(workers, init_result) del workers[:] else: tasks_running -= len(gone) @@ -174,31 +184,35 @@ def force_terminate(workers): worker.name )) worker.terminate() - results[worker.name] = [{'failed': True}] + # If sanity check process is killed, it still has init results. + # set its failed to True. 
+ if init_result: + init_result['failed'] = True + results[results.keys()[0]] = init_result + else: + results[worker.name] = {'failed': True} end_time = datetime.datetime.now() delta_time = end_time - start_time # force terminate any workers still running - force_terminate(workers) + force_terminate(workers, init_result) # if we have failed processes, we should log the exception and exit code # of each Process and fail if len(failed_processes.keys()): for process_name, process in failed_processes.items(): + p_exitcode = "" + p_exception = "" + p_traceback = "" if 'exception' in process and process['exception']: p_exception = process['exception'][0] p_traceback = process['exception'][1] p_exitcode = process['exit_code'] - logger.error("""Process {} had exit code {} and exception {} - and traceback {}""".format( - process_name, p_exitcode, p_exception, p_traceback - ) - ) pt_assert( False, - 'Processes "{}" had failures. Please check the logs'.format( - list(failed_processes.keys()) + 'Processes "{}" failed with exit code "{}"\nException:\n{}\nTraceback:\n{}'.format( + list(failed_processes.keys()), p_exitcode, p_exception, p_traceback ) ) @@ -208,7 +222,7 @@ def force_terminate(workers): ) ) - return results + return dict(results) def reset_ansible_local_tmp(target): diff --git a/tests/common/helpers/pfc_gen.py b/tests/common/helpers/pfc_gen.py index ec78a0b0224..86b7d60a8c4 100755 --- a/tests/common/helpers/pfc_gen.py +++ b/tests/common/helpers/pfc_gen.py @@ -39,17 +39,17 @@ def main(): parser.add_option('-p', "--priority", type="int", dest="priority", help="PFC class enable bitmap.", metavar="Priority", default=-1) parser.add_option("-t", "--time", type="int", dest="time", help="Pause time in quanta for global pause or enabled class",metavar="time") parser.add_option("-n", "--num", type="int", dest="num", help="Number of packets to be sent",metavar="number",default=1) - parser.add_option("-r", "--rsyslog-server", type="string", dest="rsyslog_server", 
default="127.0.0.1", help="Rsyslog server IPv4 address",metavar="IPAddress") + parser.add_option("-r", "--rsyslog-server", type="string", dest="rsyslog_server", default="127.0.0.1", help="Rsyslog server IPv4 address",metavar="IPAddress") parser.add_option('-g', "--global", action="store_true", dest="global_pf", help="Send global pause frames (not PFC)", default=False) (options, args) = parser.parse_args() if options.interface is None: - print "Need to specify the interface to send PFC/global pause frame packets." + print("Need to specify the interface to send PFC/global pause frame packets.") parser.print_help() sys.exit(1) if options.time > 65535 or options.time < 0: - print "Quanta is not valid. Need to be in range 0-65535." + print("Quanta is not valid. Need to be in range 0-65535.") parser.print_help() sys.exit(1) @@ -57,11 +57,11 @@ def main(): # Send global pause frames # -p option should not be set if options.priority != -1: - print "'-p' option is not valid when sending global pause frames ('--global' / '-g')" + print("'-p' option is not valid when sending global pause frames ('--global' / '-g')") parser.print_help() sys.exit(1) elif options.priority > 255 or options.priority < 0: - print "Enable class bitmap is not valid. Need to be in range 0-255." + print("Enable class bitmap is not valid. Need to be in range 0-255.") parser.print_help() sys.exit(1) @@ -72,7 +72,7 @@ def main(): for i in range(0, len(interfaces)): sockets.append(socket(AF_PACKET, SOCK_RAW)) except: - print "Unable to create socket. Check your permissions" + print("Unable to create socket. 
Check your permissions") sys.exit(1) # Configure logging @@ -122,13 +122,13 @@ def main(): pause time | 0x0000 | ------------------------- """ - src_addr = "\x00\x01\x02\x03\x04\x05" - dst_addr = "\x01\x80\xc2\x00\x00\x01" + src_addr = b"\x00\x01\x02\x03\x04\x05" + dst_addr = b"\x01\x80\xc2\x00\x00\x01" if options.global_pf: - opcode = "\x00\x01" + opcode = b"\x00\x01" else: - opcode = "\x01\x01" - ethertype = "\x88\x08" + opcode = b"\x01\x01" + ethertype = b"\x88\x08" packet = dst_addr + src_addr + ethertype + opcode if options.global_pf: @@ -138,14 +138,14 @@ def main(): class_enable_field = binascii.unhexlify(format(class_enable, '04x')) packet = packet + class_enable_field - for p in range(0,7): - if (class_enable & (1< 0: diff --git a/tests/common/helpers/snmp_helpers.py b/tests/common/helpers/snmp_helpers.py index d5388e0bf62..5e92494e4a0 100644 --- a/tests/common/helpers/snmp_helpers.py +++ b/tests/common/helpers/snmp_helpers.py @@ -11,16 +11,16 @@ global_snmp_facts={} -def _get_snmp_facts(localhost, host, version, community, is_dell, module_ignore_errors): - snmp_facts = localhost.snmp_facts(host=host, version=version, community=community, is_dell=is_dell, module_ignore_errors=module_ignore_errors) +def _get_snmp_facts(localhost, host, version, community, is_dell, include_swap, module_ignore_errors): + snmp_facts = localhost.snmp_facts(host=host, version=version, community=community, is_dell=is_dell, module_ignore_errors=module_ignore_errors, include_swap=include_swap) return snmp_facts -def _update_snmp_facts(localhost, host, version, community, is_dell): +def _update_snmp_facts(localhost, host, version, community, is_dell, include_swap): global global_snmp_facts try: - global_snmp_facts = _get_snmp_facts(localhost, host, version, community, is_dell, + global_snmp_facts = _get_snmp_facts(localhost, host, version, community, is_dell, include_swap, module_ignore_errors=False) except RunAnsibleModuleFail as e: logger.info("encountered error when getting snmp 
facts: {}".format(e)) @@ -31,12 +31,12 @@ def _update_snmp_facts(localhost, host, version, community, is_dell): def get_snmp_facts(localhost, host, version, community, is_dell=False, module_ignore_errors=False, - wait=False, timeout=DEF_WAIT_TIMEOUT, interval=DEF_CHECK_INTERVAL): + wait=False, include_swap=False, timeout=DEF_WAIT_TIMEOUT, interval=DEF_CHECK_INTERVAL): if not wait: - return _get_snmp_facts(localhost, host, version, community, is_dell, module_ignore_errors) + return _get_snmp_facts(localhost, host, version, community, is_dell, include_swap, module_ignore_errors) global global_snmp_facts pytest_assert(wait_until(timeout, interval, 0, _update_snmp_facts, localhost, host, version, - community, is_dell), "Timeout waiting for SNMP facts") + community, is_dell, include_swap), "Timeout waiting for SNMP facts") return global_snmp_facts diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index e5dc47206ac..3b22aa74a4e 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -2,7 +2,7 @@ SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "Mellanox-SN2700-D48C8", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010", "ACS-MSN2201"] SPC2_HWSKUS = ["ACS-MSN3700", "ACS-MSN3700C", "ACS-MSN3800", "Mellanox-SN3800-D112C8", "ACS-MSN3420"] -SPC3_HWSKUS = ["ACS-MSN4700", "ACS-MSN4600C", "ACS-MSN4410", "ACS-MSN4600"] +SPC3_HWSKUS = ["ACS-MSN4700", "ACS-MSN4600C", "ACS-MSN4410", "ACS-MSN4600", "Mellanox-SN4600C-D112C8", "Mellanox-SN4600C-C64"] SWITCH_HWSKUS = SPC1_HWSKUS + SPC2_HWSKUS + SPC3_HWSKUS PSU_CAPABILITIES = [ @@ -555,7 +555,7 @@ "thermals": { "cpu_core": { "start": 0, - "number": 2 + "number": 4 }, "module": { "start": 1, diff --git a/tests/common/platform/device_utils.py b/tests/common/platform/device_utils.py index cc8228db723..806385605fc 100644 --- a/tests/common/platform/device_utils.py +++ b/tests/common/platform/device_utils.py @@ -30,7 +30,7 @@ def fanout_switch_port_lookup(fanout_switches, dut_name, 
dut_port): def get_dut_psu_line_pattern(dut): if "201811" in dut.os_version or "201911" in dut.os_version: psu_line_pattern = re.compile(r"PSU\s+(\d)+\s+(OK|NOT OK|NOT PRESENT)") - elif dut.facts['platform'] == "x86_64-dellemc_z9332f_d1508-r0": + elif dut.facts['platform'] == "x86_64-dellemc_z9332f_d1508-r0" or dut.facts['asic_type'] == "cisco-8000": psu_line_pattern = re.compile(r"PSU\s+(\d+).*?(OK|NOT OK|NOT PRESENT)\s+(N/A)") else: """ diff --git a/tests/common/platform/interface_utils.py b/tests/common/platform/interface_utils.py index 55fcead3d94..d163c4b2a39 100644 --- a/tests/common/platform/interface_utils.py +++ b/tests/common/platform/interface_utils.py @@ -60,7 +60,10 @@ def check_interface_status(dut, asic_index, interfaces, xcvr_skip_list): mg_ports = interface_list output = dut.command("show interface description") intf_status = parse_intf_status(output["stdout_lines"][2:]) - check_intf_presence_command = 'show interface transceiver presence {}' + if dut.is_multi_asic: + check_intf_presence_command = 'show interface transceiver presence -n {} {}'.format(namespace, {}) + else: + check_intf_presence_command = 'show interface transceiver presence {}' for intf in interfaces: expected_oper = "up" if intf in mg_ports else "down" expected_admin = "up" if intf in mg_ports else "down" diff --git a/tests/common/platform/transceiver_utils.py b/tests/common/platform/transceiver_utils.py index 1ff24daa57a..6f4ccd19013 100644 --- a/tests/common/platform/transceiver_utils.py +++ b/tests/common/platform/transceiver_utils.py @@ -78,7 +78,9 @@ def check_transceiver_details(dut, asic_index, interfaces, xcvr_skip_list): """ asichost = dut.asic_instance(asic_index) logging.info("Check detailed transceiver information of each connected port") - if dut.sonic_release in ["202012", "202106", "202111"]: + # NOTE: No more releases to be added here. Platform should use SFP-refactor. 
+ # 'hardware_rev' is ONLY applicable to QSFP-DD/OSFP modules + if dut.sonic_release in ["201811", "201911", "202012", "202106", "202111"]: expected_fields = ["type", "hardware_rev", "serial", "manufacturer", "model"] else: expected_fields = ["type", "vendor_rev", "serial", "manufacturer", "model"] diff --git a/tests/common/plugins/conditional_mark/README.md b/tests/common/plugins/conditional_mark/README.md index 562ff213d23..05adf91c98d 100644 --- a/tests/common/plugins/conditional_mark/README.md +++ b/tests/common/plugins/conditional_mark/README.md @@ -134,6 +134,7 @@ Example variables can be used in condition string: "asic_type": "vs", "num_asic": 1, "is_multi_asic": False, + "asic_gen": "td2" } ``` diff --git a/tests/common/plugins/conditional_mark/__init__.py b/tests/common/plugins/conditional_mark/__init__.py index a8c8a63112b..5a0518a8f2a 100644 --- a/tests/common/plugins/conditional_mark/__init__.py +++ b/tests/common/plugins/conditional_mark/__init__.py @@ -19,6 +19,7 @@ logger = logging.getLogger(__name__) DEFAULT_CONDITIONS_FILE = 'common/plugins/conditional_mark/tests_mark_conditions*.yaml' +ASIC_NAME_PATH = '/../../../../ansible/group_vars/sonic/variables' def pytest_addoption(parser): @@ -86,6 +87,71 @@ def load_conditions(session): return conditions_list +def read_asic_name(hwsku): + ''' + Get asic generation name from file 'ansible/group_vars/sonic/variables' + + Args: + hwsku (str): Dut hwsku name + + Returns: + str or None: Return the asic generation name or None if something went wrong or nothing found in the file. 
+ + ''' + asic_name_file = os.path.dirname(__file__) + ASIC_NAME_PATH + try: + with open(asic_name_file) as f: + asic_name = yaml.safe_load(f) + logger.info(asic_name) + + for key, value in asic_name.items(): + if ('td' not in key) and ('th' not in key): + asic_name.pop(key) + + for name, hw in asic_name.items(): + if hwsku in hw: + return name.split('_')[1] + + return "unknown" + + except IOError as e: + return None + + +def get_http_proxies(inv_name): + INV_ENV_FILE = '../../../../ansible/group_vars/{}/env.yml'.format(inv_name) + PUBLIC_ENV_FILE = '../../../../ansible/group_vars/all/env.yml' + base_path = os.path.dirname(__file__) + inv_env_path = os.path.join(base_path, INV_ENV_FILE) + public_env_path = os.path.join(base_path, PUBLIC_ENV_FILE) + proxies = {} + + if os.path.isfile(public_env_path): + try: + with open(public_env_path) as env_file: + proxy_env = yaml.safe_load(env_file) + if proxy_env is not None: + proxy = proxy_env.get("proxy_env", {}) + http_proxy = proxy.get('http_proxy', '') + proxies = {'http': http_proxy, 'https': http_proxy} + else: + proxies = {'http': '', 'https': ''} + except Exception as e: + logger.error('Load proxy env from {} failed with error: {}'.format(public_env_path, repr(e))) + + if os.path.isfile(inv_env_path): + try: + with open(inv_env_path) as env_file: + proxy_env = yaml.safe_load(env_file) + if proxy_env is not None: + proxy = proxy_env.get("proxy_env", {}) + http_proxy = proxy.get('http_proxy', '') + proxies = {'http': http_proxy, 'https': http_proxy} + except Exception as e: + logger.error('Load proxy env from {} failed with error: {}'.format(inv_env_path, repr(e))) + + return proxies + def load_dut_basic_facts(session): """Run 'ansible -m dut_basic_facts' command to get some basic DUT facts. 
@@ -117,14 +183,18 @@ def load_dut_basic_facts(session): inv_name = tbinfo['inv_name'] else: inv_name = 'lab' + proxies = get_http_proxies(inv_name) + session.config.cache.set('PROXIES', proxies) - ansible_cmd = 'ansible -m dut_basic_facts -i ../ansible/{} {} -o'.format(inv_name, dut_name) + inv_full_path = os.path.join(os.path.dirname(__file__), '../../../../ansible', inv_name) + ansible_cmd = 'ansible -m dut_basic_facts -i {} {} -o'.format(inv_full_path, dut_name) raw_output = subprocess.check_output(ansible_cmd.split()).decode('utf-8') logger.debug('raw dut basic facts:\n{}'.format(raw_output)) output_fields = raw_output.split('SUCCESS =>', 1) if len(output_fields) >= 2: results.update(json.loads(output_fields[1].strip())['ansible_facts']['dut_basic_facts']) + results['asic_gen'] = read_asic_name(results['hwsku']) except Exception as e: logger.error('Failed to load dut basic facts, exception: {}'.format(repr(e))) @@ -179,7 +249,7 @@ def find_longest_matches(nodeid, conditions): longest_matches.append(condition) return longest_matches -def update_issue_status(condition_str): +def update_issue_status(condition_str, session): """Replace issue URL with 'True' or 'False' based on its active state. If there is an issue URL is found, this function will try to query state of the issue and replace the URL @@ -189,20 +259,28 @@ def update_issue_status(condition_str): Args: condition_str (str): Condition string that may contain issue URLs. + session (obj): Pytest session object, for getting cached data. Returns: str: New condition string with issue URLs already replaced with 'True' or 'False'. 
""" - issues = re.findall('https?://[^ ]+', condition_str) + issues = re.findall('https?://[^ )]+', condition_str) if not issues: logger.debug('No issue specified in condition') return condition_str - results = check_issues(issues) + issue_status_cache = session.config.cache.get('ISSUE_STATUS', {}) + proxies = session.config.cache.get('PROXIES', {}) + + unknown_issues = [issue_url for issue_url in issues if issue_url not in issue_status_cache] + if unknown_issues: + results = check_issues(unknown_issues, proxies=proxies) + issue_status_cache.update(results) + session.config.cache.set('ISSUE_STATUS', issue_status_cache) for issue_url in issues: - if issue_url in results: - replace_str = str(results[issue_url]) + if issue_url in issue_status_cache: + replace_str = str(issue_status_cache[issue_url]) else: # Consider the issue as active anyway if unable to get issue state replace_str = 'True' @@ -211,7 +289,7 @@ def update_issue_status(condition_str): return condition_str -def evaluate_condition(condition, basic_facts): +def evaluate_condition(condition, basic_facts, session): """Evaluate a condition string based on supplied basic facts. Args: @@ -219,6 +297,7 @@ def evaluate_condition(condition, basic_facts): string may contain issue URLs that need further processing. basic_facts (dict): A one level dict with basic facts. Keys of the dict can be used as variables in the condition string evaluation. + session (obj): Pytest session object, for getting cached data. Returns: bool: True or False based on condition string evaluation result. @@ -226,7 +305,7 @@ def evaluate_condition(condition, basic_facts): if condition is None or condition.strip() == '': return True # Empty condition item will be evaluated as True. Equivalent to be ignored. 
- condition_str = update_issue_status(condition) + condition_str = update_issue_status(condition, session) try: return bool(eval(condition_str, basic_facts)) except Exception as e: @@ -236,28 +315,32 @@ def evaluate_condition(condition, basic_facts): return False -def evaluate_conditions(conditions, basic_facts): +def evaluate_conditions(conditions, basic_facts, session, conditions_logical_operator): """Evaluate all the condition strings. - Evaluate a single condition or multiple conditions. If multiple conditions are supplied, apply AND logical operation - to all of them. + Evaluate a single condition or multiple conditions. If multiple conditions are supplied, apply AND or OR + logical operation to all of them based on conditions_logical_operator(by default AND). Args: conditions (str or list): Condition string or list of condition strings. basic_facts (dict): A one level dict with basic facts. Keys of the dict can be used as variables in the condition string evaluation. + session (obj): Pytest session object, for getting cached data. + conditions_logical_operator (str): logical operator which should be applied to conditions(by default 'AND') Returns: bool: True or False based on condition strings evaluation result. """ if isinstance(conditions, list): - # Apply 'AND' operation to list of conditions - # Personally, I think it makes more sense to apply 'AND' logical operation to a list of conditions. 
- return all([evaluate_condition(c, basic_facts) for c in conditions]) + # Apply 'AND' or 'OR' operation to list of conditions based on conditions_logical_operator(by default 'AND') + if conditions_logical_operator == 'OR': + return any([evaluate_condition(c, basic_facts, session) for c in conditions]) + else: + return all([evaluate_condition(c, basic_facts, session) for c in conditions]) else: if conditions is None or conditions.strip() == '': return True - return evaluate_condition(conditions, basic_facts) + return evaluate_condition(conditions, basic_facts, session) def pytest_collection(session): @@ -316,18 +399,26 @@ def pytest_collection_modifyitems(session, config, items): for mark_name, mark_details in match.values()[0].items(): add_mark = False - mark_conditions = mark_details.get('conditions', None) - if not mark_conditions: - # Unconditionally add mark + if not mark_details: add_mark = True else: - add_mark = evaluate_conditions(mark_conditions, basic_facts) + mark_conditions = mark_details.get('conditions', None) + if not mark_conditions: + # Unconditionally add mark + add_mark = True + else: + conditions_logical_operator = mark_details.get('conditions_logical_operator', 'AND').upper() + add_mark = evaluate_conditions(mark_conditions, basic_facts, session, conditions_logical_operator) if add_mark: - reason = mark_details.get('reason', '') + reason = '' + if mark_details: + reason = mark_details.get('reason', '') if mark_name == 'xfail': - strict = mark_details.get('strict', False) + strict = False + if mark_details: + strict = mark_details.get('strict', False) mark = getattr(pytest.mark, mark_name)(reason=reason, strict=strict) # To generate xfail property in the report xml file item.user_properties.append(('xfail', strict)) diff --git a/tests/common/plugins/conditional_mark/issue.py b/tests/common/plugins/conditional_mark/issue.py index 5dd63382771..57c568b4117 100644 --- a/tests/common/plugins/conditional_mark/issue.py +++ 
b/tests/common/plugins/conditional_mark/issue.py @@ -37,11 +37,12 @@ class GitHubIssueChecker(IssueCheckerBase): NAME = 'GitHub' - def __init__(self, url): + def __init__(self, url, proxies): super(GitHubIssueChecker, self).__init__(url) self.user = '' self.api_token = '' self.api_url = url.replace('github.com', 'api.github.com/repos') + self.proxies = proxies self.get_cred() def get_cred(self): @@ -56,6 +57,9 @@ def get_cred(self): github_creds = creds.get(self.NAME, {}) self.user = github_creds.get('user', '') self.api_token = github_creds.get('api_token', '') + else: + self.user = os.environ.get("GIT_USER_NAME") + self.api_token = os.environ.get("GIT_API_TOKEN") except Exception as e: logger.error('Load credentials from {} failed with error: {}'.format(creds_file_path, repr(e))) @@ -68,7 +72,7 @@ def is_active(self): bool: False if the issue is closed else True. """ try: - response = requests.get(self.api_url, auth=(self.user, self.api_token)) + response = requests.get(self.api_url, auth=(self.user, self.api_token), proxies=self.proxies) response.raise_for_status() issue_data = response.json() if issue_data.get('state', '') == 'closed': @@ -85,7 +89,7 @@ def is_active(self): return True -def issue_checker_factory(url): +def issue_checker_factory(url, proxies): """Factory function for creating issue checker object based on the domain name in the issue URL. Args: @@ -98,14 +102,14 @@ def issue_checker_factory(url): if m and len(m.groups()) > 0: domain_name = m.groups()[0].lower() if 'github' in domain_name: - return GitHubIssueChecker(url) + return GitHubIssueChecker(url, proxies) else: logger.error('Unknown issue website: {}'.format(domain_name)) logger.error('Creating issue checker failed. Bad issue url {}'.format(url)) return None -def check_issues(issues): +def check_issues(issues, proxies=None): """Check state of the specified issues. Because issue state checking may involve sending HTTP request. 
This function uses parallel run to speed up @@ -117,7 +121,7 @@ def check_issues(issues): Returns: dict: Issue state check result. Key is issue URL, value is either True or False based on issue state. """ - checkers = [c for c in [issue_checker_factory(issue) for issue in issues] if c is not None] + checkers = [c for c in [issue_checker_factory(issue, proxies) for issue in issues] if c is not None] if not checkers: logger.error('No checker created for issues: {}'.format(issues)) return {} diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml index f78314ccc2e..b8fc960593e 100644 --- a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml +++ b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml @@ -1,23 +1,87 @@ ####################################### -##### cacl ##### +##### acl ##### ####################################### -cacl/test_cacl_application.py::test_cacl_application: +acl/test_acl_outer_vlan.py: + #Outer VLAN id match support is planned for future release with SONIC on Cisco 8000 + #For the current release, will mark the related test cases as XFAIL xfail: - reason: "Image issue on Boradcom dualtor testbeds" + reason: "Cisco platform does not support ACL Outer VLAN ID tests" + conditions: + - asic_type=="cisco-8000" + +####################################### +##### arp ##### +####################################### +arp/test_arp_dualtor.py::test_proxy_arp_for_standby_neighbor: + skip: + reason: "`accept_untracked_na` currently only available in 202012" + conditions: + - "release not in ['202012']" + +####################################### +##### bfd ##### +####################################### +bfd/test_bfd.py: + skip: + reason: "Test not supported for 201911 images or older, other than mlnx4600c and cisco-8102. 
Skipping the test" + conditions: + - "(release in ['201811', '201911']) or (platform not in ['x86_64-mlnx_msn4600c-r0', 'x86_64-8102_64h_o-r0'])" + +bfd/test_bfd.py::test_bfd_basic: + skip: + reason: "Test not supported for cisco-8102 as it doesnt support single hop BFD. Skipping the test" + conditions: + - "platform in ['x86_64-8102_64h_o-r0']" + +bfd/test_bfd.py::test_bfd_scale: + skip: + reason: "Test not supported for cisco-8102 as it doesnt support single hop BFD. Skipping the test" + conditions: + - "platform in ['x86_64-8102_64h_o-r0']" + + +####################################### +##### bgp ##### +####################################### +bgp/test_bgp_speaker.py: + skip: + reason: "M0/backend topo does not support test_bgp_speaker" + conditions: + - "topo_name in ['m0', 't0-backend']" + +####################################### +##### cacl ##### +####################################### +cacl/test_cacl_application.py::test_cacl_application_nondualtor: + skip: + reason: "test_cacl_application is only supported on non dualtor topology" conditions: - - "asic_type in ['broadcom']" - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" +cacl/test_cacl_application.py::test_cacl_application_dualtor: + skip: + reason: "test_cacl_application_dualtor is only supported on dualtor topology" + conditions: + - "topo_name not in ['dualtor', 'dualtor-56', 'dualtor-120']" + +cacl/test_cacl_application.py::test_multiasic_cacl_application: + skip: + reason: "test_multiasic_cacl_application is only supported on multi-ASIC platform" + conditions: + - "is_multi_asic==False" + ####################################### -##### copp ##### +##### configlet ##### ####################################### -copp/test_copp.py::TestCOPP::test_add_new_trap: - xfail: - reason: "'Add always_enabled field to coppmgr' is not merged into 202012 yet, a 'strict' param will remind us to remove this mark condition by setting 'xpass' as 'fail'" - strict: True +configlet/test_add_rack.py: + skip: + 
reason: "AddRack is not yet supported on multi-ASIC platform" conditions: - - "release in ['202012']" + - "is_multi_asic==True" +####################################### +##### copp ##### +####################################### copp/test_copp.py::TestCOPP::test_trap_config_save_after_reboot: skip: reason: "'Add always_enabled field to coppmgr' is not merged into 202012 yet" @@ -27,60 +91,170 @@ copp/test_copp.py::TestCOPP::test_trap_config_save_after_reboot: ####################################### ##### decap ##### ####################################### +decap/test_decap.py::test_decap[ttl=pipe, dscp=pipe]: + skip: + reason: "Not supported on broadcom after 201911 release, mellanox all releases and cisco-8000 all releases" + conditions: + - "(asic_type in ['broadcom'] and release not in ['201811', '201911']) or (asic_type in ['mellanox']) or (asic_type in ['cisco-8000'])" + decap/test_decap.py::test_decap[ttl=pipe, dscp=uniform]: skip: - reason: "Not supported on backend" + reason: "Not supported on backend and broadcom before 202012 release" conditions: - - "topo_name in ['t1-backend', 't0-backend']" + - "(topo_name in ['t1-backend', 't0-backend']) or (asic_type in ['broadcom'] and release in ['201811', '201911'])" + +decap/test_decap.py::test_decap[ttl=uniform, dscp=pipe]: + skip: + reason: "Not supported uniform ttl mode" + +decap/test_decap.py::test_decap[ttl=uniform, dscp=uniform]: + skip: + reason: "Not supported uniform ttl mode" ####################################### -##### dhcp_relay ##### +##### drop_packets ##### ####################################### -dhcp_relay/test_dhcpv6_relay.py: - xfail: - reason: "Generic internal image feature missing" - strict: True +drop_packets: + skip: + reason: "M0 topo does not support drop_packets" conditions: - - "asic_type in ['broadcom']" - - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" - + - "topo_name in ['m0']" + ####################################### ##### dualtor ##### 
####################################### +dualtor/test_orch_stress.py: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_active_tor_downstream.py: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_mac_move.py: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_standby_tor_downstream.py::test_standby_tor_downstream: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_standby_tor_downstream.py::test_standby_tor_downstream_t1_link_recovered: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_standby_tor_downstream.py::test_standby_tor_downstream_bgp_recovered: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + +dualtor/test_orchagent_standby_tor_downstream.py::test_standby_tor_downstream_loopback_route_readded: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config and dualtor." + conditions: + - "(topo_type not in ['t0'])" + +dualtor/test_orchagent_standby_tor_downstream.py::test_standby_tor_remove_neighbor_downstream_standby: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." 
+ conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" + dualtor/test_orchagent_standby_tor_downstream.py::test_downstream_standby_mux_toggle_active: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." + conditions: + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" xfail: reason: "Image issue on Boradcom platforms, but not consistently failing" conditions: - "asic_type in ['broadcom']" -dualtor/test_tor_ecn.py::test_dscp_to_queue_during_decap_on: - xfail: - reason: "Image issue on Boradcom platforms, only fail on some dscp values" +dualtor/test_standby_tor_upstream_mux_toggle.py: + skip: + reason: "This testcase is designed for single tor testbed with mock dualtor config." conditions: - - "asic_type in ['broadcom']" - -dualtor/test_tor_ecn.py::test_ecn_during_decap_on: - xfail: - reason: "Test flaky" + - "(topo_type not in ['t0']) or ('dualtor' in topo_name)" ####################################### ##### ecmp ##### ####################################### ecmp/inner_hashing/test_inner_hashing.py: skip: - reason: "This test can only run on master branch since change in sonic-utility is not backport into 202012 branch yet" + conditions_logical_operator: or + reason: "PBH introduced in 202111 and skip this test on Mellanox 2700 platform" conditions: - - "branch not in ['master']" + - "branch in ['201811', '201911', '202012', '202106']" + - "platform in ['x86_64-mlnx_msn2700-r0']" + - "topo_type not in ['t0']" + - "asic_type not in ['mellanox']" + +ecmp/inner_hashing/test_inner_hashing_lag.py: + skip: + conditions_logical_operator: or + reason: "PBH introduced in 202111 and skip this test on Mellanox 2700 platform" + conditions: + - "branch in ['201811', '201911', '202012', '202106']" + - "platform in ['x86_64-mlnx_msn2700-r0']" + - "topo_type not in ['t0']" + - "asic_type not in ['mellanox']" ecmp/inner_hashing/test_wr_inner_hashing.py: skip: - reason: "This test can only run on master branch since 
change in sonic-utility is not backport into 202012 branch yet" + conditions_logical_operator: or + reason: "PBH introduced in 202111 and skip this test on Mellanox 2700 platform" + conditions: + - "branch in ['201811', '201911', '202012', '202106']" + - "platform in ['x86_64-mlnx_msn2700-r0']" + - "topo_type not in ['t0']" + - "asic_type not in ['mellanox']" + +ecmp/inner_hashing/test_wr_inner_hashing_lag.py: + skip: + conditions_logical_operator: or + reason: "PBH introduced in 202111 and skip this test on Mellanox 2700 platform" + conditions: + - "branch in ['201811', '201911', '202012', '202106']" + - "platform in ['x86_64-mlnx_msn2700-r0']" + - "topo_type not in ['t0']" + - "asic_type not in ['mellanox']" + +ecmp/test_fgnhg.py: + skip: + conditions_logical_operator: or + reason: "The test case only runs on Mellanox T0 platform running 202012 or above; Mellanox 2700 platform is skipped" conditions: - - "branch not in ['master']" + - "branch in ['201811', '201911']" + - "platform in ['x86_64-mlnx_msn2700-r0']" + - "topo_type not in ['t0']" + - "asic_type not in ['mellanox']" ####################################### ##### everflow ##### ####################################### +everflow: + skip: + reason: "Skipping everflow tests. Unsupported topology." 
+ conditions: + - "topo_name in ['m0']" + +everflow/test_everflow_per_interface.py::test_everflow_per_interface[ipv6-default]: + skip: + reason: "Skip everflow per interface IPv6 test on unsupported platforms" + conditions: + - "asic_type in ['cisco-8000', 'marvell', 'mellanox']" + everflow/test_everflow_testbed.py::TestEverflowV4IngressAclIngressMirror::test_everflow_dscp_with_policer: xfail: strict: True @@ -88,6 +262,15 @@ everflow/test_everflow_testbed.py::TestEverflowV4IngressAclIngressMirror::test_e conditions: - "asic_type=='cisco-8000'" +####################################### +##### fib ##### +####################################### +fib/test_fib.py::test_ipinip_hash: + skip: + reason: 'ipinip hash test is not fully supported on mellanox platform (case#00581265)' + conditions: + - "asic_type in ['mellanox']" + ####################################### ##### iface_namingmode ##### ####################################### @@ -97,6 +280,10 @@ iface_namingmode/test_iface_namingmode.py::TestShowPriorityGroup: strict: True conditions: - "platform in ['x86_64-cel_e1031-r0']" + skip: + reason: "M0 topo does not support TestShowPriorityGroup" + conditions: + - "topo_name in ['m0']" iface_namingmode/test_iface_namingmode.py::TestShowQueue::test_show_queue_persistent_watermark: xfail: @@ -112,6 +299,46 @@ iface_namingmode/test_iface_namingmode.py::TestShowQueue::test_show_queue_waterm conditions: - "platform in ['x86_64-cel_e1031-r0']" +iface_namingmode/test_iface_namingmode.py::TestShowQueue: + skip: + reason: "M0 topo does not support TestShowQueue" + conditions: + - "topo_name in ['m0']" + +####################################### +##### ip ##### +####################################### + +ip/test_ip_packet.py::TestIPPacket::test_forward_ip_packet_with_0xffff_chksum_drop: + skip: + reason: "Broadcom, Cisco and Marvell Asic will tolorate IP packets with 0xffff checksum" + conditions: + - "asic_type in ['broadcom', 'cisco-8000', 'marvell'] and hwsku not in 
['Arista-7280CR3-C40']" + +ip/test_ip_packet.py::TestIPPacket::test_forward_ip_packet_with_0xffff_chksum_tolerant: + skip: + reason: "Mellanox Asic will drop IP packets with 0xffff checksum" + conditions: + - "asic_type in ['mellanox'] or hwsku in ['Arista-7280CR3-C40']" + +####################################### +##### ipfwd ##### +####################################### +ipfwd/test_dir_bcast.py: + skip: + reason: "Unsupported topology." + conditions: + - "topo_type not in ['t0', 'm0', 'mx'] or 'dualtor' in topo_name or 't0-backend' in topo_name" + +####################################### +##### mvrf ##### +####################################### +mvrf: + skip: + reason: "M0 topo does not support mvrf" + conditions: + - "topo_name in ['m0']" + ####################################### ##### ntp ##### ####################################### @@ -122,20 +349,157 @@ ntp/test_ntp.py::test_ntp_long_jump_disabled: strict: True reason: "Known NTP bug" +####################################### +##### passw_hardening ##### +####################################### +passw_hardening/test_passw_hardening.py: + skip: + reason: "Password-hardening supported just in master version" + conditions_logical_operator: or + conditions: + - "release not in ['master']" + - https://github.com/sonic-net/sonic-mgmt/issues/6428 + +####################################### +##### pc ##### +####################################### +pc/test_lag_2.py::test_lag_db_status_with_po_update: + skip: + reason: "Only support t1-lag, t1-56-lag, t1-64-lag and t2 topology" + conditions: + - "topo_name not in ['t1-lag', 't1-56-lag', 't1-64-lag'] and 't2' not in topo_name" + +pc/test_po_cleanup.py: + skip: + reason: "Skip test due to there is no portchannel exists in current topology." 
+ conditions: + - "len(minigraph_portchannels) == 0 and not is_multi_asic" + +pc/test_po_update.py::test_po_update: + skip: + reason: "Skip test due to there is no portchannel or no portchannel member exists in current topology." + conditions: + - "len(minigraph_portchannels) == 0 or len(minigraph_portchannels[minigraph_portchannels.keys()[0]]['members']) == 0" + +pc/test_po_update.py::test_po_update_io_no_loss: + skip: + reason: "Skip test due to there isn't enough port channel exists in current topology." + conditions: + - "len(minigraph_portchannel_interfaces) < 2" + +pc/test_po_voq.py: + skip: + reason: "Skip since there is no portchannel configured or no portchannel member exists in current topology." + conditions: + - "num_asic == 0 or len(minigraph_portchannels[minigraph_portchannels.keys()[0]]['members']) == 0 or asic_type in ['cisco-8000']" + +####################################### +##### pfc ##### +####################################### +pfc/test_unknown_mac.py: + skip: + reason: In cisco-8000 platform, a packet with unknown MAC will be flooded, not dropped. This case will not pass in cisco-8000. + conditions: + - "asic_type in ['cisco-8000']" + +####################################### +##### pfc_asym ##### +####################################### +pfc_asym/test_pfc_asym.py: + skip: + reason: 'pfc_asym test skip except for on Barefoot platforms' + conditions: + - "asic_type not in ['barefoot']" + +pfc_asym/test_pfc_asym.py::test_pfc_asym_off_rx_pause_frames: + skip: + reason: "skipped for Barefoot platform" + conditions: + - "asic_type in ['barefoot']" + ####################################### ##### pfcwd ##### ####################################### -pfcwd/test_pfcwd_timer_accuracy.py::TestPfcwdAllTimer::test_pfcwd_timer_accuracy: - xfail: - reason: "Test flaky" +pfcwd: + skip: + reason: "Pfcwd tests skipped on m0 testbed." 
+ conditions: + - "topo_name in ['m0']" -pfcwd/test_pfcwd_warm_reboot.py::TestPfcwdWb::test_pfcwd_wb: - xfail: - reason: "Test flaky" +pfcwd/test_pfc_config.py::TestPfcConfig::test_forward_action_cfg: + skip: + reason: "Forward action not supported in cisco-8000" + conditions: + - "asic_type in ['cisco-8000']" + +pfcwd/test_pfcwd_warm_reboot.py: + skip: + reason: "Warm Reboot is not supported in T2." + conditions: + - "'t2' in topo_name" + xfail: + reason: "Warm Reboot is not supported in dualtor." + conditions: + - "'dualtor' in topo_name" + - https://github.com/sonic-net/sonic-mgmt/issues/8400 + +####################################### +##### platform_tests ##### +####################################### +platform_tests/sfp/test_sfputil.py::test_check_sfputil_low_power_mode: + skip: + reason: "Get/Set low power mode is not supported in Cisco 8000 platform" + conditions: + - "asic_type in ['cisco-8000'] or platform in ['x86_64-cel_e1031-r0']" + +platform_tests/test_auto_negotiation.py: + skip: + reason: "auto negotiation test highly depends on test enviroments, file issue to track and skip for now" + conditions: https://github.com/Azure/sonic-mgmt/issues/5447 + +platform_tests/sfp/test_sfputil.py::test_check_sfputil_reset: + skip: + reason: "platform does not support sfp reset" + conditions: + - "platform in ['x86_64-cel_e1031-r0']" + +platform_tests/api/test_sfp.py::TestSfpApi::test_reset: + skip: + reason: "platform does not support sfp reset" + conditions: + - "platform in ['x86_64-cel_e1031-r0']" + +platform_tests/api/test_sfp.py::TestSfpApi::test_tx_disable_channel: + skip: + reason: "platform does not support" + conditions: + - "platform in ['x86_64-cel_e1031-r0']" + +platform_tests/broadcom/test_ser.py::test_ser: + skip: + reason: "platform does not support test_ser" + conditions: + - "platform in ['x86_64-cel_e1031-r0']" + - https://github.com/sonic-net/sonic-mgmt/issues/6218 + 
+####################################### +##### configlet ##### +####################################### +configlet/test_add_rack.py: + skip: + reason: "AddRack is not yet supported on multi-ASIC platform" + conditions: + - "is_multi_asic==True" ####################################### ##### qos ##### ####################################### +qos: + skip: + reason: "M0 topo does not support qos" + conditions: + - "topo_name in ['m0']" + qos/test_pfc_pause.py::test_pfc_pause_lossless: # For this test, we use the fanout connected to the DUT to send PFC pause frames. # The fanout needs to send PFC frames fast enough so that the queue remains completely paused for the entire duration @@ -145,35 +509,71 @@ qos/test_pfc_pause.py::test_pfc_pause_lossless: skip: reason: "Fanout needs to send PFC frames fast enough to completely pause the queue" -qos/test_buffer_traditional.py: +qos/test_qos_sai.py: skip: - reason: "buffer traditional test is not yet supported on multi-ASIC platform" + reason: "qos_sai tests not supported on t1 topo" conditions: - - "is_multi_asic==True" + - "asic_type in ['barefoot'] and topo_name in ['t1']" -qos/test_qos_sai.py: +qos/test_qos_sai.py::TestQosSai: skip: - reason: "qos_sai tests not supported on t1 topo" + reason: "Unsupported testbed type." conditions: - - "asic_type in ['barefoot'] and topo_type in ['t1']" + - "topo_name not in ['t0', 't0-64', 't0-116', 't0-35', 'dualtor-56', 'dualtor-120', 'dualtor', 't0-80', 't0-backend', 't1-lag', 't1-64-lag', 't1-backend', 't2', 't2_2lc_36p-masic', 't2_2lc_min_ports-masic'] and asic_type not in ['mellanox']" -qos/test_qos_sai.py::TestQosSai::testQosSaiHeadroomPoolSize[None]: - xfail: - reason: "Image issue on Arista platforms" +qos/test_qos_sai.py::TestQosSai::testQosSaiDot1pPgMapping: + skip: + reason: "Dot1p-PG mapping is only supported on backend." 
conditions: - - "platform in ['x86_64-arista_7050cx3_32s']" + - "'backend' not in topo_name" -qos/test_qos_sai.py::TestQosSai::testQosSaiHeadroomPoolWatermark[None]: - xfail: - reason: "Image issue on Arista platforms" +qos/test_qos_sai.py::TestQosSai::testQosSaiDot1pQueueMapping: + skip: + reason: "Dot1p-queue mapping is only supported on backend." conditions: - - "platform in ['x86_64-arista_7050cx3_32s']" + - "'backend' not in topo_name" -qos/test_qos_sai.py::TestQosSai::testQosSaiPgSharedWatermark[None-wm_pg_shared_lossy]: - xfail: - reason: "Image issue on Arista platforms" +qos/test_qos_sai.py::TestQosSai::testQosSaiDscpQueueMapping: + skip: + reason: "Dscp-queue mapping is not supported on backend." + conditions: + - "'backend' in topo_name" + +qos/test_qos_sai.py::TestQosSai::testQosSaiDscpToPgMapping: + skip: + reason: "Dscp-PG mapping is not supported on backend." conditions: - - "platform in ['x86_64-arista_7050cx3_32s']" + - "'backend' in topo_name" + +qos/test_qos_sai.py::TestQosSai::testQosSaiDwrrWeightChange: + skip: + reason: "Skip DWRR weight change test on Mellanox platform." + conditions: + - "asic_type in ['mellanox']" + +qos/test_qos_sai.py::TestQosSai::testQosSaiHeadroomPoolSize: + skip: + reason: "Headroom pool size not supported." + conditions: + - "hwsku not in ['Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64', 'Arista-7050CX3-32S-C32', 'Arista-7050CX3-32S-D48C8'] and asic_type not in ['mellanox']" + +qos/test_qos_sai.py::TestQosSai::testQosSaiHeadroomPoolWatermark: + skip: + reason: "Headroom pool size not supported." + conditions: + - "hwsku not in ['Arista-7060CX-32S-C32', 'Celestica-DX010-C32', 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64', 'Arista-7050CX3-32S-C32', 'Arista-7050CX3-32S-D48C8']" + +qos/test_qos_sai.py::TestQosSai::testQosSaiPGDrop: + skip: + reason: "PG drop size test is not supported." 
+ conditions: + - "asic_type not in ['cisco-8000']" + +qos/test_qos_sai.py::TestQosSai::testQosSaiPgHeadroomWatermark: + skip: + reason: "Priority Group Headroom Watermark is not supported on cisco asic. PG drop counter stat is covered as a part of testQosSaiPfcXoffLimit" + conditions: + - "asic_type in ['cisco-8000']" ####################################### ##### restapi ##### @@ -184,6 +584,12 @@ restapi/test_restapi.py: conditions: - "asic_type not in ['mellanox']" +restapi/test_restapi_vxlan_ecmp.py: + skip: + reason: "Only supported on cisco 8102" + conditions: + - "asic_type not in ['cisco-8000']" + ####################################### ##### route ##### ####################################### @@ -194,6 +600,12 @@ route/test_static_route.py::test_static_route_ecmp_ipv6: reason: "Test case may fail due to a known issue" conditions: https://github.com/Azure/sonic-buildimage/issues/4930 +route/test_route_perf.py: + skip: + reason: "M0 topo does not support test_route_perf" + conditions: + - "topo_name in ['m0']" + ####################################### ##### show_techsupport ##### ####################################### @@ -203,7 +615,22 @@ show_techsupport/test_techsupport.py::test_techsupport[: strict: True conditions: - "branch in ['internal-202012']" - - "build_version <= '20201231.33'" + - "int(build_version.split('.')[1]) <= 33" + +####################################### +##### snmp ##### +####################################### +snmp/test_snmp_pfc_counters.py: + skip: + reason: "M0 topo does not support test_snmp_pfc_counters" + conditions: + - "topo_name in ['m0']" + +snmp/test_snmp_queue.py: + skip: + reason: "M0 topo does not support test_snmp_queue" + conditions: + - "topo_name in ['m0']" ####################################### ##### ssh ##### @@ -220,9 +647,9 @@ ssh/test_ssh_stress.py::test_ssh_stress: ####################################### sub_port_interfaces: skip: - reason: "sub port interfaces test is not yet supported on 
multi-ASIC platform" + reason: "Unsupported platform or asic" conditions: - - "is_multi_asic==True" + - "is_multi_asic==True or asic_gen not in ['td2']" sub_port_interfaces/test_sub_port_interfaces.py::TestSubPorts::test_tunneling_between_sub_ports: skip: @@ -238,7 +665,7 @@ syslog/test_syslog.py: reason: "Generic internal image issue" conditions: - "branch in ['internal-202012']" - - "build_version <= '20201231.33'" + - "int(build_version.split('.')[1]) <= 33" skip: reason: "Testcase enhancements needed for backend topo" conditions: @@ -253,4 +680,40 @@ system_health/test_system_health.py::test_service_checker_with_process_exit: strict: True conditions: - "branch in ['internal-202012']" - - "build_version <= '20201231.44'" + - "int(build_version.split('.')[1]) <= 44" + +####################################### +##### vlan ##### +####################################### +vlan/test_vlan_ping.py: + skip: + reason: "test_vlan_ping doesn't work on Broadcom platform" + conditions: + - "asic_type in ['broadcom']" + +####################################### +##### VxLAN ##### +####################################### +vxlan/test_vxlan_ecmp.py: + skip: + reason: "VxLAN ECMP test is not yet supported on multi-ASIC platform. Also this test can only run on 4600c and 8102." + conditions: + - "(is_multi_asic==True) or (platform not in ['x86_64-8102_64h_o-r0', 'x86_64-8101_32fh_o-r0'] and asic_type not in ['barefoot'])" + +vxlan/test_vnet_vxlan.py: + skip: + reason: "1. Enable tests only for: mellanox, barefoot + 2. 
Test skipped due to issue #8374" + conditions_logical_operator: OR + conditions: + - "asic_type not in ['mellanox', 'barefoot']" + - https://github.com/sonic-net/sonic-mgmt/issues/8374 + +####################################### +##### telemetry ##### +####################################### +telemetry/test_telemetry.py: + skip: + reason: "Skip telemetry test for 201911 and older branches" + conditions: + - "(is_multi_asic==True) and (release in ['201811', '201911'])" diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions_202012.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions_202012.yaml new file mode 100644 index 00000000000..920077beb90 --- /dev/null +++ b/tests/common/plugins/conditional_mark/tests_mark_conditions_202012.yaml @@ -0,0 +1,71 @@ +############################################################## +##### This file is dedicated for 202012 branch issues ##### +##### sonic-mgmt issue should be filed and linked here ##### +############################################################## + +dualtor/test_ipinip.py::test_decap_standby_tor: + skip: + reason: "dual tor mock test only support on 7050cx3 and 7260" + conditions: + - "release in ['202012']" + - "('dualtor' in topo_name) or ('7050cx3' not in platform.lower() and '7260' not in platform.lower())" + +dualtor/test_orchagent_active_tor_downstream.py: + skip: + reason: "dual tor mock test only support on 7050cx3 and 7260" + conditions: + - "release in ['202012']" + - "('dualtor' in topo_name) or ('7050cx3' not in platform.lower() and '7260' not in platform.lower())" + +dualtor/test_orchagent_mac_move.py: + skip: + reason: "dual tor mock test only support on 7050cx3 and 7260" + conditions: + - "release in ['202012']" + - "('dualtor' in topo_name) or ('7050cx3' not in platform.lower() and '7260' not in platform.lower())" + +dualtor/test_orch_stress.py::test_flap_neighbor_entry_standby: + skip: + reason: "dual tor mock test only support on 7050cx3 and 7260" + 
conditions: + - "release in ['202012']" + - "('dualtor' in topo_name) or ('7050cx3' not in platform.lower() and '7260' not in platform.lower())" + +dualtor/test_standby_tor_upstream_mux_toggle.py: + skip: + reason: "dual tor mock test only support on 7050cx3 and 7260" + conditions: + - "release in ['202012']" + - "('dualtor' in topo_name) or ('7050cx3' not in platform.lower() and '7260' not in platform.lower())" + + +dualtor_io/test_normal_op.py: + xfail: + reason: "test issue or image issue, to be RCA'ed" + conditions: + - "release in ['202012']" + - "'dualtor' in topo_name" + - "'7260' in platform" + +####################################### +##### everflow ##### +####################################### +everflow/test_everflow_per_interface.py::test_everflow_per_interface[ipv6]: + skip: + reason: "Skip everflow per interface IPv6 test on unsupported platforms" + conditions: + - "asic_type in ['cisco-8000', 'marvell', 'mellanox']" + +platform_tests/test_advanced_reboot.py: + xfail: + reason: "test issue or image issue, to be RCA'ed" + conditions: + - "release in ['202012']" + +vxlan/test_vxlan_ecmp.py: + skip: + reason: "test issue or image issue, to be RCA'ed" + conditions: + - "release in ['202012']" + - "'4600' in platform" + - https://github.com/sonic-net/sonic-mgmt/issues/6616 diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions_drop_packets.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions_drop_packets.yaml index a3ed94be365..765c1c18bd9 100644 --- a/tests/common/plugins/conditional_mark/tests_mark_conditions_drop_packets.yaml +++ b/tests/common/plugins/conditional_mark/tests_mark_conditions_drop_packets.yaml @@ -1,3 +1,14 @@ +#################################################### +##### test_configurable_drop_counters.py ##### +#################################################### +#Link local address(169.254.xxx.xxx) as a source address as IPv4 header is not invalid in all the cases +#Hence, it is not 
dropped by default in Cisco-8000. For dropping link local address, it should be done through security/DATA ACL +drop_packets/test_configurable_drop_counters.py::test_sip_link_local: + skip: + reason: "Cisco 8000 platform does not drop SIP link local packets" + conditions: + - asic_type=="cisco-8000" + ####################################### ##### test_drop_counters.py ##### ####################################### @@ -7,6 +18,26 @@ drop_packets/test_drop_counters.py::test_acl_egress_drop: conditions: - "asic_type in ['broadcom']" +drop_packets/test_drop_counters.py::test_absent_ip_header: + skip: + reason: "Test case not supported on Broadcom DNX platform" + conditions: + - "asic_subtype in ['broadcom-dnx']" + +drop_packets/test_drop_counters.py::test_equal_smac_dmac_drop: + skip: + reason: "Drop not enabled on chassis since internal traffic uses same smac & dmac" + conditions: + - "asic_subtype in ['broadcom-dnx']" + +drop_packets/test_drop_counters.py::test_dst_ip_absent: + skip: + reason: "Test case not supported on Broadcom DNX platform and Cisco 8000 platform" + conditions_logical_operator: or + conditions: + - "asic_type in ['cisco-8000']" + - "asic_subtype in ['broadcom-dnx']" + drop_packets/test_drop_counters.py::test_dst_ip_absent[vlan_members]: skip: reason: "Image issue on Boradcom dualtor testbeds" @@ -43,34 +74,34 @@ drop_packets/test_drop_counters.py::test_ip_is_zero_addr: drop_packets/test_drop_counters.py::test_ip_is_zero_addr[vlan_members-ipv4-dst]: skip: - reason: "Image issue on Boradcom dualtor testbeds" + reason: "Image issue on Boradcom dualtor testbeds. 
Cisco 8000 platform does not drop packets with 0.0.0.0 source or destination IP address" strict: True conditions: - - "asic_type in ['broadcom']" + - "asic_type in ['broadcom', 'cisco-8000']" - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" drop_packets/test_drop_counters.py::test_ip_is_zero_addr[vlan_members-ipv4-src]: skip: - reason: "Image issue on Boradcom dualtor testbeds" + reason: "Image issue on Boradcom dualtor testbeds. Cisco 8000 platform does not drop packets with 0.0.0.0 source or destination IP address" strict: True conditions: - - "asic_type in ['broadcom']" + - "asic_type in ['broadcom', 'cisco-8000']" - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" drop_packets/test_drop_counters.py::test_ip_is_zero_addr[vlan_members-ipv6-dst]: skip: - reason: "Image issue on Boradcom dualtor testbeds" + reason: "Image issue on Boradcom dualtor testbeds. Cisco 8000 platform does not drop packets with 0.0.0.0 source or destination IP address" strict: True conditions: - - "asic_type in ['broadcom']" + - "asic_type in ['broadcom', 'cisco-8000']" - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" drop_packets/test_drop_counters.py::test_ip_is_zero_addr[vlan_members-ipv6-src]: skip: - reason: "Image issue on Boradcom dualtor testbeds" + reason: "Image issue on Boradcom dualtor testbeds. 
Cisco 8000 platform does not drop packets with 0.0.0.0 source or destination IP address" strict: True conditions: - - "asic_type in ['broadcom']" + - "asic_type in ['broadcom', 'cisco-8000']" - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" drop_packets/test_drop_counters.py::test_loopback_filter: diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions_platform_tests.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions_platform_tests.yaml index af6ab6ca17a..79d872e39c6 100644 --- a/tests/common/plugins/conditional_mark/tests_mark_conditions_platform_tests.yaml +++ b/tests/common/plugins/conditional_mark/tests_mark_conditions_platform_tests.yaml @@ -56,6 +56,14 @@ platform_tests/api/test_chassis.py::TestChassisApi::test_status_led: ####################################### ##### api/test_chassis_fans.py ##### ####################################### +platform_tests/api/test_chassis_fans.py::TestChassisFans::test_get_fans_speed_tolerance: + #get_speed_tolerance API was disabled so platform code can perform fan tolerance checks + #using RPM rather than thermalctld checking tolerance on percentages + skip: + reason: "Unsupported platform API" + conditions: + - "asic_type in ['cisco-8000']" + platform_tests/api/test_chassis_fans.py::TestChassisFans::test_get_model: skip: reason: "Unsupported platform API" @@ -77,10 +85,13 @@ platform_tests/api/test_chassis_fans.py::TestChassisFans::test_set_fans_led: - "asic_type in ['mellanox']" platform_tests/api/test_chassis_fans.py::TestChassisFans::test_set_fans_speed: + #test_set_fans_speed requires get_speed_tolerance to be implemented + #get_speed_tolerance API was disabled so platform code can perform fan tolerance checks + #using RPM rather than thermalctld checking tolerance on percentages skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox', 'cisco-8000']" ####################################### ##### api/test_component.py 
##### @@ -89,7 +100,7 @@ platform_tests/api/test_component.py::TestComponentApi::test_get_available_firmw skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" platform_tests/api/test_component.py::TestComponentApi::test_get_description: skip: @@ -101,7 +112,7 @@ platform_tests/api/test_component.py::TestComponentApi::test_get_firmware_update skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" platform_tests/api/test_component.py::TestComponentApi::test_get_firmware_version: skip: @@ -161,7 +172,7 @@ platform_tests/api/test_component.py::TestComponentApi::test_update_firmware: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" ####################################### ##### api/test_fan_drawer.py ##### @@ -170,7 +181,7 @@ platform_tests/api/test_fan_drawer.py::TestFanDrawerApi::test_get_maximum_consum skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" platform_tests/api/test_fan_drawer.py::TestFanDrawerApi::test_get_model: skip: @@ -196,12 +207,20 @@ platform_tests/api/test_fan_drawer.py::TestFanDrawerApi::test_set_fan_drawers_le skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" ####################################### ##### api/test_fan_drawer_fans.py ##### ####################################### +platform_tests/api/test_fan_drawer_fans.py::TestFanDrawerFans::test_get_fans_speed_tolerance: + #get_speed_tolerance API was disabled so platform code can perform fan tolerance checks + #using RPM rather than thermalctld 
checking tolerance on percentages + skip: + reason: "Unsupported platform API" + conditions: + - "asic_type in ['cisco-8000']" + platform_tests/api/test_fan_drawer_fans.py::TestFanDrawerFans::test_get_model: skip: reason: "Unsupported platform API" @@ -223,10 +242,22 @@ platform_tests/api/test_fan_drawer_fans.py::TestFanDrawerFans::test_set_fans_led - "asic_type in ['mellanox', 'cisco-8000']" platform_tests/api/test_fan_drawer_fans.py::TestFanDrawerFans::test_set_fans_speed: + #test_set_fans_speed requires get_speed_tolerance to be implemented + #get_speed_tolerance API was disabled so platform code can perform fan tolerance checks + #using RPM rather than thermalctld checking tolerance on percentages skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox', 'cisco-8000']" + +####################################### +##### api/test_module.py ##### +####################################### +platform_tests/api/test_module.py: + skip: + reason: "Only support T2" + conditions: + - "topo_type not in ['t2']" ####################################### ##### api/test_psu.py ##### @@ -257,22 +288,21 @@ platform_tests/api/test_psu.py::TestPsuApi::test_get_status: platform_tests/api/test_psu.py::TestPsuApi::test_led: skip: - reason: "On Cisco 8000 and mellanox platform, PSU led are unable to be controlled by software" + reason: "On Cisco 8000, mellanox and Nokia 7215 platform, PSU led are unable to be controlled by software" conditions: - - "asic_type in ['mellanox', 'cisco-8000']" + - "asic_type in ['mellanox', 'cisco-8000'] or platform in ['armhf-nokia_ixs7215_52x-r0']" -platform_tests/api/test_psu.py::TestPsuApi::test_master_led: - xfail: - reason: "Image issue on Arista platforms" +platform_tests/api/test_psu.py::TestPsuApi::test_power: + skip: + reason: "Unsupported platform API" conditions: - - "platform in ['x86_64-arista_7050cx3_32s', 'x86_64-arista_7260cx3_64']" - - "branch in ['internal-202012']" + - "asic_type in 
['mellanox', 'cisco-8000'] or platform in ['armhf-nokia_ixs7215_52x-r0']" -platform_tests/api/test_psu.py::TestPsuApi::test_power: +platform_tests/api/test_psu.py::TestPsuApi::test_temperature: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "platform in ['armhf-nokia_ixs7215_52x-r0']" ####################################### ##### api/test_psu_fans.py ##### @@ -338,7 +368,7 @@ platform_tests/api/test_sfp.py::TestSfpApi::test_get_rx_los: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or (asic_type in ['cisco-8000'] and release in ['202012'])" platform_tests/api/test_sfp.py::TestSfpApi::test_get_rx_power: skip: @@ -380,7 +410,7 @@ platform_tests/api/test_sfp.py::TestSfpApi::test_get_tx_fault: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or (asic_type in ['cisco-8000'] and release in ['202012'])" platform_tests/api/test_sfp.py::TestSfpApi::test_get_tx_power: skip: @@ -392,7 +422,7 @@ platform_tests/api/test_sfp.py::TestSfpApi::test_power_override: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" platform_tests/api/test_sfp.py::TestSfpApi::test_thermals: skip: @@ -410,7 +440,13 @@ platform_tests/api/test_sfp.py::TestSfpApi::test_tx_disable_channel: skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" + +platform_tests/api/test_sfp.py::TestSfpApi::test_get_transceiver_threshold_info: + skip: + reason: "Unsupported platform API" + conditions: + - "asic_type in ['cisco-8000'] and release in ['202012']" ####################################### ##### api/test_thermal.py ##### @@ -419,7 +455,7 @@ platform_tests/api/test_thermal.py::TestThermalApi::test_get_high_critical_thres 
skip: reason: "Unsupported platform API" conditions: - - "asic_type in ['mellanox']" + - "asic_type in ['mellanox'] or platform in ['armhf-nokia_ixs7215_52x-r0']" platform_tests/api/test_thermal.py::TestThermalApi::test_get_high_threshold: skip: @@ -506,6 +542,15 @@ platform_tests/api/test_watchdog.py: conditions: - "asic_type in ['barefoot'] and hwsku in ['newport', 'montara']" +####################################### +##### broadcom ##### +####################################### +platform_tests/broadcom: + skip: + reason: "Marvell devices does not support platform_tests/broadcom" + conditions: + - "asic_type in ['marvell']" + ####################################### ##### cli/test_show_platform.py ##### ####################################### @@ -537,19 +582,44 @@ platform_tests/daemon/test_ledd.py::test_pmon_ledd_kill_and_start_status: conditions: - "release in ['201911']" -platform_tests/daemon/test_syseepromd.py::test_pmon_syseepromd_running_status: - xfail: - reason: "Image issue on Arista platforms" - conditions: - - "platform in ['x86_64-arista_7050cx3_32s', 'x86_64-arista_7050_qx32s', 'x86_64-arista_7260cx3_64']" - ####################################### ##### fwutil/test_fwutil.py ##### ####################################### +platform_tests/fwutil/test_fwutil.py: + skip: + reason: "Non-Mellanox platforms doesn't support fwutil for now" + conditions: + - "asic_type not in ['mellanox']" + - https://github.com/sonic-net/sonic-mgmt/issues/7811 + platform_tests/fwutil/test_fwutil.py::test_fwutil_auto: skip: reason: "Command not yet merged into sonic-utilites" +####################################### +##### mellanox ##### +####################################### +platform_tests/mellanox: + skip: + reason: "Mellanox platform tests only supported on Mellanox devices" + conditions: + - "asic_type not in ['mellanox']" + +platform_tests/mellanox/test_reboot_cause.py: + skip: + reason: "Does not support platform_tests/mellanox/test_reboot_cause.py" + 
conditions: + - "platform in ['x86_64-mlnx_msn2010-r0', 'x86_64-mlnx_msn2700-r0', 'x86_64-mlnx_msn2100-r0', 'x86_64-mlnx_msn2410-r0', 'x86_64-nvidia_sn2201-r0']" + +####################################### +#### test_memory_exhaustion.py ##### +####################################### +platform_tests/test_memory_exhaustion.py: + skip: + reason: "Unsupported release or platforms" + conditions: + - "(is_multi_asic==True and release in ['201911']) or (hwsku in ['Celestica-E1031-T48S4'])" + ####################################### ##### test_platform_info.py ##### ####################################### @@ -563,41 +633,58 @@ platform_tests/test_platform_info.py::test_thermal_control_load_invalid_format_j #Thermal policies are implemented as part of BSP layer in Cisco 8000 platform, so there is no need for loading JSON file, #hence the test case needs to be skipped skip: - reason: "Cisco platforms use different mechanism to generate thermal policy, current method is not applicable" - conditions: - - "asic_type=='cisco-8000'" - skip: - reason: "Multi ASIC platfrom running 201911 release doesn't have thermalctld" + # Cisco platforms use different mechanism to generate thermal policy, current method is not applicable + # Multi ASIC platform running 201911 release doesn't have thermalctld + # s6100 has not supported get_thermal_manager yet + reason: "Skip on Cisco platform and multi-asic platform running 201911 release" conditions: - - "is_multi_asic==True and release in ['201911']" + - "asic_type=='cisco-8000' or (is_multi_asic==True and release in ['201911']) or ('dell_s6100' in platform) or ('sw_to3200k' in hwsku)" platform_tests/test_platform_info.py::test_thermal_control_load_invalid_value_json: - xfail: - reason: "Image issue on Arista platforms" - conditions: - - "platform in ['x86_64-arista_7050cx3_32s', 'x86_64-arista_7050_qx32s']" - - "branch in ['internal-202012']" #Thermal policies are implemented as part of BSP layer in Cisco 8000 platform, so there is no need for 
loading JSON file, #hence the test case needs to be skipped skip: - reason: "Cisco platforms use different mechanism to generate thermal policy, current method is not applicable" + # Cisco platforms use different mechanism to generate thermal policy, current method is not applicable + # Multi ASIC platform running 201911 release doesn't have thermalctld + # s6100 has not supported get_thermal_manager yet + reason: "Skip on Cisco platform and multi-asic platform running 201911 release" conditions: - - "asic_type=='cisco-8000'" - skip: - reason: "Multi ASIC platfrom running 201911 release doesn't have thermalctld" - conditions: - - "is_multi_asic==True and release in ['201911']" + - "asic_type=='cisco-8000' or (is_multi_asic==True and release in ['201911']) or ('dell_s6100' in platform) or ('sw_to3200k' in hwsku)" ####################################### ##### test_reboot.py ##### ####################################### +platform_tests/test_reboot.py::test_fast_reboot: + skip: + reason: "Skip test_fast_reboot for m0" + conditions: + - "topo_type in ['m0', 'mx', 't1', 't2']" + +platform_tests/test_reboot.py::test_power_off_reboot: + skip: + reason: "Skip power off reboot test for Nokia-7215" + conditions: + - "(hwsku in ['Celestica-E1031-T48S4']) or ('sw_to3200k' in hwsku) or (platform in ['armhf-nokia_ixs7215_52x-r0']) or (is_multi_asic==True and release in ['201911'])" + +platform_tests/test_reboot.py::test_soft_reboot: + skip: + reason: "Skip test_soft_reboot for m0" + conditions: + - "topo_type in ['m0']" + platform_tests/test_reboot.py::test_warm_reboot: - xfail: - reason: "Image issue on Boradcom dualtor testbeds" - strict: True + skip: + reason: "Skip test_warm_reboot for m0" + conditions: + - "topo_type in ['m0', 'mx', 't1', 't2']" + +platform_tests/test_reboot.py::test_watchdog_reboot: + skip: + reason: "Skip watchdog reboot test for Wistron / Nokia 7215 / cisco platform x86_64-8102_64h_o-r0" + conditions_logical_operator: or conditions: - - "asic_type in 
['broadcom']" - - "topo_name in ['dualtor', 'dualtor-56', 'dualtor-120']" + - "platform in ['armhf-nokia_ixs7215_52x-r0']" + - "'t1' in topo_type and platform in ['x86_64-8102_64h_o-r0']" ####################################### ##### test_sequential_restart.py ##### diff --git a/tests/common/plugins/log_section_start/__init__.py b/tests/common/plugins/log_section_start/__init__.py index 580507b785e..7519b86033f 100644 --- a/tests/common/plugins/log_section_start/__init__.py +++ b/tests/common/plugins/log_section_start/__init__.py @@ -84,7 +84,7 @@ def _fixture_generator_decorator(fixture_generator, *args, **kargs): try: next(it) except StopIteration: - raise + return except Exception as detail: logging.exception("\n%r", detail) raise diff --git a/tests/common/plugins/loganalyzer/__init__.py b/tests/common/plugins/loganalyzer/__init__.py index 32015d16f7c..d872d9233b2 100644 --- a/tests/common/plugins/loganalyzer/__init__.py +++ b/tests/common/plugins/loganalyzer/__init__.py @@ -30,9 +30,6 @@ def analyzer_add_marker(analyzers, node=None, results=None): loganalyzer = analyzers[node.hostname] logging.info("Add start marker into DUT syslog for host {}".format(node.hostname)) marker = loganalyzer.init() - logging.info("Load config and analyze log for host {}".format(node.hostname)) - # Read existed common regular expressions located with legacy loganalyzer module - loganalyzer.load_common_config() results[node.hostname] = marker @@ -53,7 +50,9 @@ def loganalyzer(duthosts, request): analyzers = {} parallel_run(analyzer_logrotate, [], {}, duthosts, timeout=120) for duthost in duthosts: - analyzers[duthost.hostname] = LogAnalyzer(ansible_host=duthost, marker_prefix=request.node.name) + analyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=request.node.name) + analyzer.load_common_config() + analyzers[duthost.hostname] = analyzer markers = parallel_run(analyzer_add_marker, [analyzers], {}, duthosts, timeout=120) yield analyzers @@ -63,4 +62,3 @@ def 
loganalyzer(duthosts, request): return logging.info("Starting to analyse on all DUTs") parallel_run(analyze_logs, [analyzers, markers], {}, duthosts, timeout=120) - diff --git a/tests/common/plugins/loganalyzer/loganalyzer.py b/tests/common/plugins/loganalyzer/loganalyzer.py index 7c9af58d4cd..d060a20bb1d 100644 --- a/tests/common/plugins/loganalyzer/loganalyzer.py +++ b/tests/common/plugins/loganalyzer/loganalyzer.py @@ -1,4 +1,3 @@ -import sys import logging import os import re @@ -9,7 +8,6 @@ from .system_msg_handler import AnsibleLogAnalyzer as ansible_loganalyzer from os.path import join, split -from os.path import normpath ANSIBLE_LOGANALYZER_MODULE = system_msg_handler.__file__.replace(r".pyc", ".py") COMMON_MATCH = join(split(__file__)[0], "loganalyzer_common_match.txt") @@ -34,6 +32,9 @@ def __enter__(self): """ Disable logrotate cron task and make sure the running logrotate is stopped. """ + # Disable logrotate systemd timer by best effort. The reason is that logrotate.timer service is not + # available in older version like 201911. + self.ansible_host.command("systemctl stop logrotate.timer", module_ignore_errors=True) # Disable logrotate cron task self.ansible_host.command("sed -i 's/^/#/g' /etc/cron.d/logrotate") logging.debug("Waiting for logrotate from previous cron task run to finish") @@ -57,12 +58,15 @@ def __exit__(self, exc_type, exc_val, exc_tb): """ # Enable logrotate cron task back self.ansible_host.command("sed -i 's/^#//g' /etc/cron.d/logrotate") + # Enable logrotate systemd timer by best effort. The reason is that logrotate.timer service is not + # available in older version like 201911. 
+ self.ansible_host.command("systemctl start logrotate.timer", module_ignore_errors=True) class LogAnalyzerError(Exception): """Raised when loganalyzer found matches during analysis phase.""" def __repr__(self): - return pprint.pformat(self.message) + return pprint.pformat("Log Analyzer Error- Matches found, please check errors in log") class LogAnalyzer: @@ -125,17 +129,51 @@ def _verify_log(self, result): """ if not result: raise LogAnalyzerError("Log analyzer failed - no result.") - if result["total"]["match"] != 0 or result["total"]["expected_missing_match"] != 0: - raise LogAnalyzerError(result) + else: + result_str = self._results_repr(result) + if result["total"]["match"] != 0 or result["total"]["expected_missing_match"] != 0: + raise LogAnalyzerError(result_str) + + # Check for negative case + if self.expect_regex and result["total"]["expected_match"] == 0: + err_parse = 'Log Analyzer failed parsing expected messages\n' + raise LogAnalyzerError(err_parse + result_str) + + # if the number of expected matches is provided + if (self.expect_regex and (self.expected_matches_target > 0) + and result["total"]["expected_match"] != self.expected_matches_target): + err_target = "Log analyzer expected {} messages but found only {}\n".format(self.expected_matches_target, len(self.expect_regex)) + raise LogAnalyzerError(err_target + result_str) + + def _results_repr(self, result): + """ + @summary: The function converts error analysis dictionary to a readable string format. 
+ @param result: Dictionary returned from analyze() function + """ + result_str = '' + total_dic = result["total"] + msg_dic = result["match_messages"] + expect_dic = result['expect_messages'] + unused_list = result['unused_expected_regexp'] - # Check for negative case - if self.expect_regex and result["total"]["expected_match"] == 0: - raise LogAnalyzerError(result) + for msg_type, counter in total_dic.items(): + result_str += msg_type + ": " + str(counter) + "\n" - # if the number of expected matches is provided - if (self.expect_regex and (self.expected_matches_target > 0) - and result["total"]["expected_match"] != self.expected_matches_target): - raise LogAnalyzerError(result) + if any(msg_dic.values()): + result_str += "\nMatch Messages:\n" + for match in msg_dic: + result_str += '\n'.join(msg_dic[match]) + + if any(expect_dic.values()): + result_str += "\nExpected Messages:\n" + for expect in expect_dic: + result_str += '\n'.join(expect_dic[expect]) + + if unused_list: + result_str += "\nExpected Messages that are missing:\n" + result_str += '\n'.join(unused_list) + + return result_str def update_marker_prefix(self, marker_prefix): """ @@ -152,6 +190,7 @@ def load_common_config(self): self.match_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_MATCH])[1] self.ignore_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_IGNORE])[1] self.expect_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_EXPECT])[1] + logging.debug('Loaded common config.') def parse_regexp_file(self, src): """ @@ -198,6 +237,31 @@ def init(self): return self._setup_marker(log_files=log_files) + def add_start_ignore_mark(self, log_files=None): + """ + Adds the start ignore marker to the log files + """ + add_start_ignore_mark = ".".join((self.marker_prefix, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + cmd = "python {run_dir}/loganalyzer.py --action add_start_ignore_mark --run_id {add_start_ignore_mark}".format(run_dir=self.dut_run_dir, 
add_start_ignore_mark=add_start_ignore_mark) + if log_files: + cmd += " --logs {}".format(','.join(log_files)) + + logging.debug("Adding start ignore marker '{}'".format(add_start_ignore_mark)) + self.ansible_host.command(cmd) + self._markers.append(add_start_ignore_mark) + + def add_end_ignore_mark(self, log_files=None): + """ + Adds the end ignore marker to the log files + """ + marker = self._markers.pop() + cmd = "python {run_dir}/loganalyzer.py --action add_end_ignore_mark --run_id {marker}".format(run_dir=self.dut_run_dir, marker=marker) + if log_files: + cmd += " --logs {}".format(','.join(log_files)) + + logging.debug("Adding end ignore marker '{}'".format(marker)) + self.ansible_host.command(cmd) + def _setup_marker(self, log_files=None): """ Adds the marker to the log files @@ -270,6 +334,9 @@ def analyze(self, marker, fail=True): expect_messages_regex = re.compile('|'.join(self.expect_regex)) if len(self.expect_regex) else None logging.debug("Analyze files {}".format(file_list)) + logging.debug(' match_regex="{}"'.format(match_messages_regex.pattern if match_messages_regex else '')) + logging.debug(' ignore_regex="{}"'.format(ignore_messages_regex.pattern if ignore_messages_regex else '')) + logging.debug(' expect_regex="{}"'.format(expect_messages_regex.pattern if expect_messages_regex else '')) analyzer_parse_result = self.ansible_loganalyzer.analyze_file_list(file_list, match_messages_regex, ignore_messages_regex, expect_messages_regex) # Print file content and remove the file for folder in file_list: @@ -277,8 +344,6 @@ def analyze(self, marker, fail=True): logging.debug("{} file content:\n\n{}".format(folder, fo.read())) os.remove(folder) - total_match_cnt = 0 - total_expect_cnt = 0 expected_lines_total = [] unused_regex_messages = [] @@ -300,6 +365,7 @@ def analyze(self, marker, fail=True): unused_regex_messages.append(regex) analyzer_summary["total"]["expected_missing_match"] = len(unused_regex_messages) analyzer_summary["unused_expected_regexp"] 
= unused_regex_messages + logging.debug("Analyzer summary: {}".format(pprint.pformat(analyzer_summary))) if fail: self._verify_log(analyzer_summary) diff --git a/tests/common/plugins/loganalyzer/utils.py b/tests/common/plugins/loganalyzer/utils.py new file mode 100644 index 00000000000..78778ec7725 --- /dev/null +++ b/tests/common/plugins/loganalyzer/utils.py @@ -0,0 +1,32 @@ +from functools import wraps + + +def ignore_loganalyzer(func): + @wraps(func) + def decorated(*args, **kwargs): + """ + try to fetch loganalyzer instances from kwargs: + if ignore_loganalyzer is not passed, do nothing but execute the decorated function. + if ignore_loganalyzer is passed, to avoid 'unexpected keyword argument error', + delete the ignore_loganalyzer from kwargs so that it would not be passed to the decorated function, + and set ignore_loganalyzer markers before and after the decorated function on all log analyzer instances. + """ + + loganalyzer = None + if 'ignore_loganalyzer' in kwargs and kwargs['ignore_loganalyzer'] is not None: + loganalyzer = kwargs['ignore_loganalyzer'] + kwargs.pop('ignore_loganalyzer') + + if loganalyzer: + for _, dut_loganalyzer in loganalyzer.items(): + dut_loganalyzer.add_start_ignore_mark() + + res = func(*args, **kwargs) + + if loganalyzer: + for _, dut_loganalyzer in loganalyzer.items(): + dut_loganalyzer.add_end_ignore_mark() + + return res + + return decorated diff --git a/tests/common/plugins/pdu_controller/__init__.py b/tests/common/plugins/pdu_controller/__init__.py index 6fab5728b6c..6b7f9af7df6 100644 --- a/tests/common/plugins/pdu_controller/__init__.py +++ b/tests/common/plugins/pdu_controller/__init__.py @@ -2,6 +2,7 @@ import pytest from .pdu_manager import pdu_manager_factory +from tests.common.utilities import get_host_visible_vars logger = logging.getLogger(__name__) @@ -21,8 +22,15 @@ def get_pdu_hosts(duthost): return pdu_hosts +def get_pdu_visible_vars(inventories, pdu_hostnames): + pdu_hosts_vars = {} + for pdu_hostname in 
pdu_hostnames: + pdu_hosts_vars[pdu_hostname] = get_host_visible_vars(inventories, pdu_hostname) + return pdu_hosts_vars + + @pytest.fixture(scope="module") -def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts, pdu): +def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts): """ @summary: Fixture for controlling power supply to PSUs of DUT @param duthost: Fixture duthost defined in sonic-mgmt/tests/conftest.py @@ -30,8 +38,16 @@ def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts, controller_base.py. """ duthost = duthosts[enum_rand_one_per_hwsku_hostname] - pdu_hosts = get_pdu_hosts(duthost) - controller = pdu_manager_factory(duthost.hostname, pdu_hosts, conn_graph_facts, pdu) + pdu_hosts = get_pdu_hosts(duthost) + pdu_hostnames = [] + if pdu_hosts: + pdu_hostnames = pdu_hosts.keys() + else: + duthost_pdu_info = conn_graph_facts.get("device_pdu_info", {}).get(duthost.hostname, {}) + pdu_hostnames = [pdu["Hostname"] for pdu in duthost_pdu_info.values()] + + pdu_vars = get_pdu_visible_vars(duthost.host.options["inventory_manager"]._sources, pdu_hostnames) + controller = pdu_manager_factory(duthost.hostname, pdu_hosts, conn_graph_facts, pdu_vars) yield controller @@ -41,13 +57,21 @@ def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts, controller.close() @pytest.fixture(scope="module") -def get_pdu_controller(conn_graph_facts, pdu): +def get_pdu_controller(conn_graph_facts): controller_map = {} def pdu_controller_helper(duthost): if duthost.hostname not in controller_map: pdu_hosts = get_pdu_hosts(duthost) - controller = pdu_manager_factory(duthost.hostname, pdu_hosts, conn_graph_facts, pdu) + pdu_hostnames = [] + if pdu_hosts: + pdu_hostnames = pdu_hosts.keys() + else: + + duthost_pdu_info = conn_graph_facts.get("device_pdu_info", {}).get(duthost.hostname, {}) + pdu_hostnames = [pdu["Hostname"] for pdu in duthost_pdu_info.values()] + pdu_vars = 
get_pdu_visible_vars(duthost.host.options["inventory_manager"]._sources, pdu_hostnames) + controller = pdu_manager_factory(duthost.hostname, pdu_hosts, conn_graph_facts, pdu_vars) controller_map[duthost.hostname] = controller return controller_map[duthost.hostname] diff --git a/tests/common/plugins/pdu_controller/pdu_manager.py b/tests/common/plugins/pdu_controller/pdu_manager.py index 69b901bc440..a1d8023a5b7 100644 --- a/tests/common/plugins/pdu_controller/pdu_manager.py +++ b/tests/common/plugins/pdu_controller/pdu_manager.py @@ -50,8 +50,8 @@ def __init__(self, dut_hostname): self.controllers = [] def _update_outlets(self, outlets, pdu_index, controller_index=None): - for outlet_idx, outlet in enumerate(outlets): - outlet['pdu_index'] = pdu_index + outlet_idx + for outlet in outlets: + outlet['pdu_index'] = pdu_index if controller_index is None: controller_index = pdu_index outlet['pdu_name'] = self.controllers[controller_index]['psu_peer']['peerdevice'] @@ -105,7 +105,7 @@ def add_controller(self, psu_name, psu_peer, pdu_vars): if not (shared_pdu and outlet is None): if controller is None: - controller = get_pdu_controller(pdu_ip, pdu_vars) + controller = get_pdu_controller(pdu_ip, pdu_vars, psu_peer['HwSku'], psu_peer['Type']) if not controller: logger.warning('Failed creating pdu controller: {}'.format(psu_peer)) return @@ -210,8 +210,8 @@ def _build_pdu_manager_from_graph(pduman, dut_hostname, conn_graph_facts, pdu_va logger.info('PDU informatin for {} is not found in graph'.format(dut_hostname)) return False - for psu_name, psu_peer in pdu_info[dut_hostname].items(): - pduman.add_controller(psu_name, psu_peer, pdu_vars) + for psu_name, psu_peer in list(pdu_info[dut_hostname].items()): + pduman.add_controller(psu_name, psu_peer, pdu_vars[psu_peer['Hostname']]) return len(pduman.controllers) > 0 @@ -243,7 +243,7 @@ def _build_pdu_manager_from_inventory(pduman, dut_hostname, pdu_hosts, pdu_vars) 'Type': 'Pdu', 'peerport': 'probing', } - 
pduman.add_controller(ph, psu_peer, pdu_vars) + pduman.add_controller(ph, psu_peer, pdu_vars[psu_peer['Hostname']]) return len(pduman.controllers) > 0 diff --git a/tests/common/plugins/pdu_controller/snmp_pdu_controllers.py b/tests/common/plugins/pdu_controller/snmp_pdu_controllers.py index 458c48e17f0..46778a0fdc5 100644 --- a/tests/common/plugins/pdu_controller/snmp_pdu_controllers.py +++ b/tests/common/plugins/pdu_controller/snmp_pdu_controllers.py @@ -12,50 +12,15 @@ logger = logging.getLogger(__name__) + class snmpPduController(PduControllerBase): """ PDU Controller class for SNMP conrolled PDUs - 'Sentry Switched CDU' and 'APC Web/SNMP Management Card' - This class implements the interface defined in PduControllerBase class for SNMP conrtolled PDU type + This class implements the interface defined in PduControllerBase class for SNMP conrtolled PDU type 'Sentry Switched CDU' and 'APC Web/SNMP Management Card' """ - def get_pdu_controller_type(self): - """ - @summary: Use SNMP to get the type of PDU controller host - @param pdu_controller_host: IP address of PDU controller host - @return: Returns type string of the specified PDU controller host - """ - pSYSDESCR = ".1.3.6.1.2.1.1.1.0" - SYSDESCR = "1.3.6.1.2.1.1.1.0" - pdu = None - cmdGen = cmdgen.CommandGenerator() - snmp_auth = cmdgen.CommunityData(self.snmp_rocommunity) - errorIndication, errorStatus, errorIndex, varBinds = cmdGen.getCmd( - snmp_auth, - cmdgen.UdpTransportTarget((self.controller, 161), timeout=5.0), - cmdgen.MibVariable(pSYSDESCR) - ) - if errorIndication: - logging.info("Failed to get pdu controller type, exception: " + str(errorIndication)) - for oid, val in varBinds: - current_oid = oid.prettyPrint() - current_val = val.prettyPrint() - if current_oid == SYSDESCR: - pdu = current_val - if pdu is None: - self.pduType = None - return - if 'Sentry Switched PDU' in pdu: - self.pduType = "SENTRY4" - if 'Sentry Switched CDU' in pdu: - self.pduType = "SENTRY" - if 'APC Web/SNMP Management Card' 
in pdu: - self.pduType = "APC" - if 'Emerson' in pdu: - self.pduType = 'Emerson' - return - def pduCntrlOid(self): """ Define Oids based on the PDU Type @@ -77,6 +42,15 @@ def pduCntrlOid(self): SENTRY4_PORT_STATUS_BASE_OID = "1.3.6.1.4.1.1718.4.1.8.3.1.1" SENTRY4_PORT_CONTROL_BASE_OID = "1.3.6.1.4.1.1718.4.1.8.5.1.2" SENTRY4_PORT_POWER_BASE_OID = "1.3.6.1.4.1.1718.4.1.8.3.1.9" + # MIB OID for 'Vertiv Geist Upgradeable PDU' + VERTIV_PORT_NAME_BASE_OID = "1.3.6.1.4.1.21239.5.2.3.5.1.3" + VERTIV_PORT_STATUS_BASE_OID = "1.3.6.1.4.1.21239.5.2.3.5.1.4" + VERTIV_PORT_CONTROL_BASE_OID = "1.3.6.1.4.1.21239.5.2.3.5.1.6" + VERTIV_PORT_POWER_BASE_OID = "1.3.6.1.4.1.21239.5.2.3.6.1.12" + # MIB OID for APC controller rPDU + APC_RPDU_PORT_NAME_BASE_OID = "1.3.6.1.4.1.318.1.1.12.3.3.1.1.2" + APC_RPDU_PORT_STATUS_BASE_OID = "1.3.6.1.4.1.318.1.1.12.3.5.1.1.4" + APC_RPDU_PORT_CONTROL_BASE_OID = "1.3.6.1.4.1.318.1.1.12.3.3.1.1.4" self.STATUS_ON = "1" self.STATUS_OFF = "0" self.CONTROL_ON = "1" @@ -84,35 +58,49 @@ def pduCntrlOid(self): self.has_lanes = True self.max_lanes = 5 self.PORT_POWER_BASE_OID = None - if self.pduType == "APC": - self.PORT_NAME_BASE_OID = APC_PORT_NAME_BASE_OID - self.PORT_STATUS_BASE_OID = APC_PORT_STATUS_BASE_OID - self.PORT_CONTROL_BASE_OID = APC_PORT_CONTROL_BASE_OID - elif self.pduType == "SENTRY": - self.PORT_NAME_BASE_OID = SENTRY_PORT_NAME_BASE_OID - self.PORT_STATUS_BASE_OID = SENTRY_PORT_STATUS_BASE_OID - self.PORT_CONTROL_BASE_OID = SENTRY_PORT_CONTROL_BASE_OID + if self.pduType == "Apc": + self.PORT_NAME_BASE_OID = APC_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = APC_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = APC_PORT_CONTROL_BASE_OID + elif self.pduType == "Sentry": + self.PORT_NAME_BASE_OID = SENTRY_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = SENTRY_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = SENTRY_PORT_CONTROL_BASE_OID elif self.pduType == "Emerson": - self.PORT_NAME_BASE_OID = EMERSON_PORT_NAME_BASE_OID - 
self.PORT_STATUS_BASE_OID = EMERSON_PORT_STATUS_BASE_OID - self.PORT_CONTROL_BASE_OID = EMERSON_PORT_CONTROL_BASE_OID - self.CONTROL_OFF = "0" - elif self.pduType == "SENTRY4": - self.PORT_NAME_BASE_OID = SENTRY4_PORT_NAME_BASE_OID - self.PORT_STATUS_BASE_OID = SENTRY4_PORT_STATUS_BASE_OID - self.PORT_CONTROL_BASE_OID = SENTRY4_PORT_CONTROL_BASE_OID - self.PORT_POWER_BASE_OID = SENTRY4_PORT_POWER_BASE_OID + self.PORT_NAME_BASE_OID = EMERSON_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = EMERSON_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = EMERSON_PORT_CONTROL_BASE_OID + self.CONTROL_OFF = "0" + elif self.pduType == "Sentry4": + self.PORT_NAME_BASE_OID = SENTRY4_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = SENTRY4_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = SENTRY4_PORT_CONTROL_BASE_OID + self.PORT_POWER_BASE_OID = SENTRY4_PORT_POWER_BASE_OID + self.has_lanes = False + self.max_lanes = 1 + elif self.pduType == "Vertiv": + self.PORT_NAME_BASE_OID = VERTIV_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = VERTIV_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = VERTIV_PORT_CONTROL_BASE_OID + self.PORT_POWER_BASE_OID = VERTIV_PORT_POWER_BASE_OID + self.STATUS_OFF = "2" + self.CONTROL_ON = "2" + self.CONTROL_OFF = "4" + self.has_lanes = False + self.max_lanes = 1 + elif self.pduType == "ApcRPDU": + self.PORT_NAME_BASE_OID = APC_RPDU_PORT_NAME_BASE_OID + self.PORT_STATUS_BASE_OID = APC_RPDU_PORT_STATUS_BASE_OID + self.PORT_CONTROL_BASE_OID = APC_RPDU_PORT_CONTROL_BASE_OID self.has_lanes = False self.max_lanes = 1 else: pass - def _build_outlet_maps(self, port_oid, label): self.port_oid_dict[port_oid] = { 'label' : label } self.port_label_dict[label] = { 'port_oid' : port_oid } - def _probe_lane(self, lane_id, cmdGen, snmp_auth): pdu_port_base = self.PORT_NAME_BASE_OID query_oid = '.' 
+ pdu_port_base @@ -134,7 +122,6 @@ def _probe_lane(self, lane_id, cmdGen, snmp_auth): label = val.prettyPrint().lower() self._build_outlet_maps(port_oid, label) - def _get_pdu_ports(self): """ @summary: Helper method for getting PDU ports connected to PSUs of DUT @@ -143,7 +130,7 @@ def _get_pdu_ports(self): This method depends on this configuration to find out the PDU ports connected to PSUs of specific DUT. """ if not self.pduType: - logging.info('PDU type is unknown: pdu_ip {}'.format(self.controller)) + logging.error('PDU type is unknown: pdu_ip {}'.format(self.controller)) return cmdGen = cmdgen.CommandGenerator() @@ -152,22 +139,19 @@ def _get_pdu_ports(self): for lane_id in range(1, self.max_lanes + 1): self._probe_lane(lane_id, cmdGen, snmp_auth) - - def __init__(self, controller, pdu): - logging.info("Initializing " + self.__class__.__name__) + def __init__(self, controller, pdu, hwsku, psu_peer_type): + logger.info("Initializing " + self.__class__.__name__) PduControllerBase.__init__(self) self.controller = controller self.snmp_rocommunity = pdu['snmp_rocommunity'] self.snmp_rwcommunity = pdu['snmp_rwcommunity'] - self.pduType = None + self.pduType = 'Sentry4' if hwsku == 'Sentry' and psu_peer_type == 'Pdu' else hwsku self.port_oid_dict = {} self.port_label_dict = {} - self.get_pdu_controller_type() self.pduCntrlOid() self._get_pdu_ports() logging.info("Initialized " + self.__class__.__name__) - def turn_on_outlet(self, outlet): """ @summary: Use SNMP to turn on power to PDU of DUT specified by outlet @@ -222,11 +206,10 @@ def turn_off_outlet(self, outlet): (port_oid, rfc1902.Integer(self.CONTROL_OFF)) ) if errorIndication or errorStatus != 0: - logging.debug("Failed to turn on outlet %s, exception: %s" % (str(outlet), str(errorStatus))) + logging.debug("Failed to turn off outlet %s, exception: %s" % (str(outlet), str(errorStatus))) return False return True - def _get_one_outlet_power(self, cmdGen, snmp_auth, port_id, status): if not 
self.PORT_POWER_BASE_OID: return @@ -248,7 +231,6 @@ def _get_one_outlet_power(self, cmdGen, snmp_auth, port_id, status): status['output_watts'] = current_val return - def _get_one_outlet_status(self, cmdGen, snmp_auth, port_id): query_id = '.' + self.PORT_STATUS_BASE_OID + port_id errorIndication, errorStatus, errorIndex, varBinds = cmdGen.getCmd( @@ -270,7 +252,6 @@ def _get_one_outlet_status(self, cmdGen, snmp_auth, port_id): return None - def get_outlet_status(self, outlet=None, hostname=None): """ @summary: Use SNMP to get status of PDU ports supplying power to PSUs of DUT @@ -320,9 +301,9 @@ def close(self): pass -def get_pdu_controller(controller_ip, pdu): +def get_pdu_controller(controller_ip, pdu, hwsku, psu_peer_type): """ @summary: Factory function to create the actual PDU controller object. @return: The actual PDU controller object. Returns None if something went wrong. """ - return snmpPduController(controller_ip, pdu) + return snmpPduController(controller_ip, pdu, hwsku, psu_peer_type) diff --git a/tests/common/plugins/sanity_check/__init__.py b/tests/common/plugins/sanity_check/__init__.py index 48a099e6d6a..e509dcdfa95 100644 --- a/tests/common/plugins/sanity_check/__init__.py +++ b/tests/common/plugins/sanity_check/__init__.py @@ -9,9 +9,8 @@ from tests.common.plugins.sanity_check import constants from tests.common.plugins.sanity_check import checks -from tests.common.plugins.sanity_check.checks import * +from tests.common.plugins.sanity_check.checks import * # noqa: F401, F403 from tests.common.plugins.sanity_check.recover import recover -from tests.common.plugins.sanity_check.recover import neighbor_vm_restore from tests.common.plugins.sanity_check.constants import STAGE_PRE_TEST, STAGE_POST_TEST from tests.common.helpers.assertions import pytest_assert as pt_assert @@ -20,6 +19,24 @@ SUPPORTED_CHECKS = checks.CHECK_ITEMS +def pytest_sessionfinish(session, exitstatus): + + pre_sanity_failed = session.config.cache.get("pre_sanity_check_failed", 
None) + post_sanity_failed = session.config.cache.get("post_sanity_check_failed", None) + + if pre_sanity_failed: + session.config.cache.set("pre_sanity_check_failed", None) + if post_sanity_failed: + session.config.cache.set("post_sanity_check_failed", None) + + if pre_sanity_failed and not post_sanity_failed: + session.exitstatus = constants.PRE_SANITY_CHECK_FAILED_RC + elif not pre_sanity_failed and post_sanity_failed: + session.exitstatus = constants.POST_SANITY_CHECK_FAILED_RC + elif pre_sanity_failed and post_sanity_failed: + session.exitstatus = constants.SANITY_CHECK_FAILED_RC + + def fallback_serializer(_): """ Fallback serializer for non JSON serializable objects @@ -48,7 +65,7 @@ def _update_check_items(old_items, new_items, supported_items): else: # Add a check item if new_item[0] == "+": new_item = new_item[1:] - if new_item in supported_items : + if new_item in supported_items: if new_item not in updated_items: logger.info("Add checking '{}'".format(new_item)) updated_items.append(new_item) @@ -73,7 +90,7 @@ def print_logs(duthosts, print_dual_tor_logs=False): res.pop('stdout') res.pop('stderr') outputs.append(res) - logger.info("dut={}, cmd_outputs={}".format(dut.hostname,json.dumps(outputs, indent=4))) + logger.info("dut={}, cmd_outputs={}".format(dut.hostname, json.dumps(outputs, indent=4))) def filter_check_items(tbinfo, check_items): @@ -103,6 +120,11 @@ def do_checks(request, check_items, *args, **kwargs): @pytest.fixture(scope="module", autouse=True) def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): + if request.config.option.skip_sanity: + logger.info("Skip sanity check according to command line argument") + yield + return + logger.info("Prepare sanity check") skip_sanity = False @@ -120,7 +142,7 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): if customized_sanity_check: logger.info("Process marker {} in script. 
m.args={}, m.kwargs={}" - .format(customized_sanity_check.name, customized_sanity_check.args, customized_sanity_check.kwargs)) + .format(customized_sanity_check.name, customized_sanity_check.args, customized_sanity_check.kwargs)) skip_sanity = customized_sanity_check.kwargs.get("skip_sanity", False) allow_recover = customized_sanity_check.kwargs.get("allow_recover", False) recover_method = customized_sanity_check.kwargs.get("recover_method", "adaptive") @@ -136,16 +158,15 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): post_check = customized_sanity_check.kwargs.get("post_check", False) - if request.config.option.skip_sanity: - skip_sanity = True if skip_sanity: - logger.info("Skip sanity check according to command line argument or configuration of test script.") + logger.info("Skip sanity check according to configuration of test script.") yield return if request.config.option.allow_recover: allow_recover = True + # Command line specified recover method has higher priority if request.config.option.recover_method: recover_method = request.config.getoption("--recover_method") @@ -157,7 +178,7 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): if cli_check_items: logger.info('Fine tune pre-test check items based on CLI option --check_items') - cli_items_list=str(cli_check_items).split(',') + cli_items_list = str(cli_check_items).split(',') pre_check_items = _update_check_items(pre_check_items, cli_items_list, SUPPORTED_CHECKS) pre_check_items = filter_check_items(tbinfo, pre_check_items) # Filter out un-supported checks. 
@@ -180,8 +201,9 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): else: post_check_items = set() - logger.info("Sanity check settings: skip_sanity=%s, pre_check_items=%s, allow_recover=%s, recover_method=%s, post_check=%s, post_check_items=%s" % \ - (skip_sanity, pre_check_items, allow_recover, recover_method, post_check, post_check_items)) + logger.info("Sanity check settings: skip_sanity=%s, pre_check_items=%s, allow_recover=%s, recover_method=%s, " + "post_check=%s, post_check_items=%s" % + (skip_sanity, pre_check_items, allow_recover, recover_method, post_check, post_check_items)) pre_post_check_items = pre_check_items + [item for item in post_check_items if item not in pre_check_items] for item in pre_post_check_items: @@ -201,39 +223,56 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): print_logs(duthosts, print_dual_tor_logs=dual_tor) check_results = do_checks(request, pre_check_items, stage=STAGE_PRE_TEST) - logger.debug("Pre-test sanity check results:\n%s" % json.dumps(check_results, indent=4, default=fallback_serializer)) + logger.debug("Pre-test sanity check results:\n%s" % + json.dumps(check_results, indent=4, default=fallback_serializer)) failed_results = [result for result in check_results if result['failed']] if failed_results: if not allow_recover: - pt_assert(False, "!!!!!!!!!!!!!!!!Pre-test sanity check failed: !!!!!!!!!!!!!!!!\n{}"\ - .format(json.dumps(failed_results, indent=4, default=fallback_serializer))) + request.config.cache.set("pre_sanity_check_failed", True) + pt_assert(False, "!!!!!!!!!!!!!!!!Pre-test sanity check failed: !!!!!!!!!!!!!!!!\n{}" + .format(json.dumps(failed_results, indent=4, default=fallback_serializer))) else: - dut_failed_results = defaultdict(list) - infra_recovery_actions= [] - for failed_result in failed_results: - if 'host' in failed_result: - dut_failed_results[failed_result['host']].append(failed_result) - if failed_result['check_item'] in 
constants.INFRA_CHECK_ITEMS: - if 'action' in failed_result and failed_result['action'] is not None \ - and callable(failed_result['action']): - infra_recovery_actions.append(failed_result['action']) - for dut_name, dut_results in dut_failed_results.items(): - # Attempt to restore DUT state - recover(duthosts[dut_name], localhost, fanouthosts, dut_results, recover_method) - # Attempt to restore neighbor VM state - neighbor_vm_restore(duthosts[dut_name], nbrhosts, tbinfo) - for action in infra_recovery_actions: - action() + try: + dut_failed_results = defaultdict(list) + infra_recovery_actions = [] + for failed_result in failed_results: + if 'host' in failed_result: + dut_failed_results[failed_result['host']].append(failed_result) + if 'hosts' in failed_result: + for hostname in failed_result['hosts']: + dut_failed_results[hostname].append(failed_result) + if failed_result['check_item'] in constants.INFRA_CHECK_ITEMS: + if 'action' in failed_result and failed_result['action'] is not None \ + and callable(failed_result['action']): + infra_recovery_actions.append(failed_result['action']) + for dut_name, dut_results in dut_failed_results.items(): + # Attempt to restore DUT state + recover(duthosts[dut_name], localhost, fanouthosts, nbrhosts, tbinfo, dut_results, + recover_method) + for action in infra_recovery_actions: + action() + + except Exception as e: + request.config.cache.set("pre_sanity_check_failed", True) + logger.error("Recovery of sanity check failed with exception: ") + pt_assert( + False, + "!!!!!!!!!!!!!!!! Recovery of sanity check failed !!!!!!!!!!!!!!!!" 
+ "Exception: {}".format(repr(e)) + ) logger.info("Run sanity check again after recovery") new_check_results = do_checks(request, pre_check_items, stage=STAGE_PRE_TEST, after_recovery=True) - logger.debug("Pre-test sanity check after recovery results:\n%s" % json.dumps(new_check_results, indent=4, default=fallback_serializer)) + logger.debug("Pre-test sanity check after recovery results:\n%s" % + json.dumps(new_check_results, indent=4, default=fallback_serializer)) new_failed_results = [result for result in new_check_results if result['failed']] if new_failed_results: - pt_assert(False, "!!!!!!!!!!!!!!!! Pre-test sanity check after recovery failed: !!!!!!!!!!!!!!!!\n{}"\ - .format(json.dumps(new_failed_results, indent=4, default=fallback_serializer))) + request.config.cache.set("pre_sanity_check_failed", True) + pt_assert(False, + "!!!!!!!!!!!!!!!! Pre-test sanity check after recovery failed: !!!!!!!!!!!!!!!!\n{}" + .format(json.dumps(new_failed_results, indent=4, default=fallback_serializer))) logger.info("Done pre-test sanity check") else: @@ -248,12 +287,14 @@ def sanity_check(localhost, duthosts, request, fanouthosts, nbrhosts, tbinfo): if post_check_items: logger.info("Start post-test sanity check") post_check_results = do_checks(request, post_check_items, stage=STAGE_POST_TEST) - logger.debug("Post-test sanity check results:\n%s" % json.dumps(post_check_results, indent=4, default=fallback_serializer)) + logger.debug("Post-test sanity check results:\n%s" % + json.dumps(post_check_results, indent=4, default=fallback_serializer)) post_failed_results = [result for result in post_check_results if result['failed']] if post_failed_results: - pt_assert(False, "!!!!!!!!!!!!!!!! Post-test sanity check failed: !!!!!!!!!!!!!!!!\n{}"\ - .format(json.dumps(post_failed_results, indent=4, default=fallback_serializer))) + request.config.cache.set("post_sanity_check_failed", True) + pt_assert(False, "!!!!!!!!!!!!!!!! 
Post-test sanity check failed: !!!!!!!!!!!!!!!!\n{}" + .format(json.dumps(post_failed_results, indent=4, default=fallback_serializer))) logger.info("Done post-test sanity check") else: diff --git a/tests/common/plugins/sanity_check/checks.py b/tests/common/plugins/sanity_check/checks.py index 37f83626a99..a45637d5f10 100644 --- a/tests/common/plugins/sanity_check/checks.py +++ b/tests/common/plugins/sanity_check/checks.py @@ -10,6 +10,7 @@ from tests.common.cache import FactsCache from tests.common.plugins.sanity_check.constants import STAGE_PRE_TEST, STAGE_POST_TEST from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp +from tests.common.fixtures.duthost_utils import check_bgp_router_id logger = logging.getLogger(__name__) SYSTEM_STABILIZE_MAX_TIME = 300 @@ -23,8 +24,8 @@ 'check_bgp', 'check_dbmemory', 'check_monit', - 'check_mux_simulator', - 'check_secureboot'] + 'check_secureboot', + 'check_mux_simulator'] __all__ = CHECK_ITEMS @@ -73,8 +74,9 @@ def _find_down_ports(dut, phy_interfaces, ip_interfaces): @pytest.fixture(scope="module") def check_interfaces(duthosts): + init_result = {"failed": False, "check_item": "interfaces"} def _check(*args, **kwargs): - result = parallel_run(_check_interfaces_on_dut, args, kwargs, duthosts.frontend_nodes, timeout=600) + result = parallel_run(_check_interfaces_on_dut, args, kwargs, duthosts.frontend_nodes, timeout=600, init_result=init_result) return result.values() @reset_ansible_local_tmp @@ -133,9 +135,10 @@ def _check_interfaces_on_dut(*args, **kwargs): @pytest.fixture(scope="module") -def check_bgp(duthosts): +def check_bgp(duthosts, tbinfo): + init_result = {"failed": False, "check_item": "bgp"} def _check(*args, **kwargs): - result = parallel_run(_check_bgp_on_dut, args, kwargs, duthosts.frontend_nodes, timeout=600) + result = parallel_run(_check_bgp_on_dut, args, kwargs, duthosts.frontend_nodes, timeout=600, init_result=init_result) return result.values() @reset_ansible_local_tmp @@ -194,7 
+197,14 @@ def _check_bgp_status_helper(): check_result = {"failed": False, "check_item": "bgp", "host": dut.hostname} networking_uptime = dut.get_networking_uptime().seconds - timeout = max(SYSTEM_STABILIZE_MAX_TIME - networking_uptime, 1) + if SYSTEM_STABILIZE_MAX_TIME - networking_uptime + 480 > 500: + # If max_timeout is higher than 600, it will exceed parallel_run's timeout + # the check will be killed by parallel_run, we can't get expected results. + # 500 seconds is about 8 mins, bgp has enough to get up + max_timeout = 500 + else: + max_timeout = SYSTEM_STABILIZE_MAX_TIME - networking_uptime + 480 + timeout = max(max_timeout, 1) interval = 20 wait_until(timeout, interval, 0, _check_bgp_status_helper) if (check_result['failed']): @@ -207,6 +217,12 @@ def _check_bgp_status_helper(): else: logger.info('No BGP neighbors are down on %s' % dut.hostname) + mgFacts = dut.get_extended_minigraph_facts(tbinfo) + if dut.num_asics() == 1 and tbinfo['topo']['type'] != 't2' and \ + not wait_until(timeout, interval, 0, check_bgp_router_id, dut, mgFacts): + check_result['failed'] = True + logger.info("Failed to verify BGP router identifier is Loopback0 address on %s" % dut.hostname) + logger.info("Done checking bgp status on %s" % dut.hostname) results[dut.hostname] = check_result @@ -234,7 +250,8 @@ def _is_db_omem_over_threshold(command_output): @pytest.fixture(scope="module") def check_dbmemory(duthosts): def _check(*args, **kwargs): - result = parallel_run(_check_dbmemory_on_dut, args, kwargs, duthosts, timeout=600) + init_result = {"failed": False, "check_item": "dbmemory"} + result = parallel_run(_check_dbmemory_on_dut, args, kwargs, duthosts, timeout=600, init_result=init_result) return result.values() @reset_ansible_local_tmp @@ -442,69 +459,80 @@ def _check_single_intf_status(intf_status, expected_side): def _check_dut_mux_status(duthosts, duts_minigraph_facts): + def _verify_show_mux_status(): + duts_mux_status = duthosts.show_and_parse("show mux status") + + 
duts_parsed_mux_status.clear() + for dut_hostname, dut_mux_status in duts_mux_status.items(): + logger.info('Verify that "show mux status" has output ON {}'.format(dut_hostname)) + if len(dut_mux_status) == 0: + err_msg_from_mux_status.append('No mux status in output of "show mux status"') + return False + + logger.info('Verify that mux ports match vlan interfaces of DUT.') + vlan_intf_names = set() + for vlan in duts_minigraph_facts[dut_hostname]['minigraph_vlans'].values(): + vlan_intf_names = vlan_intf_names.union(set(vlan['members'])) + dut_mux_intfs = [] + for row in dut_mux_status: + dut_mux_intfs.append(row['port']) + if vlan_intf_names != set(dut_mux_intfs): + err_msg_from_mux_status.append('Mux ports mismatch vlan interfaces, please check output of "show mux status"') + return False + + logger.info('Verify mux status and parse active/standby side') + dut_parsed_mux_status = {} + for row in dut_mux_status: + # Verify that mux status is either active or standby + if row['status'] not in ['active', 'standby']: + err_msg_from_mux_status.append('Unexpected mux status "{}", please check output of "show mux status"'.format(row['status'])) + return False + + # Parse mux status, transform port name to port index, which is also mux index + port_name = row['port'] + port_idx = duts_minigraph_facts[dut_hostname]['minigraph_port_indices'][port_name] + + # Transform "active" and "standby" to active side which is "upper_tor" or "lower_tor" + status = row['status'] + if dut_hostname == dut_upper_tor.hostname: + # On upper tor, mux status "active" means that active side of mux is upper_tor + # mux status "standby" means that active side of mux is lower_tor + active_side = UPPER_TOR if status == 'active' else LOWER_TOR + else: + # On lower tor, mux status "active" means that active side of mux is lower_tor + # mux status "standby" means that active side of mux is upper_tor + active_side = UPPER_TOR if status == 'standby' else LOWER_TOR + dut_parsed_mux_status[str(port_idx)] 
= active_side + duts_parsed_mux_status[dut_hostname] = dut_parsed_mux_status + + logger.info('Verify that the mux status on both ToRs are consistent') + upper_tor_mux_status = duts_parsed_mux_status[dut_upper_tor.hostname] + lower_tor_mux_status = duts_parsed_mux_status[dut_lower_tor.hostname] + + logger.info('Verify that mux status is consistent on both ToRs.') + for port_idx in upper_tor_mux_status: + if upper_tor_mux_status[port_idx] != lower_tor_mux_status[port_idx]: + err_msg_from_mux_status.append('Inconsistent mux status on dualtors, please check output of "show mux status"') + return False + + logger.info('Check passed, return parsed mux status') + err_msg_from_mux_status.append("") + return True + dut_upper_tor = duthosts[0] dut_lower_tor = duthosts[1] - # Run "show mux status" on dualtor DUTs to collect mux status - duts_mux_status = duthosts.show_and_parse('show mux status') - - # Parse and basic check duts_parsed_mux_status = {} - for dut_hostname, dut_mux_status in duts_mux_status.items(): - - logger.info('Verify that "show mux status" has output ON {}'.format(dut_hostname)) - if len(dut_mux_status) == 0: - err_msg = 'No mux status in output of "show mux status"' - return False, err_msg, {} - - logger.info('Verify that mux ports match vlan interfaces of DUT.') - vlan_intf_names = set() - for vlan in duts_minigraph_facts[dut_hostname]['minigraph_vlans'].values(): - vlan_intf_names = vlan_intf_names.union(set(vlan['members'])) - dut_mux_intfs = [] - for row in dut_mux_status: - dut_mux_intfs.append(row['port']) - if vlan_intf_names != set(dut_mux_intfs): - err_msg = 'Mux ports mismatch vlan interfaces, please check output of "show mux status"' - return False, err_msg, {} - - logger.info('Verify mux status and parse active/standby side') - dut_parsed_mux_status = {} - for row in dut_mux_status: - # Verify that mux status is either active or standby - if row['status'] not in ['active', 'standby']: - err_msg = 'Unexpected mux status "{}", please check 
output of "show mux status"'.format(row['status']) - return False, err_msg, {} - - # Parse mux status, transform port name to port index, which is also mux index - port_name = row['port'] - port_idx = duts_minigraph_facts[dut_hostname]['minigraph_port_indices'][port_name] - - # Transform "active" and "standby" to active side which is "upper_tor" or "lower_tor" - status = row['status'] - if dut_hostname == dut_upper_tor.hostname: - # On upper tor, mux status "active" means that active side of mux is upper_tor - # mux status "standby" means that active side of mux is lower_tor - active_side = UPPER_TOR if status == 'active' else LOWER_TOR - else: - # On lower tor, mux status "active" means that active side of mux is lower_tor - # mux status "standby" means that active side of mux is upper_tor - active_side = UPPER_TOR if status == 'standby' else LOWER_TOR - dut_parsed_mux_status[str(port_idx)] = active_side - duts_parsed_mux_status[dut_hostname] = dut_parsed_mux_status + err_msg_from_mux_status = [] - logger.info('Verify that the mux status on both ToRs are consistent') - upper_tor_mux_status = duts_parsed_mux_status[dut_upper_tor.hostname] - lower_tor_mux_status = duts_parsed_mux_status[dut_lower_tor.hostname] - - logger.info('Verify that mux status is consistent on both ToRs.') - for port_idx in upper_tor_mux_status: - if upper_tor_mux_status[port_idx] != lower_tor_mux_status[port_idx]: - err_msg = 'Inconsistent mux status on dualtors, please check output of "show mux status"' - return False, err_msg, {} + if not wait_until(30, 5, 0, _verify_show_mux_status): + if err_msg_from_mux_status: + err_msg = err_msg_from_mux_status[-1] + else: + err_msg = "Unknown error occured inside the check" + return False, err_msg, {} - logger.info('Check passed, return parsed mux status') - return True, "", upper_tor_mux_status + return True, "", duts_parsed_mux_status @pytest.fixture(scope='module') @@ -537,6 +565,7 @@ def _check(*args, **kwargs): logger.warning(err_msg) 
results['failed'] = True results['failed_reason'] = err_msg + results['hosts'] = [ dut.hostname for dut in duthosts ] results['action'] = reset_simulator_port return results @@ -571,7 +600,8 @@ def check_monit(duthosts): @return: A dictionary contains the testing result (failed or not failed) and the status of each service. """ def _check(*args, **kwargs): - result = parallel_run(_check_monit_on_dut, args, kwargs, duthosts, timeout=600) + init_result = {"failed": False, "check_item": "monit"} + result = parallel_run(_check_monit_on_dut, args, kwargs, duthosts, timeout=600, init_result=init_result) return result.values() @reset_ansible_local_tmp @@ -595,7 +625,8 @@ def _check_monit_on_dut(*args, **kwargs): check_result["failed"] = True check_result["failed_reason"] = "Monit was not running" logger.info("Checking status of each Monit service was done!") - return check_result + results[dut.hostname] = check_result + return check_result = _check_monit_services_status(check_result, monit_services_status) else: @@ -634,13 +665,14 @@ def _check_monit_on_dut(*args, **kwargs): @pytest.fixture(scope="module") def check_processes(duthosts): def _check(*args, **kwargs): + init_result = {"failed": False, "check_item": "processes"} timeout = 600 # Increase the timeout for multi-asic virtual switch DUT. 
for node in duthosts.nodes: if 'kvm' in node.sonichost.facts['platform'] and node.sonichost.is_multi_asic: timeout = 1000 break - result = parallel_run(_check_processes_on_dut, args, kwargs, duthosts, timeout=timeout) + result = parallel_run(_check_processes_on_dut, args, kwargs, duthosts, timeout=timeout, init_result=init_result) return result.values() @reset_ansible_local_tmp diff --git a/tests/common/plugins/sanity_check/constants.py b/tests/common/plugins/sanity_check/constants.py index f29c88f60b4..6d2b49181af 100644 --- a/tests/common/plugins/sanity_check/constants.py +++ b/tests/common/plugins/sanity_check/constants.py @@ -13,7 +13,7 @@ "mux_config": "show mux config", } -# Check items for testbed infrastructure that are not +# Check items for testbed infrastructure that are not # controlled by the DUT INFRA_CHECK_ITEMS = [ "mux_simulator" @@ -21,14 +21,52 @@ # Recover related definitions RECOVER_METHODS = { - "config_reload": {"cmd": "bash -c 'config reload -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 120}, - "config_reload_f": {"cmd": "bash -c 'config reload -f -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 120}, - "load_minigraph": {"cmd": "bash -c 'config load_minigraph -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60}, - "reboot": {"cmd": "reboot", "reboot": True, "adaptive": False, 'recover_wait': 120}, - "warm_reboot": {"cmd": "warm-reboot", "reboot": True, "adaptive": False, 'recover_wait': 120}, - "fast_reboot": {"cmd": "fast_reboot", "reboot": True, "adaptive": False, 'recover_wait': 120}, - "adaptive": {"cmd": None, "reboot": False, "adaptive": True, 'recover_wait': 30}, + "config_reload": { + "cmd": "false", + "reload": True, + "reboot": False, + "adaptive": False, + 'recover_wait': 120 + }, + "load_minigraph": { + "cmd": "bash -c 'config load_minigraph -y &>/dev/null'", + "reload": False, + "reboot": False, + "adaptive": False, + 'recover_wait': 60 + }, + "reboot": { + 
"cmd": "reboot", + "reload": False, + "reboot": True, + "adaptive": False, + 'recover_wait': 120 + }, + "warm_reboot": { + "cmd": "warm-reboot", + "reload": False, + "reboot": True, + "adaptive": False, + 'recover_wait': 120 + }, + "fast_reboot": { + "cmd": "fast_reboot", + "reload": False, + "reboot": True, + "adaptive": False, + 'recover_wait': 120 + }, + "adaptive": { + "cmd": None, + "reload": False, + "reboot": False, + "adaptive": True, + 'recover_wait': 30 + }, } # All supported recover methods STAGE_PRE_TEST = 'stage_pre_test' STAGE_POST_TEST = 'stage_post_test' +PRE_SANITY_CHECK_FAILED_RC = 10 +POST_SANITY_CHECK_FAILED_RC = 11 +SANITY_CHECK_FAILED_RC = 12 diff --git a/tests/common/plugins/sanity_check/recover.py b/tests/common/plugins/sanity_check/recover.py index 24ed1314bbe..47395f55405 100644 --- a/tests/common/plugins/sanity_check/recover.py +++ b/tests/common/plugins/sanity_check/recover.py @@ -1,14 +1,13 @@ import json import logging -from . import constants - -from tests.common.utilities import wait +from tests.common import config_reload +from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp from tests.common.platform.device_utils import fanout_switch_port_lookup -from tests.common.config_reload import config_force_option_supported -from tests.common.reboot import reboot from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_FAST, REBOOT_TYPE_COLD -from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp +from tests.common.reboot import reboot +from tests.common.utilities import wait +from . 
import constants logger = logging.getLogger(__name__) @@ -23,10 +22,10 @@ def reboot_dut(dut, localhost, cmd): else: reboot_type = REBOOT_TYPE_COLD - reboot(dut, localhost, reboot_type=reboot_type) + reboot(dut, localhost, reboot_type=reboot_type, safe_reboot=True) -def __recover_interfaces(dut, fanouthosts, result, wait_time): +def _recover_interfaces(dut, fanouthosts, result, wait_time): action = None for port in result['down_ports']: logging.warning("Restoring port: {}".format(port)) @@ -38,72 +37,26 @@ def __recover_interfaces(dut, fanouthosts, result, wait_time): fanout, fanout_port = fanout_switch_port_lookup(fanouthosts, dut.hostname, port) if fanout and fanout_port: + fanout.shutdown(fanout_port) fanout.no_shutdown(fanout_port) - asic = dut.get_port_asic_instance(port) - dut.asic_instance(asic.asic_index).startup_interface(port) + if dut.facts["num_asic"] > 1: + asic = dut.get_port_asic_instance(port) + dut.asic_instance(asic.asic_index).startup_interface(port) + else: + dut.no_shutdown(port) wait(wait_time, msg="Wait {} seconds for interface(s) to restore.".format(wait_time)) return action -def __recover_services(dut, result): +def _recover_services(dut, result): status = result['services_status'] services = [ x for x in status if not status[x] ] logging.warning("Service(s) down: {}".format(services)) return 'reboot' if 'database' in services else 'config_reload' -def __recover_with_command(dut, cmd, wait_time): - dut.command(cmd) - wait(wait_time, msg="Wait {} seconds for system to be stable.".format(wait_time)) - - -def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time): - outstanding_action = None - for result in check_results: - if result['failed']: - if result['check_item'] == 'interfaces': - action = __recover_interfaces(dut, fanouthosts, result, wait_time) - elif result['check_item'] == 'services': - action = __recover_services(dut, result) - elif result['check_item'] in [ 'processes', 'bgp' ]: - action = 'config_reload' - else: 
- action = 'reboot' - - # Any action can override no action or 'config_reload'. - # 'reboot' is last resort and cannot be overridden. - if action and (not outstanding_action or outstanding_action == 'config_reload'): - outstanding_action = action - - logging.warning("Restoring {} with proposed action: {}, final action: {}".format(result, action, outstanding_action)) - - if outstanding_action: - if outstanding_action == "config_reload" and config_force_option_supported(dut): - outstanding_action = "config_reload_f" - method = constants.RECOVER_METHODS[outstanding_action] - wait_time = method['recover_wait'] - if method["reboot"]: - reboot_dut(dut, localhost, method["cmd"]) - else: - __recover_with_command(dut, method['cmd'], wait_time) - - -def recover(dut, localhost, fanouthosts, check_results, recover_method): - logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method)) - if recover_method == "config_reload" and config_force_option_supported(dut): - recover_method = "config_reload_f" - method = constants.RECOVER_METHODS[recover_method] - wait_time = method['recover_wait'] - if method["adaptive"]: - adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time) - elif method["reboot"]: - reboot_dut(dut, localhost, method["cmd"]) - else: - __recover_with_command(dut, method['cmd'], wait_time) - - @reset_ansible_local_tmp -def neighbor_vm_recover_bgpd(node=None, results=None): +def _neighbor_vm_recover_bgpd(node=None, results=None): """Function for restoring BGP on neighbor VMs using the parallel_run tool. 
Args: @@ -149,5 +102,58 @@ def neighbor_vm_restore(duthost, nbrhosts, tbinfo): mg_facts = duthost.get_extended_minigraph_facts(tbinfo) vm_neighbors = mg_facts['minigraph_neighbors'] if vm_neighbors: - results = parallel_run(neighbor_vm_recover_bgpd, (), {}, nbrhosts.values(), timeout=300) + results = parallel_run(_neighbor_vm_recover_bgpd, (), {}, nbrhosts.values(), timeout=300) logger.debug('Results of restoring neighbor VMs: {}'.format(json.dumps(dict(results)))) + return 'config_reload' # May still need to do a config reload + + +def _recover_with_command(dut, cmd, wait_time): + dut.command(cmd) + wait(wait_time, msg="Wait {} seconds for system to be stable.".format(wait_time)) + + +def adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, wait_time): + outstanding_action = None + for result in check_results: + if result['failed']: + if result['check_item'] == 'interfaces': + action = _recover_interfaces(dut, fanouthosts, result, wait_time) + elif result['check_item'] == 'services': + action = _recover_services(dut, result) + elif result['check_item'] == 'bgp': + action = neighbor_vm_restore(dut, nbrhosts, tbinfo) + elif result['check_item'] in [ 'processes', 'mux_simulator' ]: + action = 'config_reload' + else: + action = 'reboot' + + # Any action can override no action or 'config_reload'. + # 'reboot' is last resort and cannot be overridden. 
+ if action and (not outstanding_action or outstanding_action == 'config_reload'): + outstanding_action = action + + logging.warning("Restoring {} with proposed action: {}, final action: {}".format(result, action, outstanding_action)) + + if outstanding_action: + method = constants.RECOVER_METHODS[outstanding_action] + wait_time = method['recover_wait'] + if method["reload"]: + config_reload(dut, safe_reload=True) + elif method["reboot"]: + reboot_dut(dut, localhost, method["cmd"]) + else: + _recover_with_command(dut, method['cmd'], wait_time) + + +def recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, recover_method): + logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method)) + method = constants.RECOVER_METHODS[recover_method] + wait_time = method['recover_wait'] + if method["adaptive"]: + adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, wait_time) + elif method["reload"]: + config_reload(dut, safe_reload=True) + elif method["reboot"]: + reboot_dut(dut, localhost, method["cmd"]) + else: + _recover_with_command(dut, method['cmd'], wait_time) diff --git a/tests/common/plugins/test_completeness/__init__.py b/tests/common/plugins/test_completeness/__init__.py index 6fae1e8ab61..c828fd33a14 100644 --- a/tests/common/plugins/test_completeness/__init__.py +++ b/tests/common/plugins/test_completeness/__init__.py @@ -39,7 +39,7 @@ def get_level_name(cls, level): Returns: CompletenessLevel as a string """ - if type(level) is not CompletenessLevel: + if not isinstance(level, CompletenessLevel): logging.error("Invalid completeness type. Expected: {}. 
Format {}".format(str(CompletenessLevel), type(level))) level_name = level.name.lower() return level_name diff --git a/tests/common/reboot.py b/tests/common/reboot.py index c390d35590c..460c1ca15c7 100644 --- a/tests/common/reboot.py +++ b/tests/common/reboot.py @@ -4,6 +4,9 @@ import logging from multiprocessing.pool import ThreadPool, TimeoutError from collections import deque + +from tests.common.helpers.assertions import pytest_assert +from tests.common.platform.processes_utils import wait_critical_processes from utilities import wait_until logger = logging.getLogger(__name__) @@ -19,6 +22,7 @@ REBOOT_TYPE_POWEROFF = "power off" REBOOT_TYPE_WATCHDOG = "watchdog" REBOOT_TYPE_UNKNOWN = "Unknown" +REBOOT_TYPE_THERMAL_OVERLOAD = "Thermal Overload" # Event to signal DUT activeness DUT_ACTIVE = threading.Event() @@ -95,7 +99,7 @@ def check_warmboot_finalizer_inactive(duthost): def reboot(duthost, localhost, reboot_type='cold', delay=10, \ timeout=0, wait=0, wait_for_ssh=True, wait_warmboot_finalizer=False, warmboot_finalizer_timeout=0,\ - reboot_helper=None, reboot_kwargs=None): + reboot_helper=None, reboot_kwargs=None, safe_reboot=False): """ reboots DUT :param duthost: DUT host object @@ -108,6 +112,7 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, \ :param wait_warmboot_finalizer=True: Wait for WARMBOOT_FINALIZER done :param reboot_helper: helper function to execute the power toggling :param reboot_kwargs: arguments to pass to the reboot_helper + :param safe_reboot: arguments to wait DUT ready after reboot :return: """ @@ -180,7 +185,17 @@ def execute_reboot_helper(): logger.info('waiting for switch {} to initialize'.format(hostname)) - time.sleep(wait) + if safe_reboot: + # The wait time passed in might not be guaranteed to cover the actual + # time it takes for containers to come back up. Therefore, add 5 + # minutes to the maximum wait time. If it's ready sooner, then the + # function will return sooner. 
+ pytest_assert(wait_until(wait + 400, 20, 0, duthost.critical_services_fully_started), + "All critical services should be fully started!") + wait_critical_processes(duthost) + + else: + time.sleep(wait) # Wait warmboot-finalizer service if reboot_type == REBOOT_TYPE_WARM and wait_warmboot_finalizer: @@ -223,12 +238,27 @@ def check_reboot_cause(dut, reboot_cause_expected): logging.debug("dut {} last reboot-cause {}".format(dut.hostname, reboot_cause_got)) return reboot_cause_got == reboot_cause_expected + def sync_reboot_history_queue_with_dut(dut): """ @summary: Sync DUT and internal history queues @param dut: The AnsibleHost object of DUT. """ + global REBOOT_TYPE_HISTOYR_QUEUE + global MAX_NUM_REBOOT_CAUSE_HISTORY + + # Initialize local deque for storing DUT reboot cause history + dut_reboot_history_queue = deque([], MAX_NUM_REBOOT_CAUSE_HISTORY) + + # Skip this function if sonic image is 201811 or 201911 + if "201811" in dut.os_version or "201911" in dut.os_version: + logging.info("Skip sync reboot-cause history for version before 202012") + return + + # IF control is here it means the SONiC image version is > 201911 + # Try and get the entire reboot-cause history from DUT + # Retry logic for increased robustness dut_reboot_history_received = False for retry_count in range(MAX_RETRIES): @@ -244,15 +274,18 @@ def sync_reboot_history_queue_with_dut(dut): logging.info("Exception type: %s" % e_type.__name__) logging.info("Exception message: %s" % e_value) logging.info("Backing off for %d seconds before retrying", ((retry_count+1) * RETRY_BACKOFF_TIME)) - + time.sleep(((retry_count+1) * RETRY_BACKOFF_TIME)) continue - # If retry logic did not yield reboot cause history from DUT, + # If retry logic did not yield reboot cause history from DUT, # return without clearing the existing reboot history queue. 
if not dut_reboot_history_received: + logging.warn("Unable to sync reboot history queue") return + # If the reboot cause history is received from DUT, + # we sync the two queues. TO that end, # Clear the current reboot history queue REBOOT_TYPE_HISTOYR_QUEUE.clear() @@ -260,14 +293,23 @@ def sync_reboot_history_queue_with_dut(dut): # iterate through every item in the reboot dict until # a "cause" match is found. Then add that key to the # reboot history queue REBOOT_TYPE_HISTOYR_QUEUE + # If no cause is found add 'Unknown' as reboot type. + # NB: appendleft used because queue received from DUT - # NB: is in reverse-chronological order. + # is in reverse-chronological order. for reboot_type in (dut_reboot_history_queue): + dict_iter_found = False for dict_iter in (reboot_ctrl_dict): if re.search(reboot_ctrl_dict[dict_iter]["cause"], reboot_type["cause"]): + logging.info("Adding {} to REBOOT_TYPE_HISTOYR_QUEUE".format(dict_iter)) REBOOT_TYPE_HISTOYR_QUEUE.appendleft(dict_iter) + dict_iter_found = True break + if not dict_iter_found: + logging.info("Adding {} to REBOOT_TYPE_HISTOYR_QUEUE".format(REBOOT_TYPE_UNKNOWN)) + REBOOT_TYPE_HISTOYR_QUEUE.appendleft(REBOOT_TYPE_UNKNOWN) + def check_reboot_cause_history(dut, reboot_type_history_queue): """ @@ -302,6 +344,9 @@ def check_reboot_cause_history(dut, reboot_type_history_queue): reboot_type_history_len = len(reboot_type_history_queue) if reboot_type_history_len <= len(reboot_cause_history_got): for index, reboot_type in enumerate(reboot_type_history_queue): + if reboot_type not in reboot_ctrl_dict: + logging.warn("Reboot type: {} not in dictionary. 
Skipping history check for this entry.".format(reboot_type)) + continue logging.info("index: %d, reboot cause: %s, reboot cause from DUT: %s" % (index, reboot_ctrl_dict[reboot_type]["cause"], reboot_cause_history_got[reboot_type_history_len-index-1]["cause"])) if not re.search(reboot_ctrl_dict[reboot_type]["cause"], reboot_cause_history_got[reboot_type_history_len-index-1]["cause"]): logging.error("The {} reboot-cause not match. expected_reboot type={}, actual_reboot_cause={}".format( diff --git a/tests/common/snappi/__init__.py b/tests/common/snappi_tests/__init__.py similarity index 100% rename from tests/common/snappi/__init__.py rename to tests/common/snappi_tests/__init__.py diff --git a/tests/common/snappi/common_helpers.py b/tests/common/snappi_tests/common_helpers.py similarity index 79% rename from tests/common/snappi/common_helpers.py rename to tests/common/snappi_tests/common_helpers.py index 4a8404db1b4..1dda422f7f3 100644 --- a/tests/common/snappi/common_helpers.py +++ b/tests/common/snappi_tests/common_helpers.py @@ -3,7 +3,6 @@ secondary activities like convert the ansible Unicode STDOUT output to string, get IP address in a subnet, increment an IP address, get VLAN subnet etc. - This file is also a placeholder for auxiliary function that are required for supporting automation with Snappi devices in future: like collecting diagnostics, uploading and downloading files @@ -11,6 +10,7 @@ in .csv format etc. """ +from enum import Enum import ipaddr from netaddr import IPNetwork from tests.common.mellanox_data import is_mellanox_device as isMellanoxDevice @@ -19,11 +19,9 @@ def increment_ip_address(ip, incr=1): """ Increment IP address by an integer number. - Args: ip (str): IP address in string format. incr (int): Increment by the specified number. - Return: IP address in the argument incremented by the given integer. 
""" @@ -37,10 +35,8 @@ def ansible_stdout_to_str(ansible_stdout): """ The stdout of Ansible host is essentially a list of unicode characters. This function converts it to a string. - Args: ansible_stdout: stdout of Ansible - Returns: Return a string """ @@ -53,10 +49,8 @@ def ansible_stdout_to_str(ansible_stdout): def get_vlan_subnet(host_ans): """ Get VLAN subnet of a T0 device - Args: host_ans: Ansible host instance of the device - Returns: VLAN subnet, e.g., "192.168.1.1/24" where 192.168.1.1 is gateway and 24 is prefix length @@ -77,10 +71,8 @@ def get_vlan_subnet(host_ans): def get_egress_lossless_buffer_size(host_ans): """ Get egress lossless buffer size of a switch - Args: host_ans: Ansible host instance of the device - Returns: total switch buffer size in byte (int) """ @@ -99,15 +91,68 @@ def get_egress_lossless_buffer_size(host_ans): egress_lossless_pool = buffer_pools[profile_name] return int(egress_lossless_pool['size']) +def get_lossless_buffer_size(host_ans): + """ + Get egress lossless buffer size of a switch, unless an 8102 switch, + in which case, get the ingress lossless buffer size + Args: + host_ans: Ansible host instance of the device + Returns: + total switch buffer size in byte (int) + """ + config_facts = host_ans.config_facts(host=host_ans.hostname, + source="running")['ansible_facts'] + is_cisco_8102 = True if ('Cisco' or 'cisco') and '8102' in host_ans.facts['platform'] else False + + if "BUFFER_POOL" not in config_facts.keys(): + return None + + buffer_pools = config_facts['BUFFER_POOL'] + profile_name = 'ingress_lossless_pool' if is_cisco_8102 else 'egress_lossless_pool' + + if profile_name not in buffer_pools.keys(): + return None + + lossless_pool = buffer_pools[profile_name] + return int(lossless_pool['size']) + + +def get_pg_dropped_packets(duthost, phys_intf, prio): + """ + Get number of ingress packets dropped on a specific priority + of a physical interface + Args: + host_ans: Ansible host instance of the device + phys_intf 
(str): Name of physical interface ex. Ethernet4 + prio (int): Priority group to check ex. 4 + Returns: + total number of dropped packets (int) + """ + oid_cmd = "sonic-db-cli " \ + "COUNTERS_DB HGET COUNTERS_QUEUE_NAME_MAP " + phys_intf + ":" + str(prio) + oid_out = duthost.command(oid_cmd) + oid_str = str(oid_out["stdout_lines"][0] or 1) + + if oid_str == "1": + return None + + cmd = "sonic-db-cli COUNTERS_DB HGET COUNTERS:" + oid_str + \ + " SAI_QUEUE_STAT_DROPPED_PACKETS" + out = duthost.command(cmd) + dropped_packets = int(out["stdout_lines"][0] or -1) + + if dropped_packets == -1: + return None + + return dropped_packets + def get_addrs_in_subnet(subnet, number_of_ip): """ Get N IP addresses in a subnet. - Args: subnet (str): IPv4 subnet, e.g., '192.168.1.1/24' number_of_ip (int): Number of IP addresses to get - Return: Return n IPv4 addresses in this subnet in a list. """ @@ -127,11 +172,9 @@ def get_peer_snappi_chassis(conn_data, dut_hostname): """ Get the Snappi chassis connected to the DUT Note that a DUT can only be connected to a Snappi chassis - Args: conn_data (dict): the dictionary returned by conn_graph_fact. Example format of the conn_data is given below: - {u'device_conn': {u'sonic-s6100-dut': {u'Ethernet64': {u'peerdevice': u'snappi-sonic', u'peerport': u'Card4/Port1', @@ -168,9 +211,7 @@ def get_peer_snappi_chassis(conn_data, dut_hostname): u'device_vlan_list': {u'sonic-s6100-dut': [2, 2, 2, 2]}, u'device_vlan_map_list': {u'sonic-s6100-dut': {u'19': 2}}, u'device_vlan_range': {u'sonic-s6100-dut': [u'2']}} - dut_hostname (str): hostname of the DUT - Returns: The name of the peer Snappi chassis or None """ @@ -192,11 +233,9 @@ def get_peer_snappi_chassis(conn_data, dut_hostname): def get_peer_port(conn_data, dut_hostname, dut_intf): """ Get the peer port of the DUT port - Args: conn_data (dict): the dictionary returned by conn_graph_fact. 
Example format of the conn_data is given below: - {u'device_conn': {u'sonic-s6100-dut': {u'Ethernet64': {u'peerdevice': u'snappi-sonic', u'peerport': u'Card4/Port1', @@ -233,10 +272,8 @@ def get_peer_port(conn_data, dut_hostname, dut_intf): u'device_vlan_list': {u'sonic-s6100-dut': [2, 2, 2, 2]}, u'device_vlan_map_list': {u'sonic-s6100-dut': {u'19': 2}}, u'device_vlan_range': {u'sonic-s6100-dut': [u'2']}} - dut_hostname (str): hostname of the DUT dut_intf (str): name of DUT interface - Returns: The name of the peer port or None """ @@ -254,11 +291,9 @@ def get_peer_port(conn_data, dut_hostname, dut_intf): def get_dut_intfs(conn_data, dut_hostname): """ Get DUT's interfaces - Args: conn_data (dict): the dictionary returned by conn_graph_fact. Example format of the conn_data is given below: - {u'device_conn': {u'sonic-s6100-dut': {u'Ethernet64': {u'peerdevice': u'snappi-sonic', u'peerport': u'Card4/Port1', @@ -295,9 +330,7 @@ def get_dut_intfs(conn_data, dut_hostname): u'device_vlan_list': {u'sonic-s6100-dut': [2, 2, 2, 2]}, u'device_vlan_map_list': {u'sonic-s6100-dut': {u'19': 2}}, u'device_vlan_range': {u'sonic-s6100-dut': [u'2']}} - dut_hostname (str): hostname of the DUT - Returns: Return the list of interface names """ @@ -313,10 +346,8 @@ def get_dut_intfs(conn_data, dut_hostname): def pfc_class_enable_vector(prio_list): """ Calculate class-enable vector field in PFC PAUSE frames - Args: prio_list (list): list of priorities to pause, e.g., [3, 4] - Returns: Return class-enable vector """ @@ -331,14 +362,11 @@ def pfc_class_enable_vector(prio_list): def get_wred_profiles(host_ans): """ Get all the WRED/ECN profiles of a SONiC switch - Args: host_ans: Ansible host instance of the device - Returns: WRED/ECN profiles (dictionary) or None. 
Example format is given below: - { u'AZURE_LOSSLESS': { u'ecn': u'ecn_all', @@ -369,14 +397,12 @@ def get_wred_profiles(host_ans): def config_wred(host_ans, kmin, kmax, pmax, profile=None): """ Config a WRED/ECN profile of a SONiC switch - Args: host_ans: Ansible host instance of the device kmin (int): RED/ECN minimum threshold in bytes kmax (int): RED/ECN maximum threshold in bytes pmax (int): RED/ECN maximum marking probability in percentage profile (str): name of profile to configure (None means any profile) - Returns: If configuration succeeds (bool) """ @@ -424,11 +450,9 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None): def enable_ecn(host_ans, prio): """ Enable ECN marking on a priority - Args: host_ans: Ansible host instance of the device prio (int): priority - Returns: N/A """ @@ -438,11 +462,9 @@ def enable_ecn(host_ans, prio): def disable_ecn(host_ans, prio): """ Disable ECN marking on a priority - Args: host_ans: Ansible host instance of the device prio (int): priority - Returns: N/A """ @@ -452,12 +474,10 @@ def disable_ecn(host_ans, prio): def config_buffer_alpha(host_ans, profile, alpha_log2): """ Configure buffer threshold (a.k.a., alpha) - Args: host_ans: Ansible host instance of the device profile (str): buffer profile name alpha_log2 (int): set threshold to 2^alpha_log2 - Returns: N/A """ @@ -467,11 +487,9 @@ def config_buffer_alpha(host_ans, profile, alpha_log2): def config_ingress_lossless_buffer_alpha(host_ans, alpha_log2): """ Configure ingress buffer thresholds (a.k.a., alpha) of a device to 2^alpha_log2 - Args: host_ans: Ansible host instance of the device alpha_log2 (int): set threshold to 2^alpha_log2 - Returns: If configuration succeeds (bool) """ @@ -508,12 +526,10 @@ def config_ingress_lossless_buffer_alpha(host_ans, alpha_log2): def get_pfcwd_config_attr(host_ans, config_scope, attr): """ Get PFC watchdog configuration attribute - Args: host_ans: Ansible host instance of the device config_scope (str): 'GLOBAL' or interface 
name attr (str): config attribute name, e.g., 'detection_time' - Returns: config attribute (str) or None """ @@ -537,10 +553,8 @@ def get_pfcwd_config_attr(host_ans, config_scope, attr): def get_pfcwd_poll_interval(host_ans): """ Get PFC watchdog polling interval - Args: host_ans: Ansible host instance of the device - Returns: Polling interval in ms (int) or None """ @@ -557,11 +571,9 @@ def get_pfcwd_poll_interval(host_ans): def get_pfcwd_detect_time(host_ans, intf): """ Get PFC watchdog detection time of a given interface - Args: host_ans: Ansible host instance of the device intf (str): interface name - Returns: Detection time in ms (int) or None """ @@ -578,11 +590,9 @@ def get_pfcwd_detect_time(host_ans, intf): def get_pfcwd_restore_time(host_ans, intf): """ Get PFC watchdog restoration time of a given interface - Args: host_ans: Ansible host instance of the device intf (str): interface name - Returns: Restoration time in ms (int) or None """ @@ -599,10 +609,8 @@ def get_pfcwd_restore_time(host_ans, intf): def start_pfcwd(duthost): """ Start PFC watchdog with default setting - Args: duthost (AnsibleHost): Device Under Test (DUT) - Returns: N/A """ @@ -612,10 +620,8 @@ def start_pfcwd(duthost): def stop_pfcwd(duthost): """ Stop PFC watchdog - Args: duthost (AnsibleHost): Device Under Test (DUT) - Returns: N/A """ @@ -625,10 +631,8 @@ def stop_pfcwd(duthost): def disable_packet_aging(duthost): """ Disable packet aging feature (only on MLNX switches) - Args: duthost (AnsibleHost): Device Under Test (DUT) - Returns: N/A """ @@ -642,10 +646,8 @@ def disable_packet_aging(duthost): def enable_packet_aging(duthost): """ Enable packet aging feature (only on MLNX switches) - Args: duthost (AnsibleHost): Device Under Test (DUT) - Returns: N/A """ @@ -654,3 +656,108 @@ def enable_packet_aging(duthost): duthost.command("docker cp /tmp/packets_aging.py syncd:/") duthost.command("docker exec syncd python /packets_aging.py enable") duthost.command("docker exec syncd rm -rf 
/packets_aging.py") + + +def get_ipv6_addrs_in_subnet(subnet, number_of_ip): + """ + Get N IPv6 addresses in a subnet. + Args: + subnet (str): IPv6 subnet, e.g., '2001::1/64' + number_of_ip (int): Number of IP addresses to get + Return: + Return n IPv6 addresses in this subnet in a list. + """ + + subnet = str(IPNetwork(subnet).network) + "/" + str(subnet.split("/")[1]) + subnet = unicode(subnet, "utf-8") + ipv6_list = [] + for i in range(number_of_ip): + network = IPv6Network(subnet) + address = IPv6Address( + network.network_address + getrandbits( + network.max_prefixlen - network.prefixlen)) + ipv6_list.append(str(address)) + + return ipv6_list + + +def sec_to_nanosec(secs): + """ Convert seconds to nanoseconds """ + return secs * 1e9 + + +def get_pfc_frame_count(duthost, port, priority, is_tx=False): + """ + Get the PFC frame count for a given port and priority from SONiC CLI + Args: + duthost (Ansible host instance): device under test + port (str): port name + priority (int): priority of flow + is_tx (bool): if the PFC pause frame count is for Tx or Rx + Returns: + int: PFC pause frame count + """ + if is_tx: + raw_out = duthost.shell("show pfc counters | sed -n '/Port Tx/,/^$/p' | grep {}".format(port))['stdout'] + else: + raw_out = duthost.shell("show pfc counters | sed -n '/Port Rx/,/^$/p' | grep {}".format(port))['stdout'] + + pause_frame_count = raw_out.split()[priority + 1] + + return int(pause_frame_count.replace(',', '')) + + +def get_egress_queue_count(duthost, port, priority): + """ + Get the egress queue count in packets and bytes for a given port and priority from SONiC CLI. + This is the equivalent of the "show queue counters" command. 
+ Args: + duthost (Ansible host instance): device under test + port (str): port name + priority (int): priority of flow + Returns: + tuple (int, int): total count of packets and bytes in the queue + """ + raw_out = duthost.shell("show queue counters {} | sed -n '/UC{}/p'".format(port, priority))['stdout'] + total_pkts = raw_out.split()[2] + total_bytes = raw_out.split()[3] + return int(total_pkts.replace(',', '')), int(total_bytes.replace(',', '')) + + +class packet_capture(Enum): + """ + ENUM of packet capture settings + NO_CAPTURE - No capture + PFC_CAPTURE - PFC capture enabled + IP_CAPTURE - IP capture enabled + """ + NO_CAPTURE = "No_Capture" + PFC_CAPTURE = "PFC_Capture" + IP_CAPTURE = "IP_Capture" + + +def config_capture_pkt(testbed_config, port_names, capture_type, capture_name=None): + """ + Generate the configuration to capture packets on a port for a specific type of packet + Args: + testbed_config (obj): L2/L3 snappi config of a testbed + port_names (list of string): names of ixia ports to capture packets on + capture_type (Enum): Type of packet to capture + capture_name (str): Name of the capture + Returns: + N/A + """ + + cap = testbed_config.captures.capture(name=capture_name if capture_name else "PacketCapture")[-1] + cap.port_names = [] + for p_name in port_names: + cap.port_names.append(p_name) + cap.format = cap.PCAP + + if capture_type == packet_capture.IP_CAPTURE: + # Capture IP packets + ip_filter = cap.filters.custom()[-1] + # Version for IPv4 packets is "4" which has to be in the upper 4 bits of the first byte, hence filter is 0x40 + ip_filter.value = '40' + ip_filter.offset = 14 # Offset is the length of the Ethernet header + ip_filter.mask = '0f' # Mask is 0x0f to only match the upper 4 bits of the first byte which is the version diff --git a/tests/common/snappi_tests/pfc_packet.py b/tests/common/snappi_tests/pfc_packet.py new file mode 100644 index 00000000000..4e0618f41a3 --- /dev/null +++ b/tests/common/snappi_tests/pfc_packet.py @@ 
-0,0 +1,156 @@ +""" +The PFCPacket module allows for modular pass through of a PFC Packet's parameters for all Snappi based tests, +and appropriate storage of the PFC Packet's parameters. +""" + +import logging +import struct + +logger = logging.getLogger(__name__) + +PFC_MAC_CONTROL_CODE = 0x8808 +PFC_DEST_MAC = "01:80:c2:00:00:01" +PRIO_DEFAULT_LEN = 8 + + +class PFCPacket(): + def __init__(self, pfc_frame_bytes=None, cbfc_opcode=None, class_enable_vec=None, class_pause_times=None): + """ + Initialize the PFCPacket class + + Params: + cbfc_opcode (int): Class-based Flow Control (CBFC) opcode + class_enable_vec (list of int (binary)): class enable vector for PFC frame + ex. ['0', '0', '1', '0', '0', '0', '0', '0'] + class_pause_times (list of int): class pause times for PFC frame between 0 and 65535 for 8 priorities + is_valid_frame (bool): True if valid PFC frame, False otherwise + """ + if pfc_frame_bytes: + self.read_pfc_frame(pfc_frame_bytes=pfc_frame_bytes) + self.validate_pfc_frame() + else: + self.cbfc_opcode = cbfc_opcode + self.class_enable_vec = class_enable_vec + self.class_pause_times = class_pause_times + self.validate_pfc_frame() + + def read_pfc_frame(self, pfc_frame_bytes): + """ + Read PFC frame bytes and return the components of the frame, specifically, + the CBFC opcode, the class enable vector, and the class pause times. + + Args: + pfc_frame_bytes (bytes): bytes of PFC frame + Returns: + """ + cbfc_opcode = struct.unpack(">H", pfc_frame_bytes[0:2])[0] + class_enable_vec = struct.unpack(">H", pfc_frame_bytes[2:4])[0] + class_pause_times = [] + for i in range(0, 16, 2): + class_pause_times.append(struct.unpack(">H", pfc_frame_bytes[i + 4:i + 6])[0]) + + self.cbfc_opcode = cbfc_opcode + self.class_enable_vec = _num_to_class_enable_vec_array(class_enable_vec) + self.class_pause_times = class_pause_times + + def _check_cbfc_opcode(self): + """ + Check if CBFC opcode is valid. 
+ + Args: + + Returns: + True if valid CBFC opcode, False otherwise + """ + if self.cbfc_opcode == 0x0101: + return True + else: + return False + + def _check_class_enable_vec(self): + """ + Check if class enable vector is valid i.e. either each bit is 0 or 1. + + Args: + class_enable_vec (list of chars): class enable vector + + Returns: + True if valid class enable vector, False otherwise + """ + valid_options = ["0", "1"] + for val in self.class_enable_vec: + if val not in valid_options: + return False + + return True + + def _check_class_pause_times(self): + """ + Check if class pause times are valid. Both conditions must be met: + 1) class pause times are between 0x0 and 0xFFFF + 2) class pause times are 0 if the corresponding bit in the class enable vector is 0, and vice versa + + Args: + + Returns: + True if valid class pause times, False otherwise + """ + for i in range(len(self.class_pause_times)): + if self.class_pause_times[i] < 0x0 and self.class_pause_times[i] > 0xFFFF: + return False + elif self.class_pause_times[i] > 0x0 and self.class_enable_vec[PRIO_DEFAULT_LEN - i - 1] == "0": + return False + elif self.class_pause_times[i] == 0x0 and self.class_enable_vec[PRIO_DEFAULT_LEN - i - 1] == "1": + return False + + return True + + def validate_pfc_frame(self): + """ + Validate the PFC frame. The PFC frame is valid if: + 1) CBFC opcode is 0x0101 + 2) class enable vector is valid + 3) class pause times are valid + + Check function subdefinitions for more details. + """ + is_valid_cbfc_opcode = self._check_cbfc_opcode() + is_valid_class_enable_vec = self._check_class_enable_vec() + is_valid_class_pause_times = self._check_class_pause_times() + if not is_valid_cbfc_opcode or not is_valid_class_enable_vec or not is_valid_class_pause_times: + self.is_valid_frame = False + else: + self.is_valid_frame = True + + def is_valid(self): + """ + Check if PFC frame is valid. 
+ + Args: + + Returns: + True if valid PFC frame, False otherwise + """ + return self.is_valid_frame + + +# Helper methods +def _num_to_class_enable_vec_array(class_enable_vec_int): + """ + Convert a class enable vector number (base 10) to a class enable vector array (binary). + + Args: + class_enable_vec_int (int): class enable vector number (base 10) + Returns: + class_enable_vec_array (list of chars): class enable vector array (binary string format) + ex. ['0', '0', '1', '0', '0', '0', '0', '0'] + """ + class_enable_vec_binary = bin(class_enable_vec_int)[2:] + + if len(class_enable_vec_binary) < PRIO_DEFAULT_LEN: + fill = "0" * (PRIO_DEFAULT_LEN - len(class_enable_vec_binary)) + class_enable_vec_binary = fill + class_enable_vec_binary + + class_enable_vec_array = [val for val in class_enable_vec_binary] + + return class_enable_vec_array diff --git a/tests/common/snappi/port.py b/tests/common/snappi_tests/port.py similarity index 100% rename from tests/common/snappi/port.py rename to tests/common/snappi_tests/port.py diff --git a/tests/common/snappi/qos_fixtures.py b/tests/common/snappi_tests/qos_fixtures.py similarity index 100% rename from tests/common/snappi/qos_fixtures.py rename to tests/common/snappi_tests/qos_fixtures.py diff --git a/tests/common/snappi_tests/read_pcap.py b/tests/common/snappi_tests/read_pcap.py new file mode 100644 index 00000000000..9de721513b5 --- /dev/null +++ b/tests/common/snappi_tests/read_pcap.py @@ -0,0 +1,55 @@ +import logging +import dpkt +from dpkt.utils import mac_to_str + +from tests.common.snappi_tests.pfc_packet import PFCPacket + +logger = logging.getLogger(__name__) + +PFC_MAC_CONTROL_CODE = 0x8808 +PFC_DEST_MAC = "01:80:c2:00:00:01" + + +def validate_pfc_frame(pfc_pcap_file, SAMPLE_SIZE=15000, UTIL_THRESHOLD=0.8): + """ + Validate PFC frame by checking the CBFC opcode, class enable vector and class pause times. 
+ + Args: + pfc_cap: PFC pcap file + SAMPLE_SIZE: number of packets to sample + UTIL_THRESHOLD: threshold for PFC utilization to check if enough PFC frames were sent + + Returns: + True if valid PFC frame, False otherwise + """ + f = open(pfc_pcap_file, "rb") + pcap = dpkt.pcapng.Reader(f) + + curPktCount = 0 + curPFCPktCount = 0 + for _, buf in pcap: + if curPktCount >= SAMPLE_SIZE: + break + eth = dpkt.ethernet.Ethernet(buf) + if eth.type == PFC_MAC_CONTROL_CODE: + dest_mac = mac_to_str(eth.dst) + if dest_mac.lower() != PFC_DEST_MAC: + return False + pfc_packet = PFCPacket(pfc_frame_bytes=bytes(eth.data)) + if not pfc_packet.is_valid(): + logger.info("PFC frame {} is not valid. Please check the capture file.".format(curPktCount)) + return False + curPFCPktCount += 1 + curPktCount += 1 + + f.close() + pfc_util = curPktCount / SAMPLE_SIZE + + if curPktCount == 0: + logger.info("No PFC frames found in the capture file.") + return False + elif pfc_util < UTIL_THRESHOLD: + logger.info("PFC utilization is too low. 
Please check the capture file.") + return False + + return True diff --git a/tests/common/snappi/snappi_fixtures.py b/tests/common/snappi_tests/snappi_fixtures.py similarity index 97% rename from tests/common/snappi/snappi_fixtures.py rename to tests/common/snappi_tests/snappi_fixtures.py index 0f884c39034..e690ff302ea 100644 --- a/tests/common/snappi/snappi_fixtures.py +++ b/tests/common/snappi_tests/snappi_fixtures.py @@ -6,10 +6,10 @@ from ipaddress import ip_address, IPv4Address from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.common_helpers import get_addrs_in_subnet,\ +from tests.common.snappi_tests.common_helpers import get_addrs_in_subnet,\ get_peer_snappi_chassis -from tests.common.snappi.snappi_helpers import SnappiFanoutManager, get_snappi_port_location -from tests.common.snappi.port import SnappiPortConfig, SnappiPortType +from tests.common.snappi_tests.snappi_helpers import SnappiFanoutManager, get_snappi_port_location +from tests.common.snappi_tests.port import SnappiPortConfig, SnappiPortType from tests.common.helpers.assertions import pytest_assert @@ -38,8 +38,7 @@ def snappi_api_serv_port(duthosts, rand_one_dut_hostname): """ duthost = duthosts[rand_one_dut_hostname] return (duthost.host.options['variable_manager']. 
- _hostvars[duthost.hostname]['secret_group_vars'] - ['snappi_api_server']['rest_port']) + _hostvars[duthost.hostname]['snappi_api_server']['rest_port']) @pytest.fixture(scope='module') @@ -83,7 +82,7 @@ def __gen_pc_mac(id): Returns: MAC address (string) """ - return '11:22:33:44:55:{:02d}'.format(id) + return '10:22:33:44:55:{:02d}'.format(id) def __valid_ipv4_addr(ip): """ diff --git a/tests/common/snappi/snappi_helpers.py b/tests/common/snappi_tests/snappi_helpers.py similarity index 99% rename from tests/common/snappi/snappi_helpers.py rename to tests/common/snappi_tests/snappi_helpers.py index 90ede313f44..91700bc6282 100644 --- a/tests/common/snappi/snappi_helpers.py +++ b/tests/common/snappi_tests/snappi_helpers.py @@ -6,7 +6,7 @@ """ from tests.common.helpers.assertions import pytest_assert -from tests.common.snappi.common_helpers import ansible_stdout_to_str, get_peer_snappi_chassis +from tests.common.snappi_tests.common_helpers import ansible_stdout_to_str, get_peer_snappi_chassis from tests.common.reboot import logger import time diff --git a/tests/common/snappi_tests/snappi_test_params.py b/tests/common/snappi_tests/snappi_test_params.py new file mode 100644 index 00000000000..524f6de1fb6 --- /dev/null +++ b/tests/common/snappi_tests/snappi_test_params.py @@ -0,0 +1,35 @@ +""" +The SnappiTestParams module allows for modular pass through of test parameters for all Snappi based tests. +""" + +from tests.common.snappi_tests.common_helpers import packet_capture + + +class SnappiTestParams(): + def __init__(self): + """ + Initialize the SnappiTestParams class + + Params: + headroom_test_params (array): 2 element array if the associated pfc pause quanta + results in no packet drop [pfc_delay, headroom_result] + pfc_pause_src_mac (str): PFC pause source MAC address ex. '00:00:00:fa:ce:01' + set_pfc_class_enable_vec (bool): PFC class enable vector setting + packet_capture_type (ENUM): packet capture type ex. 
packet_capture.IP_CAPTURE + packet_capture_file (str): packet capture file ex. 'capture.pcapng' + packet_capture_ports (list): packet capture ports on ixia chassis ex. ['Port 1', 'Port 2'] + is_snappi_ingress_port_cap (bool): whether or not the packet capture is on the tgen ingress port, if False, + then pcap is on the tgen egress port + base_flow_config (dict): base flow configuration + test_tx_frames (list): number of test frames transmitted for priorities to test ex. [2000, 3000] + for priorities 3 and 4 + """ + self.headroom_test_params = None + self.pfc_pause_src_mac = None + self.set_pfc_class_enable_vec = True + self.packet_capture_type = packet_capture.NO_CAPTURE + self.packet_capture_file = None + self.packet_capture_ports = None + self.is_snappi_ingress_port_cap = True + self.base_flow_config = None + self.test_tx_frames = 0 diff --git a/tests/common/snappi_tests/traffic_generation.py b/tests/common/snappi_tests/traffic_generation.py new file mode 100644 index 00000000000..4a00e51598b --- /dev/null +++ b/tests/common/snappi_tests/traffic_generation.py @@ -0,0 +1,586 @@ +""" +This module allows various snappi based tests to generate various traffic configurations. +""" + +import time +import logging +from tests.common.helpers.assertions import pytest_assert +from tests.common.snappi_tests.common_helpers import get_egress_queue_count, pfc_class_enable_vector,\ + get_lossless_buffer_size, get_pg_dropped_packets,\ + sec_to_nanosec, get_pfc_frame_count, packet_capture +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp + +logger = logging.getLogger(__name__) + +SNAPPI_POLL_DELAY_SEC = 2 +CONTINUOUS_MODE = -5 + + +def setup_base_traffic_config(testbed_config, + port_config_list, + port_id): + """ + Generate base configurations of flows, including test flows, background flows and + pause storm. Test flows and background flows are also known as data flows. 
+ Args: + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + port_id (int): ID of DUT port to test + + Returns: + base_flow_config (dict): base flow configuration containing dut_port_config, tx_mac, + rx_mac, tx_port_config, rx_port_config, tx_port_name, rx_port_name + dict key-value pairs (all keys are strings): + tx_port_id (int): ID of ixia TX port ex. 1 + rx_port_id (int): ID of ixia RX port ex. 2 + tx_port_config (SnappiPortConfig): port config obj for ixia TX port + rx_port_config (SnappiPortConfig): port config obj for ixia RX port + tx_mac (str): MAC address of ixia TX port ex. '00:00:fa:ce:fa:ce' + rx_mac (str): MAC address of ixia RX port ex. '00:00:fa:ce:fa:ce' + tx_port_name (str): name of ixia TX port ex. 'Port 1' + rx_port_name (str): name of ixia RX port ex. 'Port 2' + dut_port_config (list): a list of two dictionaries of tx and rx ports on the peer (switch) side, + and the associated test priorities + ex. [{'Ethernet4':[3, 4]}, {'Ethernet8':[3, 4]}] + """ + base_flow_config = {} + rx_port_id = port_id + tx_port_id_list, _ = select_ports(port_config_list=port_config_list, + pattern="many to one", + rx_port_id=rx_port_id) + + pytest_assert(len(tx_port_id_list) > 0, "Cannot find any TX ports") + tx_port_id = select_tx_port(tx_port_id_list=tx_port_id_list, + rx_port_id=rx_port_id) + pytest_assert(tx_port_id is not None, "Cannot find a suitable TX port") + base_flow_config["rx_port_id"] = rx_port_id + base_flow_config["tx_port_id"] = tx_port_id + + tx_port_config = next((x for x in port_config_list if x.id == tx_port_id), None) + rx_port_config = next((x for x in port_config_list if x.id == rx_port_id), None) + base_flow_config["tx_port_config"] = tx_port_config + base_flow_config["rx_port_config"] = rx_port_config + + # Instantiate peer ports in dut_port_config + dut_port_config = [] + tx_dict = {str(tx_port_config.peer_port): []} + rx_dict = {str(rx_port_config.peer_port): []} + 
dut_port_config.append(tx_dict) + dut_port_config.append(rx_dict) + base_flow_config["dut_port_config"] = dut_port_config + + base_flow_config["tx_mac"] = tx_port_config.mac + if tx_port_config.gateway == rx_port_config.gateway and \ + tx_port_config.prefix_len == rx_port_config.prefix_len: + """ If soruce and destination port are in the same subnet """ + base_flow_config["rx_mac"] = rx_port_config.mac + else: + base_flow_config["rx_mac"] = tx_port_config.gateway_mac + + base_flow_config["tx_port_name"] = testbed_config.ports[tx_port_id].name + base_flow_config["rx_port_name"] = testbed_config.ports[rx_port_id].name + + return base_flow_config + + +def generate_test_flows(testbed_config, + test_flow_name, + test_flow_prio_list, + test_flow_rate_percent, + test_flow_dur_sec, + test_flow_delay_sec, + test_flow_pkt_size, + prio_dscp_map, + snappi_extra_params): + """ + Generate configurations of test flows. Test flows and background flows are also known as data flows. + + Args: + testbed_config (obj): testbed L1/L2/L3 configuration + test_flow_name (str): name of test flow + test_flow_prio_list (list): list of test flow priorities + test_flow_rate_percent (int): rate percentage of test flows + test_flow_dur_sec (int): duration of test flows + test_flow_delay_sec (int): delay of test flows in seconds + test_flow_pkt_size (int): packet size of test flows + prio_dscp_map (dict): priority to DSCP mapping + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + """ + base_flow_config = snappi_extra_params.base_flow_config + pytest_assert(base_flow_config is not None, "Cannot find base flow configuration") + + for prio in test_flow_prio_list: + test_flow = testbed_config.flows.flow(name='{} Prio {}'.format(test_flow_name, prio))[-1] + test_flow.tx_rx.port.tx_name = base_flow_config["tx_port_name"] + test_flow.tx_rx.port.rx_name = base_flow_config["rx_port_name"] + + eth, ipv4 = test_flow.packet.ethernet().ipv4() + eth.src.value = 
base_flow_config["tx_mac"] + eth.dst.value = base_flow_config["rx_mac"] + eth.pfc_queue.value = prio + + ipv4.src.value = base_flow_config["tx_port_config"].ip + ipv4.dst.value = base_flow_config["rx_port_config"].ip + ipv4.priority.choice = ipv4.priority.DSCP + ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + ipv4.priority.dscp.ecn.value = ( + ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + + test_flow.size.fixed = test_flow_pkt_size + test_flow.rate.percentage = test_flow_rate_percent + test_flow.duration.fixed_seconds.seconds = test_flow_dur_sec + test_flow.duration.fixed_seconds.delay.nanoseconds = int(sec_to_nanosec(test_flow_delay_sec)) + + test_flow.metrics.enable = True + test_flow.metrics.loss = True + + """ Set flow port config values """ + dut_port_config = base_flow_config["dut_port_config"] + dut_port_config[0][str(base_flow_config["tx_port_config"].peer_port)].append(int(prio)) + dut_port_config[1][str(base_flow_config["rx_port_config"].peer_port)].append(int(prio)) + base_flow_config["dut_port_config"] = dut_port_config + + snappi_extra_params.base_flow_config = base_flow_config + + +def generate_background_flows(testbed_config, + bg_flow_name, + bg_flow_prio_list, + bg_flow_rate_percent, + bg_flow_dur_sec, + bg_flow_delay_sec, + bg_flow_pkt_size, + prio_dscp_map, + snappi_extra_params): + """ + Generate background configurations of flows. Test flows and background flows are also known as data flows. 
+ + Args: + testbed_config (obj): testbed L1/L2/L3 configuration + bg_flow_name (str): name of background flow + bg_flow_prio_list (list): list of background flow priorities + bg_flow_rate_percent (int): rate percentage of background flows + bg_flow_dur_sec (int): duration of background flows + bg_flow_delay_sec (int): delay of background flows in seconds + bg_flow_pkt_size (int): packet size of background flows + prio_dscp_map (dict): priority to DSCP mapping + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + """ + base_flow_config = snappi_extra_params.base_flow_config + pytest_assert(base_flow_config is not None, "Cannot find base flow configuration") + + for prio in bg_flow_prio_list: + bg_flow = testbed_config.flows.flow(name='{} Prio {}'.format(bg_flow_name, prio))[-1] + bg_flow.tx_rx.port.tx_name = base_flow_config["tx_port_name"] + bg_flow.tx_rx.port.rx_name = base_flow_config["rx_port_name"] + + eth, ipv4 = bg_flow.packet.ethernet().ipv4() + eth.src.value = base_flow_config["tx_mac"] + eth.dst.value = base_flow_config["rx_mac"] + eth.pfc_queue.value = prio + + ipv4.src.value = base_flow_config["tx_port_config"].ip + ipv4.dst.value = base_flow_config["rx_port_config"].ip + ipv4.priority.choice = ipv4.priority.DSCP + ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + ipv4.priority.dscp.ecn.value = ( + ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + + bg_flow.size.fixed = bg_flow_pkt_size + bg_flow.rate.percentage = bg_flow_rate_percent + bg_flow.duration.fixed_seconds.seconds = bg_flow_dur_sec + bg_flow.duration.fixed_seconds.delay.nanoseconds = int(sec_to_nanosec(bg_flow_delay_sec)) + + bg_flow.metrics.enable = True + bg_flow.metrics.loss = True + + +def generate_pause_flows(testbed_config, + pause_flow_name, + pause_prio_list, + global_pause, + snappi_extra_params, + pause_flow_delay_sec=0, + pause_flow_dur_sec=CONTINUOUS_MODE): + """ + Generate configurations of pause flows. 
+ + Args: + testbed_config (obj): testbed L1/L2/L3 configuration + pause_flow_name (str): name of pause flow + pause_prio_list (list): list of pause priorities + global_pause (bool): global pause or per priority pause + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + pause_flow_delay_sec (int): delay of pause flows in seconds + pause_flow_dur_sec (int): duration of pause flows in seconds except when set to continuous + """ + base_flow_config = snappi_extra_params.base_flow_config + pytest_assert(base_flow_config is not None, "Cannot find base flow configuration") + + pause_flow = testbed_config.flows.flow(name=pause_flow_name)[-1] + pause_flow.tx_rx.port.tx_name = testbed_config.ports[base_flow_config["rx_port_id"]].name + pause_flow.tx_rx.port.rx_name = testbed_config.ports[base_flow_config["tx_port_id"]].name + + if global_pause: + pause_pkt = pause_flow.packet.ethernetpause()[-1] + pause_pkt.dst.value = "01:80:C2:00:00:01" + pause_pkt.src.value = snappi_extra_params.pfc_pause_src_mac if snappi_extra_params.pfc_pause_src_mac \ + else "00:00:fa:ce:fa:ce" + else: + pause_time = [] + for x in range(8): + if x in pause_prio_list: + pause_time.append(int('ffff', 16)) + else: + pause_time.append(int('0000', 16)) + + vector = pfc_class_enable_vector(pause_prio_list) + pause_pkt = pause_flow.packet.pfcpause()[-1] + pause_pkt.src.value = snappi_extra_params.pfc_pause_src_mac if snappi_extra_params.pfc_pause_src_mac \ + else "00:00:fa:ce:fa:ce" + pause_pkt.dst.value = "01:80:C2:00:00:01" + pause_pkt.class_enable_vector.value = vector if snappi_extra_params.set_pfc_class_enable_vec else 0 + pause_pkt.pause_class_0.value = pause_time[0] + pause_pkt.pause_class_1.value = pause_time[1] + pause_pkt.pause_class_2.value = pause_time[2] + pause_pkt.pause_class_3.value = pause_time[3] + pause_pkt.pause_class_4.value = pause_time[4] + pause_pkt.pause_class_5.value = pause_time[5] + pause_pkt.pause_class_6.value = pause_time[6] + 
pause_pkt.pause_class_7.value = pause_time[7] + + # Pause frames are sent from the RX port of ixia + speed_str = testbed_config.layer1[0].speed + speed_gbps = int(speed_str.split('_')[1]) + pause_dur = 65535 * 64 * 8.0 / (speed_gbps * 1e9) + pps = int(2 / pause_dur) + + pause_flow.rate.pps = pps + pause_flow.size.fixed = 64 + if pause_flow_dur_sec != CONTINUOUS_MODE: + pause_flow.duration.fixed_seconds.seconds = pause_flow_dur_sec + pause_flow.duration.fixed_seconds.delay.nanoseconds = int(sec_to_nanosec(pause_flow_delay_sec)) + else: + pause_flow.duration.choice = pause_flow.duration.CONTINUOUS + pause_flow.duration.continuous.delay.nanoseconds = int(sec_to_nanosec(pause_flow_delay_sec)) + + pause_flow.metrics.enable = True + pause_flow.metrics.loss = True + + +def run_traffic(api, + config, + data_flow_names, + all_flow_names, + exp_dur_sec, + snappi_extra_params): + + """ + Run traffic and return per-flow statistics, and capture packets if needed. + Args: + api (obj): snappi session + config (obj): experiment config (testbed config + flow config) + data_flow_names (list): list of names of data (test and background) flows + all_flow_names (list): list of names of all the flows + exp_dur_sec (int): experiment duration in second + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + per-flow statistics (list) + """ + + api.set_config(config) + + logger.info("Wait for Arp to Resolve ...") + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) + + pcap_type = snappi_extra_params.packet_capture_type + + if pcap_type != packet_capture.NO_CAPTURE: + cs = api.capture_state() + cs.port_names = snappi_extra_params.packet_capture_ports + cs.state = cs.START + api.set_capture_state(cs) + + logger.info("Starting transmit on all flows ...") + ts = api.transmit_state() + ts.state = ts.START + api.set_transmit_state(ts) + + time.sleep(exp_dur_sec) + + attempts = 0 + max_attempts = 20 + + while attempts < max_attempts: + request = 
api.metrics_request() + request.flow.flow_names = data_flow_names + flow_metrics = api.get_metrics(request).flow_metrics + + # If all the data flows have stopped + transmit_states = [metric.transmit for metric in flow_metrics] + if len(flow_metrics) == len(data_flow_names) and\ + list(set(transmit_states)) == ['stopped']: + time.sleep(SNAPPI_POLL_DELAY_SEC) + break + else: + time.sleep(1) + attempts += 1 + + pytest_assert(attempts < max_attempts, + "Flows do not stop in {} seconds".format(max_attempts)) + + if pcap_type != packet_capture.NO_CAPTURE: + request = api.capture_request() + request.port_name = snappi_extra_params.packet_capture_ports[0] + cs = api.capture_state() + cs.state = cs.STOP + api.set_capture_state(cs) + pcap_bytes = api.get_capture(request) + with open(snappi_extra_params.packet_capture_file + ".pcapng", 'wb') as fid: + fid.write(pcap_bytes.getvalue()) + + # Dump per-flow statistics + request = api.metrics_request() + request.flow.flow_names = all_flow_names + flow_metrics = api.get_metrics(request).flow_metrics + logger.info("Stop transmit on all flows ...") + ts = api.transmit_state() + ts.state = ts.STOP + api.set_transmit_state(ts) + + return flow_metrics + + +def verify_pause_flow(flow_metrics, + pause_flow_name): + """ + Verify pause flow statistics i.e. all pause frames should be dropped + + Args: + flow_metrics (list): per-flow statistics + pause_flow_name (str): name of the pause flow + Returns: + """ + pause_flow_row = next(metric for metric in flow_metrics if metric.name == pause_flow_name) + pause_flow_tx_frames = pause_flow_row.frames_tx + pause_flow_rx_frames = pause_flow_row.frames_rx + + pytest_assert(pause_flow_tx_frames > 0 and pause_flow_rx_frames == 0, + "All the pause frames should be dropped") + + +def verify_background_flow(flow_metrics, + bg_flow_name, + bg_flow_rate_percent, + bg_flow_dur_sec, + bg_flow_pkt_size, + speed_gbps, + tolerance, + snappi_extra_params): + """ + Verify background flow statistics. 
Background traffic on lossy priorities should not be dropped when there is no + congestion, else some packets should be dropped if there is congestion. + + Args: + flow_metrics (list): per-flow statistics + bg_flow_name (str): name of the background flow + bg_flow_rate_percent (int): background flow rate in percentage + bg_flow_dur_sec (int): background data flow duration in second + bg_flow_pkt_size (int): background data packet size in bytes + speed_gbps (int): speed of the port in Gbps + tolerance (float): tolerance for background flow deviation + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + for metric in flow_metrics: + if bg_flow_name not in metric.name: + continue + + tx_frames = metric.frames_tx + rx_frames = metric.frames_rx + + exp_bg_flow_rx_pkts = bg_flow_rate_percent / 100.0 * speed_gbps \ + * 1e9 * bg_flow_dur_sec / 8.0 / bg_flow_pkt_size + deviation = (rx_frames - exp_bg_flow_rx_pkts) / float(exp_bg_flow_rx_pkts) + + pytest_assert(tx_frames == rx_frames, + "{} should not have any dropped packet".format(metric.name)) + + pytest_assert(abs(deviation) < tolerance, + "{} should receive {} packets (actual {})".format(metric.name, exp_bg_flow_rx_pkts, rx_frames)) + + +def verify_basic_test_flow(flow_metrics, + test_flow_name, + test_flow_rate_percent, + test_flow_dur_sec, + test_flow_pkt_size, + speed_gbps, + tolerance, + test_flow_pause, + snappi_extra_params): + """ + Verify basic test flow statistics from ixia. Test traffic on lossless priorities should not be dropped regardless + of whether there is congestion or not. 
+ + Args: + flow_metrics (list): per-flow statistics + test_flow_name (str): name of the test flow + test_flow_rate_percent (int): test flow rate in percentage + test_flow_dur_sec (int): test flow duration in second + test_flow_pkt_size (int): test packet size in bytes + speed_gbps (int): speed of the port in Gbps + tolerance (float): tolerance for test flow deviation + test_flow_pause (bool): whether test flow is expected to be paused + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + test_tx_frames = [] + + for metric in flow_metrics: + if test_flow_name not in metric.name: + continue + + tx_frames = metric.frames_tx + rx_frames = metric.frames_rx + test_tx_frames.append(tx_frames) + + if test_flow_pause: + pytest_assert(tx_frames > 0 and rx_frames == 0, + "{} should be paused".format(metric.name)) + else: + pytest_assert(tx_frames == rx_frames, + "{} should not have any dropped packet".format(metric.name)) + + exp_test_flow_rx_pkts = test_flow_rate_percent / 100.0 * speed_gbps \ + * 1e9 * test_flow_dur_sec / 8.0 / test_flow_pkt_size + deviation = (rx_frames - exp_test_flow_rx_pkts) / float(exp_test_flow_rx_pkts) + pytest_assert(abs(deviation) < tolerance, + "{} should receive {} packets (actual {})". 
+ format(test_flow_name, exp_test_flow_rx_pkts, rx_frames)) + + snappi_extra_params.test_tx_frames = test_tx_frames + + +def verify_in_flight_buffer_pkts(duthost, + flow_metrics, + test_flow_name, + test_flow_pkt_size, + snappi_extra_params): + """ + Verify in-flight TX bytes of test flows should be held by switch buffer unless PFC delay is applied + for when test traffic is expected to be paused + + Args: + duthost (obj): DUT host object + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + tx_frames_total = sum(metric.frames_tx for metric in flow_metrics if test_flow_name in metric.name) + tx_bytes_total = tx_frames_total * test_flow_pkt_size + dut_buffer_size = get_lossless_buffer_size(host_ans=duthost) + headroom_test_params = snappi_extra_params.headroom_test_params + dut_port_config = snappi_extra_params.base_flow_config["dut_port_config"] + pytest_assert(dut_port_config is not None, "Flow port config is not provided") + + if headroom_test_params is None: + exceeds_headroom = False + elif headroom_test_params[1]: + exceeds_headroom = False + else: + exceeds_headroom = True + + if exceeds_headroom: + pytest_assert(tx_bytes_total > dut_buffer_size, + "Total TX bytes {} should exceed DUT buffer size {}". + format(tx_bytes_total, dut_buffer_size)) + + for peer_port, prios in dut_port_config[0].items(): + for prio in prios: + dropped_packets = get_pg_dropped_packets(duthost, peer_port, prio) + pytest_assert(dropped_packets > 0, + "Total TX dropped packets {} should be more than 0". + format(dropped_packets)) + else: + pytest_assert(tx_bytes_total < dut_buffer_size, + "Total TX bytes {} should be smaller than DUT buffer size {}". + format(tx_bytes_total, dut_buffer_size)) + + for peer_port, prios in dut_port_config[0].items(): + for prio in prios: + dropped_packets = get_pg_dropped_packets(duthost, peer_port, prio) + pytest_assert(dropped_packets == 0, + "Total TX dropped packets {} should be 0". 
+ format(dropped_packets)) + + +def verify_pause_frame_count(duthost, + snappi_extra_params): + """ + Verify correct frame count for pause frames when the traffic is expected to be paused + + Args: + duthost (obj): DUT host object + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + dut_port_config = snappi_extra_params.base_flow_config["dut_port_config"] + pytest_assert(dut_port_config is not None, 'Flow port config is not provided') + + for peer_port, prios in dut_port_config[1].items(): + for prio in range(len(prios)): + pfc_pause_rx_frames = get_pfc_frame_count(duthost, peer_port, prios[prio]) + pytest_assert(pfc_pause_rx_frames > 0, + "PFC pause frames with zero source MAC are not counted in the PFC counters") + + +def verify_unset_cev_pause_frame_count(duthost, + snappi_extra_params): + """ + Verify zero pause frames are counted when the PFC class enable vector is not set + + Args: + duthost (obj): DUT host object + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + dut_port_config = snappi_extra_params.base_flow_config["dut_port_config"] + pytest_assert(dut_port_config is not None, 'Flow port config is not provided') + set_class_enable_vec = snappi_extra_params.set_pfc_class_enable_vec + + if not set_class_enable_vec: + for peer_port, prios in dut_port_config[1].items(): + for prio in range(len(prios)): + pfc_pause_rx_frames = get_pfc_frame_count(duthost, peer_port, prios[prio]) + pytest_assert(pfc_pause_rx_frames == 0, + "PFC pause frames with no bit set in the class enable vector should be dropped") + + +def verify_egress_queue_frame_count(duthost, + snappi_extra_params): + """ + Verify correct frame count for regular traffic from DUT egress queue + + Args: + duthost (obj): DUT host object + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + Returns: + + """ + dut_port_config = 
snappi_extra_params.base_flow_config["dut_port_config"] + pytest_assert(dut_port_config is not None, 'Flow port config is not provided') + set_class_enable_vec = snappi_extra_params.set_pfc_class_enable_vec + test_tx_frames = snappi_extra_params.test_tx_frames + + if not set_class_enable_vec: + for peer_port, prios in dut_port_config[1].items(): + for prio in range(len(prios)): + total_egress_packets, _ = get_egress_queue_count(duthost, peer_port, prios[prio]) + pytest_assert(total_egress_packets == test_tx_frames[prio], + "Queue counters should increment for invalid PFC pause frames") diff --git a/tests/common/system_utils/docker.py b/tests/common/system_utils/docker.py index 7cc7041bc68..c2cdb323b89 100644 --- a/tests/common/system_utils/docker.py +++ b/tests/common/system_utils/docker.py @@ -9,6 +9,7 @@ from tests.common.broadcom_data import is_broadcom_device from tests.common.mellanox_data import is_mellanox_device from tests.common.errors import RunAnsibleModuleFail +from tests.common.cisco_data import is_cisco_device logger = logging.getLogger(__name__) @@ -26,7 +27,8 @@ class DockerRegistryInfo(_DockerRegistryInfo): username (str): The username used to access the registry. password (str): The password used to access the registry. 
""" - pass + def __repr__(self): + return "DockerRegistryInfo(host='{}', username='{}', password='******')".format(self.host, self.username) def load_docker_registry_info(duthost, creds): @@ -182,7 +184,6 @@ def ready_for_swap(): if any([ duthost.is_container_running("syncd"), duthost.is_container_running("swss"), - not duthost.is_bgp_state_idle() ]): return False @@ -205,6 +206,8 @@ def _get_vendor_id(duthost): vendor_id = "brcm" elif is_mellanox_device(duthost): vendor_id = "mlnx" + elif is_cisco_device(duthost): + vendor_id = "cisco" else: error_message = '"{}" does not currently support swap_syncd'.format(duthost.facts["asic_type"]) logger.error(error_message) diff --git a/tests/common/templates/backend_acl_update_config.j2 b/tests/common/templates/backend_acl_update_config.j2 new file mode 100644 index 00000000000..b641f38ea15 --- /dev/null +++ b/tests/common/templates/backend_acl_update_config.j2 @@ -0,0 +1,69 @@ +{%- set vlan2ports = {} %} +{%- for vlan in VLAN %} + {% set portlist = [] %} + {%- for vlan_name, port in VLAN_MEMBER %} + {%- if vlan_name == vlan %} + {%- if portlist.append(port) %}{%- endif %} + {%- endif %} + {%- endfor %} + {%- set _ = vlan2ports.update({vlan: portlist| sort | join(',')}) %} +{%- endfor %} + + +{ + "acl": { + "acl-sets": { + "acl-set": { + "DATAACL": { + "acl-entries": { + "acl-entry": { + {% for vlan, vlan_entries in VLAN.items() %} + "{{ loop.index }}": { + "config": { + "sequence-id": {{ loop.index }} + }, + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "l2": { + "config": { + "vlan_id": "{{ vlan_entries['vlanid'] }}" + } + }, + "input_interface": { + "interface_ref": { + "config": { + "interface": "{{ vlan2ports[vlan] }}" + } + } + } + + }, + {% endfor -%} + "999": { + "config": { + "sequence-id": 999 + }, + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "input_interface": { + "interface_ref": { + "config": { + "interface": "{{ intf_list }}" + } + } + } + } + + } + } + } + } 
+ } + } +} diff --git a/tests/common/templates/default_acl_rules.json b/tests/common/templates/default_acl_rules.json new file mode 100644 index 00000000000..e3de18c870c --- /dev/null +++ b/tests/common/templates/default_acl_rules.json @@ -0,0 +1,38 @@ +{ + "acl": { + "acl-sets": { + "acl-set": { + "dataacl": { + "acl-entries": { + "acl-entry": { + "1": { + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "config": { + "sequence-id": 1 + }, + "l2": { + "config": { + "ethertype": "2048", + "vlan_id": "1000" + } + }, + "input_interface": { + "interface_ref": + { + "config": { + "interface": "Ethernet12,Ethernet16,Ethernet20,Ethernet24,Ethernet28,Ethernet32,Ethernet36,Ethernet4,Ethernet40,Ethernet44,Ethernet48,Ethernet52,Ethernet56,Ethernet60,Ethernet64,Ethernet68,Ethernet72,Ethernet76,Ethernet8" + } + } + } + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/tests/common/templates/pfc_storm_sonic.j2 b/tests/common/templates/pfc_storm_sonic.j2 index cf523ba4e43..a626c77fe6b 100644 --- a/tests/common/templates/pfc_storm_sonic.j2 +++ b/tests/common/templates/pfc_storm_sonic.j2 @@ -1,6 +1,6 @@ cd {{pfc_gen_dir}} {% if (pfc_asym is defined) and (pfc_asym == True) %} -nohup sh -c "{% if pfc_storm_defer_time is defined %}sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}}" > /dev/null 2>&1 & +nohup sh -c "{% if pfc_storm_defer_time is defined %}sleep {{pfc_storm_defer_time}} &&{% endif %} sudo python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}}" > /dev/null 2>&1 & {% else %} -nohup sh -c "{% if pfc_storm_defer_time is defined %}sleep {{pfc_storm_defer_time}} &&{% endif %} python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}} -r {{ansible_eth0_ipv4_addr}}" > /dev/null 2>&1 & +nohup sh -c "{% if 
pfc_storm_defer_time is defined %}sleep {{pfc_storm_defer_time}} &&{% endif %} sudo python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}} -r {{ansible_eth0_ipv4_addr}}" > /dev/null 2>&1 & {% endif %} diff --git a/tests/common/templates/pfc_storm_stop_sonic.j2 b/tests/common/templates/pfc_storm_stop_sonic.j2 index 4208e15dc88..43c4dc5f99d 100644 --- a/tests/common/templates/pfc_storm_stop_sonic.j2 +++ b/tests/common/templates/pfc_storm_stop_sonic.j2 @@ -1,6 +1,6 @@ cd {{pfc_gen_dir}} {% if (pfc_asym is defined) and (pfc_asym == True) %} -nohup sh -c "{% if pfc_storm_stop_defer_time is defined %}sleep {{pfc_storm_stop_defer_time}} &&{% endif %} pkill -f 'python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}}'" > /dev/null 2>&1 & +nohup sh -c "{% if pfc_storm_stop_defer_time is defined %}sleep {{pfc_storm_stop_defer_time}} &&{% endif %} sudo pkill -f 'python {{pfc_gen_file}} -p {{pfc_queue_index}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}}'" > /dev/null 2>&1 & {% else %} -nohup sh -c "{% if pfc_storm_stop_defer_time is defined %}sleep {{pfc_storm_stop_defer_time}} &&{% endif %} pkill -f 'python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}} -r {{ansible_eth0_ipv4_addr}}'" > /dev/null 2>&1 & +nohup sh -c "{% if pfc_storm_stop_defer_time is defined %}sleep {{pfc_storm_stop_defer_time}} &&{% endif %} sudo pkill -f 'python {{pfc_gen_file}} -p {{(1).__lshift__(pfc_queue_index)}} -t 65535 -n {{pfc_frames_number}} -i {{pfc_fanout_interface}} -r {{ansible_eth0_ipv4_addr}}'" > /dev/null 2>&1 & {% endif %} diff --git a/tests/common/testbed.py b/tests/common/testbed.py index 71a8c7b0a4d..f2bd89db900 100644 --- a/tests/common/testbed.py +++ b/tests/common/testbed.py @@ -260,7 +260,7 @@ def _generate_sai_ptf_topo(self, tb_dict): def get_testbed_type(self, topo_name): - 
pattern = re.compile(r'^(t0|t1|ptf|fullmesh|dualtor|t2|tgen|mgmttor)') + pattern = re.compile(r'^(t0|t1|ptf|fullmesh|dualtor|t2|tgen|mgmttor|m0)') match = pattern.match(topo_name) if match == None: logger.warning("Unsupported testbed type - {}".format(topo_name)) diff --git a/tests/common/utilities.py b/tests/common/utilities.py index c7e0e24754c..e1be8f929ce 100644 --- a/tests/common/utilities.py +++ b/tests/common/utilities.py @@ -11,6 +11,10 @@ import threading import time import traceback +import json +import copy +import tempfile +import os from io import BytesIO import pytest @@ -21,6 +25,7 @@ from tests.common import constants from tests.common.cache import cached from tests.common.cache import FactsCache +from tests.common.helpers.constants import UPSTREAM_NEIGHBOR_MAP, DOWNSTREAM_NEIGHBOR_MAP logger = logging.getLogger(__name__) cache = FactsCache() @@ -304,8 +309,10 @@ def get_host_visible_vars(inv_files, hostname): The variable could be defined in host_vars or in group_vars that the host belongs to. Args: - inv_files (list or string): List of inventory file pathes, or string of a single inventory file path. In tests, + inv_files (list or string): List of inventory file paths, or string of a single inventory file path. In tests, it can be get from request.config.getoption("ansible_inventory"). + MUST use the inventory file under the ansible folder, otherwise host_vars and group_vars would not be + visible. hostname (string): Hostname Returns: @@ -548,3 +555,125 @@ def str2bool(str): :return: False if value is 0 or false, else True """ return str.lower() not in ["0", "false", "no"] + + +def safe_filename(filename, replacement_char='_'): + """Replace illegal characters in the original filename with "_" or other specified characters. + + Reference: https://www.mtu.edu/umc/services/websites/writing/characters-avoid/ + + Args: + filename (str): The original filename + replacement_char (str, optional): Replacement for illegal characters. Defaults to '_'. 
+ + Returns: + str: New filename with illegal characters replaced. + """ + illegal_chars_pattern = re.compile("[#%&{}\\<>\*\?/ \$!'\":@\+`|=]") + return re.sub(illegal_chars_pattern, replacement_char, filename) + + +def get_neighbor_port_list(duthost, neighbor_name): + """ + @summary: Get neighbor port in dut by neighbor_name + @param duthost: The DUT + @param neighbor_name: name or keyword contained in name of neighbor + @return a list of port name + Sample output: ["Ethernet45", "Ethernet46"] + """ + config_facts = duthost.get_running_config_facts() + neighbor_port_list = [] + for port_name, value in list(config_facts["DEVICE_NEIGHBOR"].items()): + if neighbor_name.upper() in value["name"].upper(): + neighbor_port_list.append(port_name) + + return neighbor_port_list + + +def get_neighbor_ptf_port_list(duthost, neighbor_name, tbinfo): + """ + @summary: Get neighbor port in ptf by neighbor_name + @param duthost: The DUT + @param neighbor_name: name or keyword contained in name of neighbor + @param tbinfo: testbed information + @return a list of port index + Sample output: [45, 46] + """ + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + neighbor_port_list = get_neighbor_port_list(duthost, neighbor_name) + ptf_port_list = [] + for neighbor_port in neighbor_port_list: + ptf_port_list.append(mg_facts["minigraph_ptf_indices"][neighbor_port]) + + return ptf_port_list + + +def get_upstream_neigh_type(topo_type, is_upper=True): + """ + @summary: Get neighbor type by topo type + @param topo_type: topo type + @param is_upper: if is_upper is True, return uppercase str, else return lowercase str + @return a str + Sample output: "mx" + """ + if topo_type in UPSTREAM_NEIGHBOR_MAP: + return UPSTREAM_NEIGHBOR_MAP[topo_type].upper() if is_upper else UPSTREAM_NEIGHBOR_MAP[topo_type] + + return None + +def get_downstream_neigh_type(topo_type, is_upper=True): + """ + @summary: Get neighbor type by topo type + @param topo_type: topo type + @param is_upper: if is_upper is 
True, return uppercase str, else return lowercase str + @return a str + Sample output: "mx" + """ + if topo_type in DOWNSTREAM_NEIGHBOR_MAP: + return DOWNSTREAM_NEIGHBOR_MAP[topo_type].upper() if is_upper else DOWNSTREAM_NEIGHBOR_MAP[topo_type] + + return None + +def delete_running_config(config_entry, duthost, is_json=True): + if is_json: + duthost.copy(content=json.dumps(config_entry, indent=4), dest="/tmp/del_config_entry.json") + else: + duthost.copy(src=config_entry, dest="/tmp/del_config_entry.json") + duthost.shell("configlet -d -j {}".format("/tmp/del_config_entry.json")) + duthost.shell("rm -f {}".format("/tmp/del_config_entry.json")) + +def get_data_acl(duthost): + acl_facts = duthost.acl_facts()["ansible_facts"]["ansible_acl_facts"] + pre_acl_rules = acl_facts.get("DATAACL", {}).get("rules", None) + return pre_acl_rules + + +def recover_acl_rule(duthost, data_acl): + base_dir = os.path.dirname(os.path.realpath(__file__)) + template_dir = os.path.join(base_dir, "templates") + acl_rules_template = "default_acl_rules.json" + dut_tmp_dir = "/tmp" + dut_conf_file_path = os.path.join(dut_tmp_dir, acl_rules_template) + + for key, value in data_acl.items(): + if key != "DEFAULT_RULE": + seq_id = key.split('_')[1] + acl_config = json.loads(open(os.path.join(template_dir, acl_rules_template)).read()) + acl_entry_template = \ + acl_config["acl"]["acl-sets"]["acl-set"]["dataacl"]["acl-entries"]["acl-entry"]["1"] + acl_entry_config = acl_config["acl"]["acl-sets"]["acl-set"]["dataacl"]["acl-entries"]["acl-entry"] + + acl_entry_config[seq_id] = copy.deepcopy(acl_entry_template) + acl_entry_config[seq_id]["config"]["sequence-id"] = seq_id + acl_entry_config[seq_id]["l2"]["config"]["ethertype"] = value["ETHER_TYPE"] + acl_entry_config[seq_id]["l2"]["config"]["vlan_id"] = value["VLAN_ID"] + acl_entry_config[seq_id]["input_interface"]["interface_ref"]["config"]["interface"] = value["IN_PORTS"] + + with tempfile.NamedTemporaryFile(suffix=".json", prefix="acl_config", 
mode="w") as fp: + json.dump(acl_config, fp) + fp.flush() + logger.info("Generating config for ACL rule, ACL table - DATAACL") + duthost.template(src=fp.name, dest=dut_conf_file_path, force=True) + + logger.info("Applying {}".format(dut_conf_file_path)) + duthost.command("acl-loader update full {}".format(dut_conf_file_path)) diff --git a/tests/snappi/pfc/__init__.py b/tests/configlet/__init__.py similarity index 100% rename from tests/snappi/pfc/__init__.py rename to tests/configlet/__init__.py diff --git a/tests/configlet/test_add_rack.py b/tests/configlet/test_add_rack.py index d908131b43d..dee128d6dd8 100644 --- a/tests/configlet/test_add_rack.py +++ b/tests/configlet/test_add_rack.py @@ -3,10 +3,10 @@ import pytest import sys -sys.path.append("./configlet/util") +from tests.configlet.util.base_test import restore_orig_minigraph, backup_minigraph, do_test_add_rack +from tests.configlet.util.helpers import log_info -from base_test import do_test_add_rack, backup_minigraph, restore_orig_minigraph -from helpers import log_info +sys.path.append("./configlet/util") pytestmark = [ pytest.mark.topology("t1") @@ -26,7 +26,7 @@ def configure_dut(duthosts, rand_one_dut_hostname): log_info("configure_dut fixture DONE for {}".format(rand_one_dut_hostname)) - +@pytest.mark.disable_loganalyzer def test_add_rack(configure_dut, tbinfo, duthosts, rand_one_dut_hostname): global data_dir, orig_db_dir, clet_db_dir, files_dir diff --git a/tests/snappi/pfcwd/__init__.py b/tests/configlet/util/__init__.py similarity index 100% rename from tests/snappi/pfcwd/__init__.py rename to tests/configlet/util/__init__.py diff --git a/tests/configlet/util/base_test.py b/tests/configlet/util/base_test.py index 1b08b58ead1..63bb61e0f65 100644 --- a/tests/configlet/util/base_test.py +++ b/tests/configlet/util/base_test.py @@ -3,11 +3,10 @@ import json import os -from helpers import * -from common import * -import strip -import configlet -import generic_patch +from tests.configlet.util import 
strip, configlet, generic_patch +from tests.configlet.util.common import * +from tests.configlet.util.helpers import * + if os.path.exists("/etc/sonic/sonic-environment"): from mock_for_switch import config_reload, wait_until diff --git a/tests/configlet/util/common.py b/tests/configlet/util/common.py index aa0322780bf..68298947da9 100755 --- a/tests/configlet/util/common.py +++ b/tests/configlet/util/common.py @@ -1,12 +1,13 @@ #! /usr/bin/env python +import inspect import json import os import re import sys import time -from helpers import * +from tests.configlet.util.helpers import * CONFIG_DB_FILE = "etc/sonic/config_db.json" MINIGRAPH_FILE = "etc/sonic/minigraph.xml" @@ -158,7 +159,8 @@ def init_global_data(): def report_error(m): - log_error("failure: {}".format(m)) + log_error("failure: {}:{}: {}".format(inspect.stack()[1][1], + inspect.stack()[1][2], m)) assert False, m diff --git a/tests/configlet/util/configlet.py b/tests/configlet/util/configlet.py index c79011bc022..6f74afd71e2 100755 --- a/tests/configlet/util/configlet.py +++ b/tests/configlet/util/configlet.py @@ -3,8 +3,8 @@ import json from tempfile import mkstemp -from helpers import * -from common import * +from tests.configlet.util.helpers import * +from tests.configlet.util.common import * import strip orig_config = None @@ -194,7 +194,8 @@ def get_port_related_data(is_mlnx, is_storage_backend): cable[local_port] = orig_config["CABLE_LENGTH|AZURE"]['value'][local_port] # "BUFFER_PG" - buffer_pg["{}|0".format(local_port)] = orig_config["BUFFER_PG|Ethernet64|0"]['value'] + buffer_pg["{}|0".format(local_port)] = orig_config["BUFFER_PG|{}|0".format( + local_port)]['value'] # "QUEUE" for i in range(7): diff --git a/tests/configlet/util/generic_patch.py b/tests/configlet/util/generic_patch.py index bb15d0dca64..1e492bb29a1 100644 --- a/tests/configlet/util/generic_patch.py +++ b/tests/configlet/util/generic_patch.py @@ -6,8 +6,8 @@ import os import re -from helpers import * -from common import * 
+from tests.configlet.util.helpers import * +from tests.configlet.util.common import * if os.path.exists("/etc/sonic/sonic-environment"): from mock_for_switch import config_reload, wait_until diff --git a/tests/configlet/util/mock_for_switch.py b/tests/configlet/util/mock_for_switch.py index 77e0a4cd259..6f9ee720f68 100644 --- a/tests/configlet/util/mock_for_switch.py +++ b/tests/configlet/util/mock_for_switch.py @@ -10,7 +10,7 @@ import sys import time -from helpers import * +from tests.configlet.util.helpers import * class DutHost: diff --git a/tests/configlet/util/run_test_in_switch.py b/tests/configlet/util/run_test_in_switch.py index ec3094c365e..0cc20a6a1ee 100644 --- a/tests/configlet/util/run_test_in_switch.py +++ b/tests/configlet/util/run_test_in_switch.py @@ -5,10 +5,9 @@ import filecmp import os -from mock_for_switch import get_duthost - -from base_test import do_test_add_rack, backup_minigraph, restore_orig_minigraph -from helpers import * +from tests.configlet.util.mock_for_switch import get_duthost +from tests.configlet.util.base_test import backup_minigraph, restore_orig_minigraph, do_test_add_rack +from tests.configlet.util.helpers import * # To run test in switch: # Copy all files in this dir (tests/configlet/util) into switch diff --git a/tests/configlet/util/strip.py b/tests/configlet/util/strip.py index 65b7e3dcfde..6e4481b1454 100755 --- a/tests/configlet/util/strip.py +++ b/tests/configlet/util/strip.py @@ -4,8 +4,8 @@ import sys import xml.etree.ElementTree as ET -from helpers import * -from common import * +from tests.configlet.util.common import * +from tests.configlet.util.helpers import * from tempfile import mkstemp diff --git a/tests/conftest.py b/tests/conftest.py index 4ab82b12988..dcb93f44a1b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,10 @@ from tests.common.devices.duthosts import DutHosts from tests.common.devices.vmhost import VMHost from tests.common.devices.base import NeighborDevice +from 
tests.common.dualtor.dual_tor_utils import lower_tor_host +from tests.common.helpers.parallel import parallel_run from tests.common.fixtures.duthost_utils import backup_and_restore_config_db_session +from tests.common.fixtures.ptfhost_utils import ptf_portmap_file # lgtm[py/unused-import] from tests.common.helpers.constants import ( ASIC_PARAM_TYPE_ALL, ASIC_PARAM_TYPE_FRONTEND, DEFAULT_ASIC_ID, @@ -40,11 +43,13 @@ from tests.common.cache import FactsCache from tests.common.connections.console_host import ConsoleHost +from tests.common.utilities import safe_filename from tests.common.utilities import str2bool from tests.platform_tests.args.advanced_reboot_args import add_advanced_reboot_args from tests.platform_tests.args.cont_warm_reboot_args import add_cont_warm_reboot_args from tests.platform_tests.args.normal_reboot_args import add_normal_reboot_args - +from ptf import testutils # lgtm[py/unused-import] +from tests.common.config_reload import config_reload logger = logging.getLogger(__name__) cache = FactsCache() @@ -128,7 +133,17 @@ def pytest_addoption(parser): parser.addoption("--deep_clean", action="store_true", default=False, help="Deep clean DUT before tests (remove old logs, cores, dumps)") parser.addoption("--py_saithrift_url", action="store", default=None, type=str, - help="Specify the url of the saithrift package to be installed on the ptf (should be http:///path/python-saithrift_0.9.4_amd64.deb") + help="Specify the url of the saithrift package to be installed on the ptf " + "(should be http:///path/python-saithrift_0.9.4_amd64.deb") + + ######################### + # post-test options # + ######################### + parser.addoption("--posttest_show_tech_since", action="store", default="yesterday", + help="collect show techsupport since . should be a string which can " + "be parsed by bash command 'date --d '. Default value is yesterday. 
" + "To collect all time spans, please use '@0' as the value.") + ############################ # keysight ixanvl options # ############################ @@ -141,6 +156,12 @@ def pytest_addoption(parser): add_cont_warm_reboot_args(parser) add_normal_reboot_args(parser) + ############################ + # QoS options # + ############################ + parser.addoption("--public_docker_registry", action="store_true", default=False, + help="To use public docker registry for syncd swap, by default is disabled (False)") + ############################ # loop_times options # ############################ @@ -163,6 +184,8 @@ def enhance_inventory(request): This fixture is automatically applied, you don't need to declare it in your test script. """ inv_opt = request.config.getoption("ansible_inventory") + if isinstance(inv_opt, list): + return inv_files = [inv_file.strip() for inv_file in inv_opt.split(",")] try: setattr(request.config.option, "ansible_inventory", inv_files) @@ -313,6 +336,20 @@ def rand_unselected_dut(request, duthosts, rand_one_dut_hostname): return duthosts[dut_hostnames[1 - idx]] +@pytest.fixture(scope="module") +def selected_rand_one_per_hwsku_hostname(request): + """ + Return the selected hostnames for the given module. + This fixture will return the list of selected dut hostnames + when another fixture like enum_rand_one_per_hwsku_hostname + or enum_rand_one_per_hwsku_frontend_hostname is used. 
+ """ + if request.module in _hosts_per_hwsku_per_module: + return _hosts_per_hwsku_per_module[request.module] + else: + return [] + + @pytest.fixture(scope="module") def rand_one_dut_portname_oper_up(request): oper_up_ports = generate_port_lists(request, "oper_up_ports") @@ -344,7 +381,7 @@ def localhost(ansible_adhoc): @pytest.fixture(scope="session") -def ptfhost(ansible_adhoc, tbinfo, duthost): +def ptfhost(enhance_inventory, ansible_adhoc, tbinfo, duthost): if "ptf_image_name" in tbinfo and "docker-keysight-api-server" in tbinfo["ptf_image_name"]: return None if "ptf" in tbinfo: @@ -357,7 +394,7 @@ def ptfhost(ansible_adhoc, tbinfo, duthost): @pytest.fixture(scope="module") -def k8smasters(ansible_adhoc, request): +def k8smasters(enhance_inventory, ansible_adhoc, request): """ Shortcut fixture for getting Kubernetes master hosts """ @@ -389,7 +426,7 @@ def k8scluster(k8smasters): @pytest.fixture(scope="module") -def nbrhosts(ansible_adhoc, tbinfo, creds, request): +def nbrhosts(enhance_inventory, ansible_adhoc, tbinfo, creds, request): """ Shortcut fixture for getting VM host """ @@ -432,7 +469,7 @@ def nbrhosts(ansible_adhoc, tbinfo, creds, request): @pytest.fixture(scope="module") -def fanouthosts(ansible_adhoc, conn_graph_facts, creds, duthosts): +def fanouthosts(enhance_inventory, ansible_adhoc, conn_graph_facts, creds, duthosts): """ Shortcut fixture for getting Fanout hosts """ @@ -440,69 +477,85 @@ def fanouthosts(ansible_adhoc, conn_graph_facts, creds, duthosts): dev_conn = conn_graph_facts.get('device_conn', {}) fanout_hosts = {} # WA for virtual testbed which has no fanout - try: - for dut_host, value in dev_conn.items(): - duthost = duthosts[dut_host] - mg_facts = duthost.minigraph_facts(host=duthost.hostname)['ansible_facts'] - for dut_port in value.keys(): - fanout_rec = value[dut_port] - fanout_host = str(fanout_rec['peerdevice']) - fanout_port = str(fanout_rec['peerport']) - - if fanout_host in fanout_hosts.keys(): - fanout = 
fanout_hosts[fanout_host] + for dut_host, value in dev_conn.items(): + duthost = duthosts[dut_host] + if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0': + continue # skip for kvm platform which has no fanout + mg_facts = duthost.minigraph_facts(host=duthost.hostname)['ansible_facts'] + for dut_port in value.keys(): + fanout_rec = value[dut_port] + fanout_host = str(fanout_rec['peerdevice']) + fanout_port = str(fanout_rec['peerport']) + + if fanout_host in fanout_hosts.keys(): + fanout = fanout_hosts[fanout_host] + else: + host_vars = ansible_adhoc().options[ + 'inventory_manager'].get_host(fanout_host).vars + os_type = host_vars.get('os', 'eos') + if 'fanout_tacacs_user' in creds: + fanout_user = creds['fanout_tacacs_user'] + fanout_password = creds['fanout_tacacs_password'] + elif 'fanout_tacacs_{}_user'.format(os_type) in creds: + fanout_user = creds['fanout_tacacs_{}_user'.format(os_type)] + fanout_password = creds['fanout_tacacs_{}_password'.format(os_type)] + elif os_type == 'sonic': + fanout_user = creds.get('fanout_sonic_user', None) + fanout_password = creds.get('fanout_sonic_password', None) + elif os_type == 'eos': + fanout_user = creds.get('fanout_network_user', None) + fanout_password = creds.get('fanout_network_password', None) else: - host_vars = ansible_adhoc().options[ - 'inventory_manager'].get_host(fanout_host).vars - os_type = host_vars.get('os', 'eos') + # when os is mellanox, not supported + pytest.fail("os other than sonic and eos not supported") + + eos_shell_user = None + eos_shell_password = None + if os_type == "eos": admin_user = creds['fanout_admin_user'] admin_password = creds['fanout_admin_password'] - # `fanout_network_user` and `fanout_network_password` are for - # accessing the non-shell CLI of fanout. - # Ansible will use this set of credentail for establishing - # `network_cli` connection with device when applicable. 
- network_user = creds.get('fanout_network_user', admin_user) - network_password = creds.get('fanout_network_password', - admin_password) - shell_user = creds.get('fanout_shell_user', admin_user) - shell_password = creds.get('fanout_shell_pass', admin_password) - if os_type == 'sonic': - shell_user = creds['fanout_sonic_user'] - shell_password = creds['fanout_sonic_password'] - - fanout = FanoutHost(ansible_adhoc, - os_type, - fanout_host, - 'FanoutLeaf', - network_user, - network_password, - shell_user=shell_user, - shell_passwd=shell_password) - fanout.dut_hostnames = [dut_host] - fanout_hosts[fanout_host] = fanout - fanout.add_port_map(encode_dut_port_name(dut_host, dut_port), fanout_port) - - # Add port name to fanout port mapping port if dut_port is alias. - if dut_port in mg_facts['minigraph_port_alias_to_name_map']: - mapped_port = mg_facts['minigraph_port_alias_to_name_map'][dut_port] - # only add the mapped port which isn't in device_conn ports to avoid overwriting port map wrongly, - # it happens when an interface has the same name with another alias, for example: - # Interface Alias - # -------------------- - # Ethernet108 Ethernet32 - # Ethernet32 Ethernet13/1 - if mapped_port not in value.keys(): - fanout.add_port_map(encode_dut_port_name(dut_host, mapped_port), fanout_port) - - if dut_host not in fanout.dut_hostnames: - fanout.dut_hostnames.append(dut_host) - except: - pass + eos_shell_user = creds.get('fanout_shell_user', admin_user) + eos_shell_password = creds.get('fanout_shell_password', admin_password) + + fanout = FanoutHost(ansible_adhoc, + os_type, + fanout_host, + 'FanoutLeaf', + fanout_user, + fanout_password, + eos_shell_user=eos_shell_user, + eos_shell_passwd=eos_shell_password) + fanout.dut_hostnames = [dut_host] + fanout_hosts[fanout_host] = fanout + + if fanout.os == 'sonic': + ifs_status = fanout.host.get_interfaces_status() + for key, interface_info in ifs_status.items(): + fanout.fanout_port_alias_to_name[interface_info['alias']] = 
interface_info['interface'] + logging.info("fanout {} fanout_port_alias_to_name {}".format(fanout_host, fanout.fanout_port_alias_to_name)) + + fanout.add_port_map(encode_dut_port_name(dut_host, dut_port), fanout_port) + + # Add port name to fanout port mapping port if dut_port is alias. + if dut_port in mg_facts['minigraph_port_alias_to_name_map']: + mapped_port = mg_facts['minigraph_port_alias_to_name_map'][dut_port] + # only add the mapped port which isn't in device_conn ports to avoid overwriting port map wrongly, + # it happens when an interface has the same name with another alias, for example: + # Interface Alias + # -------------------- + # Ethernet108 Ethernet32 + # Ethernet32 Ethernet13/1 + if mapped_port not in value.keys(): + fanout.add_port_map(encode_dut_port_name(dut_host, mapped_port), fanout_port) + + if dut_host not in fanout.dut_hostnames: + fanout.dut_hostnames.append(dut_host) + return fanout_hosts @pytest.fixture(scope="session") -def vmhost(ansible_adhoc, request, tbinfo): +def vmhost(enhance_inventory, ansible_adhoc, request, tbinfo): server = tbinfo["server"] inv_files = get_inventory_files(request) vmhost = get_test_server_host(inv_files, server) @@ -604,6 +657,11 @@ def creds_all_duts(duthosts): @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_runtest_makereport(item, call): + if call.when == 'setup': + item.user_properties.append(('start', str(datetime.fromtimestamp(call.start)))) + elif call.when == 'teardown': + item.user_properties.append(('end', str(datetime.fromtimestamp(call.stop)))) + # Filter out unnecessary logs captured on "stdout" and "stderr" item._report_sections = list(filter(lambda report: report[1] not in ("stdout", "stderr"), item._report_sections)) @@ -616,14 +674,6 @@ def pytest_runtest_makereport(item, call): setattr(item, "rep_" + rep.when, rep) - -def fetch_dbs(duthost, testname): - dbs = [[0, "appdb"], [1, "asicdb"], [2, "counterdb"], [4, "configdb"]] - for db in dbs: - duthost.shell("redis-dump -d {} 
--pretty -o {}.json".format(db[0], db[1])) - duthost.fetch(src="{}.json".format(db[1]), dest="logs/{}".format(testname)) - - def collect_techsupport_on_dut(request, a_dut): # request.node is an "item" because we use the default # "function" scope @@ -786,7 +836,7 @@ def enable_container_autorestart(duthost, testcase="", feature_list=None): return enable_container_autorestart @pytest.fixture(scope='module') -def swapSyncd(request, duthosts, rand_one_dut_hostname, creds): +def swapSyncd(request, duthosts, rand_one_dut_hostname, creds, tbinfo, lower_tor_host): """ Swap syncd on DUT host @@ -797,7 +847,10 @@ def swapSyncd(request, duthosts, rand_one_dut_hostname, creds): Returns: None """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] swapSyncd = request.config.getoption("--qos_swap_syncd") try: if swapSyncd: @@ -920,6 +973,15 @@ def generate_params_dut_hostname(request): return duts +def get_completeness_level_metadata(request): + completeness_level = request.config.getoption("--completeness_level") + # if completeness_level is not set or an unknown completeness_level is set + # return "thorough" to run all test set + if not completeness_level or completeness_level not in ["debug", "basic", "confident", "thorough"]: + return "thorough" + return completeness_level + + def get_testbed_metadata(request): """ Get the metadata for the testbed name. 
Return None if tbname is @@ -943,8 +1005,8 @@ def get_testbed_metadata(request): return metadata.get(tbname) -def generate_port_lists(request, port_scope): - empty = [ encode_dut_port_name('unknown', 'unknown') ] +def generate_port_lists(request, port_scope, with_completeness_level=False): + empty = [encode_dut_port_name('unknown', 'unknown')] if 'ports' in port_scope: scope = 'Ethernet' elif 'pcs' in port_scope: @@ -966,14 +1028,39 @@ def generate_port_lists(request, port_scope): if dut_ports is None: return empty - ret = [] + dut_port_map = {} for dut, val in dut_ports.items(): + dut_port_pairs = [] if 'intf_status' not in val: continue for intf, status in val['intf_status'].items(): if scope in intf and (not state or status[state] == 'up'): - ret.append(encode_dut_port_name(dut, intf)) - + dut_port_pairs.append(encode_dut_port_name(dut, intf)) + dut_port_map[dut] = dut_port_pairs + logger.info("Generate dut_port_map: {}".format(dut_port_map)) + + if with_completeness_level: + completeness_level = get_completeness_level_metadata(request) + # if completeness_level in ["debug", "basic", "confident"], + # only select several ports on every DUT to save test time + + def trim_dut_port_lists(dut_port_list, target_len): + if len(dut_port_list) <= target_len: + return dut_port_list + # for diversity, fetch the ports from both the start and the end of the original list + pos_1 = target_len / 2 + pos_2 = target_len - pos_1 + return dut_ports[:pos_1] + dut_ports[-pos_2:] + + if completeness_level in ["debug"]: + for dut, dut_ports in dut_port_map.items(): + dut_port_map[dut] = trim_dut_port_lists(dut_ports, 1) + elif completeness_level in ["basic", "confident"]: + for dut, dut_ports in dut_port_map.items(): + dut_port_map[dut] = trim_dut_port_lists(dut_ports, 4) + + ret = reduce(lambda dut_ports_1, dut_ports_2: dut_ports_1 + dut_ports_2, dut_port_map.values()) + logger.info("Generate port_list: {}".format(ret)) return ret if ret else empty @@ -996,6 +1083,9 @@ def 
generate_dut_feature_container_list(request): if "features" not in val: continue for feature in val["features"].keys(): + if "disabled" in val["features"][feature]: + continue + dut_info = meta[dut] if "asic_services" in dut_info and dut_info["asic_services"].get(feature) is not None: @@ -1007,6 +1097,42 @@ def generate_dut_feature_container_list(request): return container_list +def generate_dut_feature_list(request, duts_selected, asics_selected): + """ + Generate a list of features. + The list of features willl be obtained from + metadata file. + This list will be features that can be stopped + or restarted. + """ + meta = get_testbed_metadata(request) + tuple_list = [] + + if meta is None: + return tuple_list + + skip_feature_list = ['database', 'database-chassis', 'gbsyncd'] + + for a_dut_index, a_dut in enumerate(duts_selected): + if len(asics_selected): + for a_asic in asics_selected[a_dut_index]: + # Create tuple of dut and asic index + if "features" in meta[a_dut]: + for a_feature in meta[a_dut]["features"].keys(): + if a_feature not in skip_feature_list: + tuple_list.append((a_dut, a_asic, a_feature)) + else: + tuple_list.append((a_dut, a_asic, None)) + else: + if "features" in meta[a_dut]: + for a_feature in meta[a_dut]["features"].keys(): + if a_feature not in skip_feature_list: + tuple_list.append((a_dut, None, a_feature)) + else: + tuple_list.append((a_dut, None, None)) + return tuple_list + + def generate_dut_backend_asics(request, duts_selected): dut_asic_list = [] @@ -1018,7 +1144,7 @@ def generate_dut_backend_asics(request, duts_selected): for dut in duts_selected: mdata = metadata.get(dut) if mdata is None: - continue + dut_asic_list.append([None]) dut_asic_list.append(mdata.get("backend_asics", [None])) return dut_asic_list @@ -1052,6 +1178,36 @@ def generate_priority_lists(request, prio_scope): return ret if ret else empty + +def pfc_pause_delay_test_params(request): + empty = [] + + tbname = request.config.getoption("--testbed") + if not 
tbname: + return empty + + folder = 'pfc_headroom_test_params' + filepath = os.path.join(folder, tbname + '.json') + + try: + with open(filepath, 'r') as yf: + info = json.load(yf) + except IOError: + return empty + + if tbname not in info: + return empty + + dut_pfc_delay_params = info[tbname] + ret = [] + + for dut, pfc_pause_delay_params in dut_pfc_delay_params.items(): + for pfc_delay, headroom_result in pfc_pause_delay_params.items(): + ret.append('{}|{}|{}'.format(dut, pfc_delay, headroom_result)) + + return ret if ret else empty + + _frontend_hosts_per_hwsku_per_module = {} _hosts_per_hwsku_per_module = {} def pytest_generate_tests(metafunc): @@ -1085,8 +1241,8 @@ def pytest_generate_tests(metafunc): asics_selected = None asic_fixture_name = None + tbname, tbinfo = get_tbinfo(metafunc) if duts_selected is None: - tbname, tbinfo = get_tbinfo(metafunc) duts_selected = [tbinfo["duts"][0]] if "enum_asic_index" in metafunc.fixturenames: @@ -1101,9 +1257,17 @@ def pytest_generate_tests(metafunc): elif "enum_rand_one_asic_index" in metafunc.fixturenames: asic_fixture_name = "enum_rand_one_asic_index" asics_selected = generate_param_asic_index(metafunc, duts_selected, ASIC_PARAM_TYPE_ALL, random_asic=True) - + elif "enum_rand_one_frontend_asic_index" in metafunc.fixturenames: + asic_fixture_name = "enum_rand_one_frontend_asic_index" + asics_selected = generate_param_asic_index(metafunc, duts_selected, ASIC_PARAM_TYPE_FRONTEND, random_asic=True) + + # Create parameterization tuple of dut_fixture_name, asic_fixture_name and feature to parameterize + if dut_fixture_name and asic_fixture_name and ("enum_dut_feature" in metafunc.fixturenames): + tuple_list = generate_dut_feature_list(metafunc, duts_selected, asics_selected) + feature_fixture = "enum_dut_feature" + metafunc.parametrize(dut_fixture_name + "," + asic_fixture_name + "," + feature_fixture, tuple_list, scope="module", indirect=True) # Create parameterization tuple of dut_fixture_name and asic_fixture_name to 
parameterize - if dut_fixture_name and asic_fixture_name: + elif dut_fixture_name and asic_fixture_name: # parameterize on both - create tuple for each tuple_list = [] for a_dut_index, a_dut in enumerate(duts_selected): @@ -1138,14 +1302,27 @@ def pytest_generate_tests(metafunc): metafunc.parametrize("enum_dut_portchannel_oper_up", generate_port_lists(metafunc, "oper_up_pcs")) if "enum_dut_portchannel_admin_up" in metafunc.fixturenames: metafunc.parametrize("enum_dut_portchannel_admin_up", generate_port_lists(metafunc, "admin_up_pcs")) + if "enum_dut_portchannel_with_completeness_level" in metafunc.fixturenames: + metafunc.parametrize("enum_dut_portchannel_with_completeness_level", generate_port_lists(metafunc, "all_pcs", with_completeness_level=True)) if "enum_dut_feature_container" in metafunc.fixturenames: metafunc.parametrize( "enum_dut_feature_container", generate_dut_feature_container_list(metafunc) ) + if 'enum_dut_all_prio' in metafunc.fixturenames: + metafunc.parametrize("enum_dut_all_prio", generate_priority_lists(metafunc, 'all')) if 'enum_dut_lossless_prio' in metafunc.fixturenames: metafunc.parametrize("enum_dut_lossless_prio", generate_priority_lists(metafunc, 'lossless')) if 'enum_dut_lossy_prio' in metafunc.fixturenames: metafunc.parametrize("enum_dut_lossy_prio", generate_priority_lists(metafunc, 'lossy')) + if 'enum_pfc_pause_delay_test_params' in metafunc.fixturenames: + metafunc.parametrize("enum_pfc_pause_delay_test_params", pfc_pause_delay_test_params(metafunc)) + + if 'topo_scenario' in metafunc.fixturenames: + if tbinfo['topo']['type'] == 'm0' and 'topo_scenario' in metafunc.fixturenames: + metafunc.parametrize('topo_scenario', ['m0_vlan_scenario', 'm0_l3_scenario'], scope='module') + else: + metafunc.parametrize('topo_scenario', ['default'], scope='module') + ### Override enum fixtures for duts and asics to ensure that parametrization happens once per module. 
@pytest.fixture(scope="module") @@ -1184,6 +1361,14 @@ def enum_backend_asic_index(request): def enum_rand_one_asic_index(request): return request.param +@pytest.fixture(scope="module") +def enum_rand_one_frontend_asic_index(request): + return request.param + +@pytest.fixture(scope="module") +def enum_dut_feature(request): + return request.param + @pytest.fixture(scope="module") def duthost_console(duthosts, rand_one_dut_hostname, localhost, conn_graph_facts, creds): duthost = duthosts[rand_one_dut_hostname] @@ -1345,6 +1530,42 @@ def duts_running_config_facts(duthosts): cfg_facts[duthost.hostname].append(asic_cfg_facts) return cfg_facts +@pytest.fixture(scope='class') +def dut_test_params(duthosts, rand_one_dut_hostname, tbinfo, ptf_portmap_file, lower_tor_host, creds): + """ + Prepares DUT host test params + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + tbinfo (Fixture, dict): Map containing testbed information + ptfPortMapFile (Fxiture, str): filename residing + on PTF host and contains port maps information + + Returns: + dut_test_params (dict): DUT host test params + """ + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + mgFacts = duthost.get_extended_minigraph_facts(tbinfo) + topo = tbinfo["topo"]["name"] + + yield { + "topo": topo, + "hwsku": mgFacts["minigraph_hwsku"], + "basicParams": { + "router_mac": duthost.facts["router_mac"], + "server": duthost.host.options['inventory_manager'].get_host( + duthost.hostname + ).vars['ansible_host'], + "port_map_file": ptf_portmap_file, + "sonic_asic_type": duthost.facts['asic_type'], + "sonic_version": duthost.os_version, + "dut_username": creds['sonicadmin_user'], + "dut_password": creds['sonicadmin_password'] + } + } @pytest.fixture(scope='module') def duts_minigraph_facts(duthosts, tbinfo): @@ -1367,40 +1588,373 @@ def get_reboot_cause(duthost): yield uptime_end = duthost.get_up_time() if not uptime_end == uptime_start: - 
duthost.show_and_parse("show reboot-cause history") + if "201811" in duthost.os_version or "201911" in duthost.os_version: + duthost.show_and_parse("show reboot-cause") + else: + duthost.show_and_parse("show reboot-cause history") def collect_db_dump_on_duts(request, duthosts): - ''' - When test failed, teardown of this fixture will dump all the DB and collect to the test servers + '''When test failed, this fixture will dump all the DBs on DUT and collect them to local ''' if hasattr(request.node, 'rep_call') and request.node.rep_call.failed: dut_file_path = "/tmp/db_dump" - docker_file_path = "./logs/db_dump" - db_dump_path = os.path.join(dut_file_path, request.module.__name__, request.node.name) - db_dump_tarfile = "{}.tar.gz".format(dut_file_path) - - # Collect DB config - dbs = set() - result = duthosts[0].shell("cat /var/run/redis/sonic-db/database_config.json") - db_config = json.loads(result['stdout']) - for db in db_config['DATABASES']: - db_id = db_config['DATABASES'][db]['id'] - dbs.add(db_id) - - # Collect DB dump - duthosts.file(path = db_dump_path, state="directory") - for i in dbs: - duthosts.shell("redis-dump -d {} -y -o {}/{}".format(i, db_dump_path, i)) - duthosts.shell("tar czf {} {}".format(db_dump_tarfile, dut_file_path)) - duthosts.fetch(src = db_dump_tarfile, dest = docker_file_path) + local_file_path = "./logs/db_dump" + + # Remove characters that can't be used in filename + nodename = safe_filename(request.node.nodeid) + db_dump_path = os.path.join(dut_file_path, nodename) + db_dump_tarfile = os.path.join(dut_file_path, "{}.tar.gz".format(nodename)) + + # We don't need to collect all DBs, db_names specify the DBs we want to collect + db_names = ["APPL_DB", "ASIC_DB", "COUNTERS_DB", "CONFIG_DB", "STATE_DB"] + raw_db_config = duthosts[0].shell("cat /var/run/redis/sonic-db/database_config.json")["stdout"] + db_config = json.loads(raw_db_config).get("DATABASES", {}) + db_ids = set() + for db_name in db_names: + # Skip STATE_DB dump on release 
201911. + # JINJA2_CACHE can't be dumped by "redis-dump", and it is stored in STATE_DB on 201911 release. + # Please refer to issue: https://github.com/Azure/sonic-buildimage/issues/5587. + # The issue has been fixed in https://github.com/Azure/sonic-buildimage/pull/5646. + # However, the fix is not included in 201911 release. So we have to skip STATE_DB on release 201911 + # to avoid raising exception when dumping the STATE_DB. + if db_name == "STATE_DB" and duthosts[0].sonic_release in ['201911']: + continue + + if db_name in db_config: + db_ids.add(db_config[db_name].get("id", 0)) + + namespace_list = duthosts[0].get_asic_namespace_list() if duthosts[0].is_multi_asic else [] + if namespace_list: + for namespace in namespace_list: + # Collect DB dump + dump_dest_path = os.path.join(db_dump_path, namespace) + dump_cmds = ["mkdir -p {}".format(dump_dest_path)] + for db_id in db_ids: + dump_cmd = "ip netns exec {} redis-dump -d {} -y -o {}/{}".format(namespace, db_id, dump_dest_path, db_id) + dump_cmds.append(dump_cmd) + duthosts.shell_cmds(cmds=dump_cmds) + else: + # Collect DB dump + dump_dest_path = db_dump_path + dump_cmds = ["mkdir -p {}".format(dump_dest_path)] + for db_id in db_ids: + dump_cmd = "redis-dump -d {} -y -o {}/{}".format(db_id, dump_dest_path, db_id) + dump_cmds.append(dump_cmd) + duthosts.shell_cmds(cmds=dump_cmds) + + #compress dump file and fetch to docker + duthosts.shell("tar -czf {} -C {} {}".format(db_dump_tarfile, dut_file_path, nodename)) + duthosts.fetch(src = db_dump_tarfile, dest = local_file_path) #remove dump file from dut - duthosts.shell("rm -rf {} {}".format(dut_file_path, db_dump_tarfile)) + duthosts.shell("rm -fr {} {}".format(db_dump_tarfile, db_dump_path)) + @pytest.fixture(autouse=True) def collect_db_dump(request, duthosts): + """This autoused fixture is to generate DB dumps on DUT and collect them to local for later troubleshooting when + a test case failed. 
+ """ + yield + collect_db_dump_on_duts(request, duthosts) + + +def __dut_reload(duts_data, node=None, results=None): + if node is None or results is None: + logger.error('Missing kwarg "node" or "results"') + return + logger.info("dut reload called on {}".format(node.hostname)) + node.copy(content=json.dumps(duts_data[node.hostname]["pre_running_config"][None], indent=4), + dest='/etc/sonic/config_db.json', verbose=False) + config_reload(node) + + +def compare_running_config(pre_running_config, cur_running_config): + if type(pre_running_config) != type(cur_running_config): + return False + if pre_running_config == cur_running_config: + return True + else: + if type(pre_running_config) is dict: + if set(pre_running_config.keys()) != set(cur_running_config.keys()): + return False + for key in pre_running_config.keys(): + if not compare_running_config(pre_running_config[key], cur_running_config[key]): + return False + return True + # We only have string in list in running config now, so we can ignore the order of the list. + elif type(pre_running_config) is list: + if set(pre_running_config) != set(cur_running_config): + return False + else: + return True + else: + return False + + +@pytest.fixture(scope="module", autouse=True) +def core_dump_and_config_check(duthosts, tbinfo, request): ''' - When test failed, teardown of this fixture will dump all the DB and collect to the test servers + Check if there are new core dump files and if the running config is modified after the test case running. + If so, we will reload the running config after test case running. 
''' + check_flag = True + if hasattr(request.config.option, 'enable_macsec') and request.config.option.enable_macsec: + check_flag = False + for m in request.node.iter_markers(): + if m.name == "skip_check_dut_health": + check_flag = False + + module_name = request.node.name + + duts_data = {} + + new_core_dumps = {} + core_dump_check_pass = True + + inconsistent_config = {} + pre_only_config = {} + cur_only_config = {} + config_db_check_pass = True + + check_result = {} + + if check_flag: + for duthost in duthosts: + logger.info("Collecting core dumps before test on {}".format(duthost.hostname)) + duts_data[duthost.hostname] = {} + + if "20191130" in duthost.os_version: + pre_existing_core_dumps = duthost.shell('ls /var/core/ | grep -v python || true')['stdout'].split() + else: + pre_existing_core_dumps = duthost.shell('ls /var/core/')['stdout'].split() + duts_data[duthost.hostname]["pre_core_dumps"] = pre_existing_core_dumps + + logger.info("Collecting running config before test on {}".format(duthost.hostname)) + duts_data[duthost.hostname]["pre_running_config"] = {} + if not duthost.stat(path="/etc/sonic/running_golden_config.json")['stat']['exists']: + logger.info("Collecting running golden config before test on {}".format(duthost.hostname)) + duthost.shell("sonic-cfggen -d --print-data > /etc/sonic/running_golden_config.json") + duts_data[duthost.hostname]["pre_running_config"][None] = \ + json.loads(duthost.shell("cat /etc/sonic/running_golden_config.json", verbose=False)['stdout']) + yield - collect_db_dump_on_duts(request, duthosts) + + if check_flag: + for duthost in duthosts: + inconsistent_config[duthost.hostname] = {} + pre_only_config[duthost.hostname] = {} + cur_only_config[duthost.hostname] = {} + new_core_dumps[duthost.hostname] = [] + + logger.info("Collecting core dumps after test on {}".format(duthost.hostname)) + if "20191130" in duthost.os_version: + cur_cores = duthost.shell('ls /var/core/ | grep -v python || true')['stdout'].split() + else: + 
cur_cores = duthost.shell('ls /var/core/')['stdout'].split() + duts_data[duthost.hostname]["cur_core_dumps"] = cur_cores + + new_core_dumps[duthost.hostname] = list( + set(duts_data[duthost.hostname]["cur_core_dumps"]) - set(duts_data[duthost.hostname]["pre_core_dumps"])) + + if new_core_dumps[duthost.hostname]: + core_dump_check_pass = False + + base_dir = os.path.dirname(os.path.realpath(__file__)) + for new_core_dump in new_core_dumps[duthost.hostname]: + duthost.fetch(src="/var/core/{}".format(new_core_dump), dest=os.path.join(base_dir, "logs")) + + logger.info("Collecting running config after test on {}".format(duthost.hostname)) + # get running config after running + duts_data[duthost.hostname]["cur_running_config"] = {} + duts_data[duthost.hostname]["cur_running_config"][None] = \ + json.loads(duthost.shell("sonic-cfggen -d --print-data", verbose=False)['stdout']) + + # The tables that we don't care + EXCLUDE_CONFIG_TABLE_NAMES = set([]) + # The keys that we don't care + # Current skipped keys: + # 1. "MUX_LINKMGR|LINK_PROBER" + # NOTE: this key is edited by the `run_icmp_responder_session` or `run_icmp_responder` + # to account for the lower performance of the ICMP responder/mux simulator compared to + # real servers and mux cables. + # Linkmgrd is the only service to consume this table so it should not affect other test cases. + # Let's keep this setting in db and we don't want any config reload caused by this key, so + # let's skip checking it. 
+ if "dualtor" in tbinfo["topo"]["name"]: + EXCLUDE_CONFIG_KEY_NAMES = [ + 'MUX_LINKMGR|LINK_PROBER' + ] + else: + EXCLUDE_CONFIG_KEY_NAMES = [] + + def _remove_entry(table_name, key_name, config): + if table_name in config and key_name in config[table_name]: + config[table_name].pop(key_name) + if len(config[table_name]) == 0: + config.pop(table_name) + + for cfg_context in duts_data[duthost.hostname]['pre_running_config']: + pre_only_config[duthost.hostname][cfg_context] = {} + cur_only_config[duthost.hostname][cfg_context] = {} + inconsistent_config[duthost.hostname][cfg_context] = {} + + pre_running_config = duts_data[duthost.hostname]["pre_running_config"][cfg_context] + cur_running_config = duts_data[duthost.hostname]["cur_running_config"][cfg_context] + + # Remove ignored keys from base config + for exclude_key in EXCLUDE_CONFIG_KEY_NAMES: + fields = exclude_key.split('|') + if len(fields) != 2: + continue + _remove_entry(fields[0], fields[1], pre_running_config) + _remove_entry(fields[0], fields[1], cur_running_config) + + pre_running_config_keys = set(pre_running_config.keys()) + cur_running_config_keys = set(cur_running_config.keys()) + + # Check if there are extra keys in pre running config + pre_config_extra_keys = list( + pre_running_config_keys - cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + for key in pre_config_extra_keys: + pre_only_config[duthost.hostname][cfg_context].update({key: pre_running_config[key]}) + + # Check if there are extra keys in cur running config + cur_config_extra_keys = list( + cur_running_config_keys - pre_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + for key in cur_config_extra_keys: + cur_only_config[duthost.hostname][cfg_context].update({key: cur_running_config[key]}) + + # Get common keys in pre running config and cur running config + common_config_keys = list(pre_running_config_keys & cur_running_config_keys - + EXCLUDE_CONFIG_TABLE_NAMES) + + # Check if the running config is modified after module running 
+ for key in common_config_keys: + # TODO: remove these code when solve the problem of "FLEX_COUNTER_DELAY_STATUS" + if key == "FLEX_COUNTER_TABLE": + for sub_key, sub_value in list(pre_running_config[key].items()): + try: + pre_value = pre_running_config[key][sub_key] + cur_value = cur_running_config[key][sub_key] + if pre_value["FLEX_COUNTER_STATUS"] != cur_value["FLEX_COUNTER_STATUS"]: + inconsistent_config[duthost.hostname][cfg_context].update( + { + key: { + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] + } + } + ) + except KeyError: + inconsistent_config[duthost.hostname][cfg_context].update( + { + key: { + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] + } + } + ) + elif not compare_running_config(pre_running_config[key], cur_running_config[key]): + inconsistent_config[duthost.hostname][cfg_context].update( + { + key: { + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] + } + } + ) + + if pre_only_config[duthost.hostname][cfg_context] or \ + cur_only_config[duthost.hostname][cfg_context] or \ + inconsistent_config[duthost.hostname][cfg_context]: + config_db_check_pass = False + if not (core_dump_check_pass and config_db_check_pass): + check_result = { + "core_dump_check": { + "pass": core_dump_check_pass, + "new_core_dumps": new_core_dumps + }, + "config_db_check": { + "pass": config_db_check_pass, + "pre_only_config": pre_only_config, + "cur_only_config": cur_only_config, + "inconsistent_config": inconsistent_config + } + } + logger.warning("Core dump or config check failed for {}, results: {}" + .format(module_name, json.dumps(check_result))) + results = parallel_run(__dut_reload, (), {"duts_data": duts_data}, duthosts, timeout=360) + logger.debug('Results of dut reload: {}'.format(json.dumps(dict(results)))) + else: + logger.info("Core dump and config check passed for {}".format(module_name)) + + if check_result: + items = request.session.items + for item in items: + 
if item.module.__name__ + ".py" == module_name.split("/")[-1]: + item.user_properties.append(('CustomMsg', json.dumps({'DutChekResult': { + 'core_dump_check_pass': core_dump_check_pass, + 'config_db_check_pass': config_db_check_pass + }}))) + + +@pytest.fixture(scope="function") +def on_exit(): + ''' + Utility to register callbacks for cleanup. Runs callbacks despite assertion + failures. Callbacks are executed in reverse order of registration. + ''' + class OnExit(): + def __init__(self): + self.cbs = [] + + def register(self, fn): + self.cbs.append(fn) + + def cleanup(self): + while len(self.cbs) != 0: + self.cbs.pop()() + + on_exit = OnExit() + yield on_exit + on_exit.cleanup() + + +def verify_packets_any_fixed(test, pkt, ports=[], device_number=0): + """ + Check that a packet is received on _any_ of the specified ports belonging to + the given device (default device_number is 0). + + Also verifies that the packet is not received on any other ports for this + device, and that no other packets are received on the device (unless --relax + is in effect). 
+ + The function is redefined here to workaround code bug in testutils.verify_packets_any + """ + received = False + failures = [] + for device, port in testutils.ptf_ports(): + if device != device_number: + continue + if port in ports: + logging.debug("Checking for pkt on device %d, port %d", device_number, port) + result = testutils.dp_poll(test, device_number=device, port_number=port, exp_pkt=pkt) + if isinstance(result, test.dataplane.PollSuccess): + received = True + else: + failures.append((port, result)) + else: + testutils.verify_no_packet(test, pkt, (device, port)) + testutils.verify_no_other_packets(test) + + if not received: + def format_failure(port, failure): + return "On port %d:\n%s" % (port, failure.format()) + failure_report = "\n".join([format_failure(*f) for f in failures]) + test.fail("Did not receive expected packet on any of ports %r for device %d.\n%s" + % (ports, device_number, failure_report)) + +# HACK: testutils.verify_packets_any to workaround code bug +# TODO: delete me when ptf version is advanced than https://github.com/p4lang/ptf/pull/139 +testutils.verify_packets_any = verify_packets_any_fixed diff --git a/tests/console/test_console_loopback.py b/tests/console/test_console_loopback.py index 91abb222af0..5974dcd826f 100644 --- a/tests/console/test_console_loopback.py +++ b/tests/console/test_console_loopback.py @@ -1,5 +1,7 @@ import pytest import pexpect +import string + from tests.common.helpers.assertions import pytest_assert @@ -38,7 +40,7 @@ def test_console_loopback_echo(duthost, creds, target_line): client.sendline(text) assert_expect_text(client, text, target_line, timeout_sec) except Exception as e: - pytest.fail("Not able to communicate DUT via reverse SSH") + pytest.fail("Not able to communicate DUT via reverse SSH: {}".format(e)) @pytest.mark.parametrize("src_line,dst_line", [('17', '19'), ('18', '20'), diff --git a/tests/console/test_console_reversessh.py b/tests/console/test_console_reversessh.py index 
16de2353462..f8ebd300471 100644 --- a/tests/console/test_console_reversessh.py +++ b/tests/console/test_console_reversessh.py @@ -38,7 +38,7 @@ def test_console_reversessh_connectivity(duthost, creds, target_line): client.sendcontrol('a') client.sendcontrol('x') except Exception as e: - pytest.fail("Not able to do reverse SSH to remote host via DUT") + pytest.fail("Not able to do reverse SSH to remote host via DUT: {}".format(e)) pytest_assert( wait_until(10, 1, 0, check_target_line_status, duthost, target_line, "IDLE"), @@ -69,13 +69,13 @@ def test_console_reversessh_force_interrupt(duthost, creds, target_line): check_target_line_status(duthost, target_line, "BUSY"), "Target line {} is idle while reverse SSH session is up".format(target_line)) except Exception as e: - pytest.fail("Not able to do reverse SSH to remote host via DUT") + pytest.fail("Not able to do reverse SSH to remote host via DUT: {}".format(e)) try: # Force clear line from DUT duthost.shell('sudo sonic-clear line {}'.format(target_line)) except Exception as e: - pytest.fail("Not able to do clear line for DUT") + pytest.fail("Not able to do clear line for DUT: {}".format(e)) # Check the session ended within 5s and the line state is idle pytest_assert( @@ -85,7 +85,7 @@ def test_console_reversessh_force_interrupt(duthost, creds, target_line): try: client.expect("Picocom was killed") except Exception as e: - pytest.fail("Console session not exit correctly: {}".format(str(e))) + pytest.fail("Console session not exit correctly: {}".format(e)) def check_target_line_status(duthost, line, expect_status): console_facts = duthost.console_facts()['ansible_facts']['console_facts'] diff --git a/tests/container_checker/test_container_checker.py b/tests/container_checker/test_container_checker.py old mode 100755 new mode 100644 index ca41778f1d0..395b1505123 --- a/tests/container_checker/test_container_checker.py +++ b/tests/container_checker/test_container_checker.py @@ -11,7 +11,6 @@ from 
tests.common.helpers.assertions import pytest_require from tests.common.helpers.dut_utils import check_container_state from tests.common.helpers.dut_utils import clear_failed_flag_and_restart -from tests.common.helpers.dut_utils import decode_dut_and_container_name from tests.common.helpers.dut_utils import is_hitting_start_limit from tests.common.helpers.dut_utils import is_container_running from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer, LogAnalyzerError @@ -31,44 +30,50 @@ @pytest.fixture(autouse=True, scope="module") -def config_reload_after_tests(rand_selected_dut): +def config_reload_after_tests(duthosts, selected_rand_one_per_hwsku_hostname): """Restores the DuT. Args: - rand_selected_dut: The fixture returns a randomly selected DuT. + duthosts: list of DUTs. + selected_rand_one_per_hwsku_hostname: The fixture returns a dict of module + to list of hostnames mapping Returns: None. """ - duthost = rand_selected_dut - - bgp_neighbors = duthost.get_bgp_neighbors() - up_bgp_neighbors = [ k.lower() for k, v in bgp_neighbors.items() if v["state"] == "established" ] + up_bgp_neighbors = {} + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + bgp_neighbors = duthost.get_bgp_neighbors() + up_bgp_neighbors[duthost] = [ k.lower() for k, v in bgp_neighbors.items() if v["state"] == "established" ] yield - - config_reload(duthost) - postcheck_critical_processes_status(duthost, up_bgp_neighbors) + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + logger.info("Reload config on DuT '{}' ...".format(duthost.hostname)) + config_reload(duthost) + postcheck_critical_processes_status(duthost, up_bgp_neighbors[duthost]) @pytest.fixture(autouse=True, scope="module") -def check_image_version(rand_selected_dut): +def check_image_version(duthosts, selected_rand_one_per_hwsku_hostname): """Skips this test if the SONiC image installed on DUT was 201911 or old version. 
Args: - rand_selected_dut: The fixture returns a randomly selected DuT. - + duthosts: list of DUTs. + selected_rand_one_per_hwsku_hostname: The fixture returns a dict of module + to list of hostnames mapping Returns: None. """ - duthost = rand_selected_dut - - pytest_require(parse_version(duthost.kernel_version) > parse_version("4.9.0"), - "Test was not supported for 201911 and older image version!") + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + pytest_require(parse_version(duthost.kernel_version) > parse_version("4.9.0"), + "Test was not supported for 201911 and older image version!") @pytest.fixture(autouse=True, scope="module") -def update_monit_service(rand_selected_dut): +def update_monit_service(duthosts, selected_rand_one_per_hwsku_hostname): """Update Monit configuration and restart it. This function will first reduce the monitoring interval of container checker @@ -76,34 +81,37 @@ def update_monit_service(rand_selected_dut): After testing, these two changes will be rolled back. Args: - rand_selected_dut: The fixture returns a randomly selected DuT. - + duthosts: list of DUTs. + selected_rand_one_per_hwsku_hostname: The fixture returns a dict of module + to list of hostnames mapping Returns: None. 
""" - duthost = rand_selected_dut - - logger.info("Back up Monit configuration files on DuT '{}' ...".format(duthost.hostname)) - duthost.shell("sudo cp -f /etc/monit/monitrc /tmp/") - duthost.shell("sudo cp -f /etc/monit/conf.d/sonic-host /tmp/") - - temp_config_line = " if status != 0 for 1 times within 1 cycles then alert repeat every 1 cycles" - logger.info("Reduce the monitoring interval of container_checker.") - duthost.shell("sudo sed -i '$s/^./#/' /etc/monit/conf.d/sonic-host") - duthost.shell("echo '{}' | sudo tee -a /etc/monit/conf.d/sonic-host".format(temp_config_line)) - duthost.shell("sudo sed -i 's/with start delay 300/with start delay 10/' /etc/monit/monitrc") - duthost.shell("sudo sed -i 's/set daemon 60/set daemon 10/' /etc/monit/monitrc") - logger.info("Restart the Monit service without delaying to monitor.") - duthost.shell("sudo systemctl restart monit") + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + logger.info("Back up Monit configuration files on DuT '{}' ...".format(duthost.hostname)) + duthost.shell("sudo cp -f /etc/monit/monitrc /tmp/") + duthost.shell("sudo cp -f /etc/monit/conf.d/sonic-host /tmp/") + + temp_config_line = " if status != 0 for 1 times within 1 cycles then alert repeat every 1 cycles" + logger.info("Reduce the monitoring interval of container_checker.") + duthost.shell("sudo sed -i '$s/^./#/' /etc/monit/conf.d/sonic-host") + duthost.shell("echo '{}' | sudo tee -a /etc/monit/conf.d/sonic-host".format(temp_config_line)) + duthost.shell("sudo sed -i 's/with start delay 300/with start delay 10/' /etc/monit/monitrc") + duthost.shell("sudo sed -i 's/set daemon 60/set daemon 10/' /etc/monit/monitrc") + logger.info("Restart the Monit service without delaying to monitor.") + duthost.shell("sudo systemctl restart monit") yield - logger.info("Roll back the Monit configuration of container checker on DuT '{}' ..." 
- .format(duthost.hostname)) - duthost.shell("sudo mv -f /tmp/monitrc /etc/monit/") - duthost.shell("sudo mv -f /tmp/sonic-host /etc/monit/conf.d/") - logger.info("Restart the Monit service and delay monitoring for 5 minutes.") - duthost.shell("sudo systemctl restart monit") + for hostname in selected_rand_one_per_hwsku_hostname: + duthost = duthosts[hostname] + logger.info("Roll back the Monit configuration of container checker on DuT '{}' ..." + .format(duthost.hostname)) + duthost.shell("sudo mv -f /tmp/monitrc /etc/monit/") + duthost.shell("sudo mv -f /tmp/sonic-host /etc/monit/conf.d/") + logger.info("Restart the Monit service and delay monitoring for 5 minutes.") + duthost.shell("sudo systemctl restart monit") def check_all_critical_processes_status(duthost): @@ -155,29 +163,6 @@ def postcheck_critical_processes_status(duthost, up_bgp_neighbors): return wait_until(CONTAINER_RESTART_THRESHOLD_SECS, CONTAINER_CHECK_INTERVAL_SECS, 0, post_test_check, duthost, up_bgp_neighbors) - -def stop_container(duthost, container_name): - """Stops the running container. - - Args: - duthost: Host DUT. - container_name: A string represents the container which will be stopped. - - Returns: - None - """ - - logger.info("Stopping the container '{}' on DuT '{}' ...".format(container_name, duthost.hostname)) - duthost.shell("sudo systemctl stop {}.service".format(container_name)) - logger.info("Waiting until container '{}' is stopped...".format(container_name)) - stopped = wait_until(CONTAINER_STOP_THRESHOLD_SECS, - CONTAINER_CHECK_INTERVAL_SECS, - 0, - check_container_state, duthost, container_name, False) - pytest_assert(stopped, "Failed to stop container '{}'".format(container_name)) - logger.info("Container '{}' on DuT '{}' was stopped".format(container_name, duthost.hostname)) - - def get_expected_alerting_message(container_name): """Generates the expected alerting message from the stopped container. 
@@ -196,7 +181,7 @@ def get_expected_alerting_message(container_name): return expected_alerting_messages -def test_container_checker(duthosts, enum_dut_feature_container, rand_selected_dut, tbinfo): +def test_container_checker(duthosts, enum_rand_one_per_hwsku_hostname, enum_rand_one_asic_index, enum_dut_feature, tbinfo): """Tests the feature of container checker. This function will check whether the container names will appear in the Monit @@ -204,35 +189,40 @@ def test_container_checker(duthosts, enum_dut_feature_container, rand_selected_d Args: duthosts: list of DUTs. - enum_dut_feature_container: A list contains strings ("|"). - rand_selected_dut: The fixture returns a randomly selected DuT. + enum_rand_one_per_hwsku_hostname: Fixture returning list of hostname selected per hwsku. + enum_rand_one_asic_index: Fixture returning list of asics for selected duts. + enum_dut_feature: A list contains features. tbinfo: Testbed information. Returns: None. """ - dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) - pytest_require(dut_name == rand_selected_dut.hostname and container_name != "unknown", - "Skips testing container_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." - .format(container_name, dut_name, rand_selected_dut.hostname)) - duthost = duthosts[dut_name] + service_name = enum_dut_feature + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + asic = duthost.asic_instance(enum_rand_one_asic_index) + container_name = asic.get_docker_name(service_name) loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="container_checker_{}".format(container_name)) disabled_containers = get_disabled_container_list(duthost) skip_containers = disabled_containers[:] - skip_containers.append("gbsyncd") - skip_containers.append("database") - skip_containers.append("database-chassis") - # Skip 'radv' container on devices whose role is not T0. 
- if tbinfo["topo"]["type"] != "t0": + # Skip 'radv' container on devices whose role is not T0/M0. + if tbinfo["topo"]["type"] not in ["t0", "m0"]: skip_containers.append("radv") - pytest_require(container_name not in skip_containers, + pytest_require(service_name not in skip_containers, "Container '{}' is skipped for testing.".format(container_name)) - stop_container(duthost, container_name) + + asic.stop_service(service_name) + logger.info("Waiting until container '{}' is stopped...".format(container_name)) + stopped = wait_until(CONTAINER_STOP_THRESHOLD_SECS, + CONTAINER_CHECK_INTERVAL_SECS, + 0, + check_container_state, duthost, container_name, False) + pytest_assert(stopped, "Failed to stop container '{}'".format(container_name)) + logger.info("Container '{}' on DuT '{}' was stopped".format(container_name, duthost.hostname)) loganalyzer.expect_regex = get_expected_alerting_message(container_name) with loganalyzer: diff --git a/tests/copp/copp_utils.py b/tests/copp/copp_utils.py index 516ba1de91b..e856b26cdb7 100644 --- a/tests/copp/copp_utils.py +++ b/tests/copp/copp_utils.py @@ -215,7 +215,7 @@ def _install_nano(dut, creds, syncd_docker_name): cmd = '''docker exec -e http_proxy={} -e https_proxy={} {} bash -c " \ rm -rf /var/lib/apt/lists/* \ && apt-get update \ - && apt-get install -y python-pip build-essential libssl-dev libffi-dev python-dev python-setuptools wget cmake \ + && apt-get install -y python-pip build-essential libssl1.1 libssl-dev libffi-dev python-dev python-setuptools wget cmake \ && wget https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz \ && tar xzf 1.0.0.tar.gz && cd nanomsg-1.0.0 \ && mkdir -p build && cmake . && make install && ldconfig && cd .. 
&& rm -rf nanomsg-1.0.0 \ diff --git a/tests/copp/test_copp.py b/tests/copp/test_copp.py index 57f91f678c9..ece036039c4 100644 --- a/tests/copp/test_copp.py +++ b/tests/copp/test_copp.py @@ -40,7 +40,7 @@ from tests.common.fixtures.ptfhost_utils import change_mac_addresses # lgtm[py/unused-import] pytestmark = [ - pytest.mark.topology("t1", "t2") + pytest.mark.topology("t0", "t1", "t2", "m0") ] _COPPTestParameters = namedtuple("_COPPTestParameters", @@ -55,6 +55,7 @@ "nn_target_vlanid"]) _SUPPORTED_PTF_TOPOS = ["ptf32", "ptf64"] _SUPPORTED_T0_TOPOS = ["t0", "t0-64", "t0-52", "t0-116"] +_SUPPORTED_M0_TOPOS = ["m0"] _SUPPORTED_T1_TOPOS = ["t1", "t1-lag", "t1-64-lag", "t1-backend"] _SUPPORTED_T2_TOPOS = ["t2"] _TOR_ONLY_PROTOCOL = ["DHCP"] @@ -107,6 +108,7 @@ def test_no_policer(self, protocol, duthosts, enum_rand_one_per_hwsku_frontend_h copp_testbed, dut_type) + @pytest.mark.disable_loganalyzer def test_add_new_trap(self, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, check_image_version, copp_testbed, dut_type, backup_restore_config_db): """ Validates that one new trap(bgp) can be installed @@ -136,6 +138,7 @@ def test_add_new_trap(self, duthosts, enum_rand_one_per_hwsku_frontend_hostname, wait_until(60, 20, 0, _copp_runner, duthost, ptfhost, self.trap_id.upper(), copp_testbed, dut_type), "Installing {} trap fail".format(self.trap_id)) + @pytest.mark.disable_loganalyzer @pytest.mark.parametrize("remove_trap_type", ["delete_feature_entry", "disable_feature_status"]) def test_remove_trap(self, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, check_image_version, copp_testbed, dut_type, backup_restore_config_db, remove_trap_type): @@ -222,7 +225,7 @@ def copp_testbed( duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] test_params = _gather_test_params(tbinfo, duthost, request) - if test_params.topo not in (_SUPPORTED_PTF_TOPOS + _SUPPORTED_T0_TOPOS + _SUPPORTED_T1_TOPOS + _SUPPORTED_T2_TOPOS): + if test_params.topo not in 
(_SUPPORTED_PTF_TOPOS + _SUPPORTED_T0_TOPOS + _SUPPORTED_T1_TOPOS + _SUPPORTED_T2_TOPOS + _SUPPORTED_M0_TOPOS): pytest.skip("Topology not supported by COPP tests") try: @@ -277,7 +280,7 @@ def _copp_runner(dut, ptf, protocol, test_params, dut_type, has_trap=True): testdir="ptftests", # Special Handling for DHCP if we are using T1 Topo testname="copp_tests.{}Test".format((protocol+"TopoT1") - if protocol in _TOR_ONLY_PROTOCOL and dut_type != "ToRRouter" else protocol), + if protocol in _TOR_ONLY_PROTOCOL and dut_type not in ["ToRRouter", "MgmtToRRouter"] else protocol), platform="nn", qlen=100000, params=params, diff --git a/tests/crm/conftest.py b/tests/crm/conftest.py index 85d6b7a7503..b8ae8545f7a 100755 --- a/tests/crm/conftest.py +++ b/tests/crm/conftest.py @@ -2,9 +2,11 @@ import time import json import logging +import re from test_crm import RESTORE_CMDS, CRM_POLLING_INTERVAL from tests.common.errors import RunAnsibleModuleFail +from tests.common.utilities import recover_acl_rule logger = logging.getLogger(__name__) @@ -51,7 +53,10 @@ def pytest_runtest_teardown(item, nextitem): for cmd in RESTORE_CMDS[test_name]: logger.info(cmd) try: - dut.shell(cmd) + if isinstance(cmd, dict): + recover_acl_rule(dut, cmd["data_acl"]) + else: + dut.shell(cmd) except RunAnsibleModuleFail as err: failures.append("Failure during command execution '{command}':\n{error}".format(command=cmd, error=str(err))) @@ -105,13 +110,26 @@ def crm_interface(duthosts, enum_rand_one_per_hwsku_frontend_hostname, tbinfo, e @pytest.fixture(scope="module", autouse=True) def set_polling_interval(duthosts, enum_rand_one_per_hwsku_frontend_hostname): - """ Set CRM polling interval to 1 second """ duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] wait_time = 2 + + # Get polling interval + output = duthost.command('crm show summary')['stdout'] + parsed = re.findall(r'Polling Interval: +(\d+) +second', output) + original_crm_polling_interval = int(parsed[0]) + + # Set CRM polling interval 
to 1 second duthost.command("crm config polling interval {}".format(CRM_POLLING_INTERVAL))["stdout"] logger.info("Waiting {} sec for CRM counters to become updated".format(wait_time)) time.sleep(wait_time) + yield + + # Set CRM polling interval to original value + duthost.command("crm config polling interval {}".format(original_crm_polling_interval))["stdout"] + logger.info("Waiting {} sec for CRM counters to become updated".format(wait_time)) + time.sleep(wait_time) + @pytest.fixture(scope="module") def collector(duthosts, rand_one_dut_hostname): diff --git a/tests/crm/templates/acl.json b/tests/crm/templates/acl.json index ba2a385128f..072653ba557 100644 --- a/tests/crm/templates/acl.json +++ b/tests/crm/templates/acl.json @@ -27,4 +27,3 @@ } } } - diff --git a/tests/crm/test_crm.py b/tests/crm/test_crm.py index 96e3c32b46f..7a0d409a5f0 100755 --- a/tests/crm/test_crm.py +++ b/tests/crm/test_crm.py @@ -9,11 +9,14 @@ import tempfile from jinja2 import Template +from tests.common.cisco_data import is_cisco_device from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer from tests.common.helpers.assertions import pytest_assert from collections import OrderedDict from tests.common.fixtures.duthost_utils import disable_route_checker from tests.common.fixtures.duthost_utils import disable_fdb_aging +from tests.common.utilities import wait_until +from tests.common.utilities import wait_until, get_data_acl pytestmark = [ @@ -130,6 +133,10 @@ def generate_mac(num): return mac_list +def is_cel_e1031_device(duthost): + return duthost.facts["platform"] == "x86_64-cel_e1031-r0" + + def generate_fdb_config(duthost, entry_num, vlan_id, iface, op, dest): """ Generate FDB config file to apply it using 'swssconfig' tool. 
Generated config file template: @@ -269,6 +276,8 @@ def verify_thresholds(duthost, asichost, **kwargs): kwargs["th_lo"] = used_percent kwargs["th_hi"] = used_percent + 1 loganalyzer.expect_regex = [EXPECT_CLEAR] + + kwargs['crm_used'], kwargs['crm_avail'] = get_crm_stats(kwargs['crm_cmd'], duthost) cmd = template.render(**kwargs) with loganalyzer: @@ -285,6 +294,15 @@ def get_crm_stats(cmd, duthost): return crm_stats_used, crm_stats_available +def check_crm_stats(cmd, duthost, origin_crm_stats_used, origin_crm_stats_available, oper_used="==", oper_ava="=="): + crm_stats_used, crm_stats_available = get_crm_stats(cmd, duthost) + if eval("{} {} {}".format(crm_stats_used, oper_used, origin_crm_stats_used)) and \ + eval("{} {} {}".format(crm_stats_available, oper_ava, origin_crm_stats_available)): + return True + else: + return False + + def generate_neighbors(amount, ip_ver): """ Generate list of IPv4 or IPv6 addresses """ if ip_ver == "4": @@ -325,7 +343,7 @@ def configure_nexthop_groups(amount, interface, asichost, test_name): ip_addr_list = generate_neighbors(amount + 1, "4") ip_addr_list = " ".join([str(item) for item in ip_addr_list[1:]]) # Store CLI command to delete all created neighbors if test case will fail - RESTORE_CMDS[test_name].append(del_template.render(iface=interface, + RESTORE_CMDS[test_name].append(del_template.render(iface=interface, neigh_ip_list=ip_addr_list, namespace=asichost.namespace)) logger.info("Configuring {} nexthop groups".format(amount)) @@ -454,30 +472,30 @@ def test_crm_route(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_fro # Get NH IP cmd = "{ip_cmd} -{ip_ver} neigh show dev {crm_intf} nud reachable nud stale \ | grep -v fe80".format(ip_cmd = asichost.ip_cmd, - ip_ver=ip_ver, + ip_ver=ip_ver, crm_intf=crm_interface[0]) out = duthost.shell(cmd) pytest_assert(out["stdout"] != "", "Get Next Hop IP failed. 
Neighbor not found") nh_ip = [item.split()[0] for item in out["stdout"].split("\n") if "REACHABLE" in item][0] - # Add IPv[4/6] routes - # Cisco platforms need an upward of 10 routes for crm_stats_ipv4_route_available to decrement - if duthost.facts["asic_type"] in ["cisco-8000"] and ip_ver == '4': - total_routes = 10 + # Add IPv[4/6] routes + # Cisco platforms need an upward of 64 routes for crm_stats_ipv4_route_available to decrement + if is_cisco_device(duthost) and ip_ver == '4': + total_routes = 64 else: total_routes = 1 for i in range(total_routes): route_add = route_add_cmd.format(asichost.ip_cmd, i, nh_ip) logging.info("route add cmd: {}".format(route_add)) duthost.command(route_add) - + # Make sure CRM counters updated time.sleep(CRM_UPDATE_TIME) # Get new "crm_stats_ipv[4/6]_route" used and available counter value new_crm_stats_route_used, new_crm_stats_route_available = get_crm_stats(get_route_stats, duthost) logging.info(" new_crm_stats_route_used {}, new_crm_stats_route_available{} ".format( new_crm_stats_route_used, new_crm_stats_route_available)) - + # Verify "crm_stats_ipv[4/6]_route_used" counter was incremented if not (new_crm_stats_route_used - crm_stats_route_used == total_routes): @@ -532,32 +550,46 @@ def test_crm_route(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_fro # Make sure SONIC configure expected entries time.sleep(SONIC_RES_UPDATE_TIME) - # Get new "crm_stats_ipv[4/6]_route" used and available counter value - new_crm_stats_route_used, new_crm_stats_route_available = get_crm_stats(get_route_stats, duthost) - RESTORE_CMDS["wait"] = SONIC_RES_UPDATE_TIME # Verify thresholds for "IPv[4/6] route" CRM resource - verify_thresholds(duthost, asichost, crm_cli_res="ipv{ip_ver} route".format(ip_ver=ip_ver), - crm_used=new_crm_stats_route_used, crm_avail=new_crm_stats_route_available) + verify_thresholds(duthost, asichost, crm_cli_res="ipv{ip_ver} route".format(ip_ver=ip_ver), crm_cmd=get_route_stats) 
@pytest.mark.parametrize("ip_ver,nexthop", [("4", "2.2.2.2"), ("6", "2001::1")]) -def test_crm_nexthop(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, crm_interface, ip_ver, nexthop): +def test_crm_nexthop(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, \ + crm_interface, ip_ver, nexthop, ptfhost): duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] asichost = duthost.asic_instance(enum_frontend_asic_index) RESTORE_CMDS["crm_threshold_name"] = "ipv{ip_ver}_nexthop".format(ip_ver=ip_ver) - nexthop_add_cmd = "{ip_cmd} neigh replace {nexthop} \ + if duthost.facts["asic_type"] == "marvell": + if ip_ver == "4": + ptfhost.add_ip_to_dev('eth1', nexthop+'/24') + ptfhost.set_dev_up_or_down('eth1', 'is_up') + ip_add_cmd = "config interface ip add Ethernet1 2.2.2.1/24" + ip_remove_cmd = "config interface ip remove Ethernet1 2.2.2.1/24" + nexthop_add_cmd = "config route add prefix 99.99.99.0/24 nexthop {}".format(nexthop) + nexthop_del_cmd = "config route del prefix 99.99.99.0/24 nexthop {}".format(nexthop) + else: + ptfhost.add_ip_to_dev('eth1', nexthop+'/96') + ptfhost.set_dev_up_or_down('eth1', 'is_up') + ip_add_cmd = "config interface ip add Ethernet1 2001::2/64" + ip_remove_cmd = "config interface ip remove Ethernet1 2001::2/64" + nexthop_add_cmd = "config route add prefix 3001::0/64 nexthop {}".format(nexthop) + nexthop_del_cmd = "config route del prefix 3001::0/64 nexthop {}".format(nexthop) + asichost.sonichost.del_member_from_vlan(1000, 'Ethernet1') + asichost.shell(ip_add_cmd) + else: + nexthop_add_cmd = "{ip_cmd} neigh replace {nexthop} \ lladdr 11:22:33:44:55:66 dev {iface}"\ - .format(ip_cmd=asichost.ip_cmd, + .format(ip_cmd=asichost.ip_cmd, nexthop=nexthop, iface=crm_interface[0]) - nexthop_del_cmd = "{ip_cmd} neigh del {nexthop} \ + nexthop_del_cmd = "{ip_cmd} neigh del {nexthop} \ lladdr 11:22:33:44:55:66 dev {iface}"\ - .format(ip_cmd=asichost.ip_cmd, + .format(ip_cmd=asichost.ip_cmd, 
nexthop=nexthop, iface=crm_interface[0]) - # Get "crm_stats_ipv[4/6]_nexthop" used and available counter value get_nexthop_stats = "{db_cli} COUNTERS_DB HMGET CRM:STATS \ crm_stats_ipv{ip_ver}_nexthop_used \ @@ -565,41 +597,32 @@ def test_crm_nexthop(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_f .format(db_cli=asichost.sonic_db_cli, ip_ver=ip_ver) crm_stats_nexthop_used, crm_stats_nexthop_available = get_crm_stats(get_nexthop_stats, duthost) - # Add nexthop asichost.shell(nexthop_add_cmd) - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) - - # Get new "crm_stats_ipv[4/6]_nexthop" used and available counter value - new_crm_stats_nexthop_used, new_crm_stats_nexthop_available = get_crm_stats(get_nexthop_stats, duthost) - - # Verify "crm_stats_ipv[4/6]_nexthop_used" counter was incremented - if not (new_crm_stats_nexthop_used - crm_stats_nexthop_used >= 1): - RESTORE_CMDS["test_crm_nexthop"].append(nexthop_del_cmd) - pytest.fail("\"crm_stats_ipv{}_nexthop_used\" counter was not incremented".format(ip_ver)) - # Verify "crm_stats_ipv[4/6]_nexthop_available" counter was decremented - if not (crm_stats_nexthop_available - new_crm_stats_nexthop_available >= 1): + logger.info("original crm_stats_nexthop_used is: {}, original crm_stats_nexthop_available is {}".format( + crm_stats_nexthop_used, crm_stats_nexthop_available)) + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_nexthop_stats, duthost, crm_stats_nexthop_used + 1, + crm_stats_nexthop_available - 1, ">=", "<=") + if not crm_stats_checker: RESTORE_CMDS["test_crm_nexthop"].append(nexthop_del_cmd) - pytest.fail("\"crm_stats_ipv{}_nexthop_available\" counter was not decremented".format(ip_ver)) - + pytest_assert(crm_stats_checker, + "\"crm_stats_ipv{}_nexthop_used\" counter was not incremented or " + "\"crm_stats_ipv{}_nexthop_available\" counter was not decremented".format(ip_ver, ip_ver)) # Remove nexthop asichost.shell(nexthop_del_cmd) - - # Make sure CRM counters updated - 
time.sleep(CRM_UPDATE_TIME) + if duthost.facts["asic_type"] == "marvell": + asichost.shell(ip_remove_cmd) + asichost.sonichost.add_member_to_vlan(1000, 'Ethernet1', is_tagged=False) + ptfhost.remove_ip_addresses() + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_nexthop_stats, duthost, crm_stats_nexthop_used, + crm_stats_nexthop_available) + pytest_assert(crm_stats_checker, + "\"crm_stats_ipv{}_nexthop_used\" counter was not decremented or " + "\"crm_stats_ipv{}_nexthop_available\" counter was not incremented".format(ip_ver, ip_ver)) # Get new "crm_stats_ipv[4/6]_nexthop" used and available counter value new_crm_stats_nexthop_used, new_crm_stats_nexthop_available = get_crm_stats(get_nexthop_stats, duthost) - - # Verify "crm_stats_ipv[4/6]_nexthop_used" counter was decremented - pytest_assert(new_crm_stats_nexthop_used - crm_stats_nexthop_used == 0, \ - "\"crm_stats_ipv{}_nexthop_used\" counter was not decremented".format(ip_ver)) - # Verify "crm_stats_ipv[4/6]_nexthop_available" counter was incremented - pytest_assert(new_crm_stats_nexthop_available - crm_stats_nexthop_available == 0, \ - "\"crm_stats_ipv{}_nexthop_available\" counter was not incremented".format(ip_ver)) - used_percent = get_used_percent(new_crm_stats_nexthop_used, new_crm_stats_nexthop_available) if used_percent < 1: neighbours_num = get_entries_num(new_crm_stats_nexthop_used, new_crm_stats_nexthop_available) @@ -611,14 +634,11 @@ def test_crm_nexthop(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_f # Make sure SONIC configure expected entries time.sleep(SONIC_RES_UPDATE_TIME) - # Get new "crm_stats_ipv[4/6]_nexthop" used and available counter value - new_crm_stats_nexthop_used, new_crm_stats_nexthop_available = get_crm_stats(get_nexthop_stats, duthost) - RESTORE_CMDS["wait"] = SONIC_RES_UPDATE_TIME # Verify thresholds for "IPv[4/6] nexthop" CRM resource - verify_thresholds(duthost,asichost, crm_cli_res="ipv{ip_ver} nexthop".format(ip_ver=ip_ver), 
crm_used=new_crm_stats_nexthop_used, - crm_avail=new_crm_stats_nexthop_available) + verify_thresholds(duthost, asichost, crm_cli_res="ipv{ip_ver} nexthop".format(ip_ver=ip_ver), + crm_cmd=get_nexthop_stats) @pytest.mark.parametrize("ip_ver,neighbor,host", [("4", "2.2.2.2", "2.2.2.1/8"), ("6", "2001::1", "2001::2/64")]) @@ -640,50 +660,39 @@ def test_crm_neighbor(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_ crm_stats_neighbor_used, crm_stats_neighbor_available = get_crm_stats(get_neighbor_stats, duthost) # Add reachability to the neighbor - if duthost.facts["asic_type"] in ["cisco-8000"]: + if is_cisco_device(duthost): asichost.config_ip_intf(crm_interface[0], host, "add") # Add neighbor asichost.shell(neighbor_add_cmd) - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) - - # Get new "crm_stats_ipv[4/6]_neighbor" used and available counter value - new_crm_stats_neighbor_used, new_crm_stats_neighbor_available = get_crm_stats(get_neighbor_stats, duthost) - - # Verify "crm_stats_ipv4_neighbor_used" counter was incremented - if not (new_crm_stats_neighbor_used - crm_stats_neighbor_used >= 1): - RESTORE_CMDS["test_crm_neighbor"].append(neighbor_del_cmd) - pytest.fail("\"crm_stats_ipv4_neighbor_used\" counter was not incremented") - # Verify "crm_stats_ipv4_neighbor_available" counter was decremented - if not (crm_stats_neighbor_available - new_crm_stats_neighbor_available >= 1): - RESTORE_CMDS["test_crm_neighbor"].append(neighbor_del_cmd) - pytest.fail("\"crm_stats_ipv4_neighbor_available\" counter was not decremented") + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_neighbor_stats, duthost, crm_stats_neighbor_used, + crm_stats_neighbor_available, ">", "<") + if not crm_stats_checker: + RESTORE_CMDS["test_crm_nexthop"].append(neighbor_del_cmd) + pytest_assert(crm_stats_checker, + "\"crm_stats_ipv4_neighbor_used\" counter was not incremented or " + "\"crm_stats_ipv4_neighbor_available\" counter was not decremented") # Remove 
reachability to the neighbor - if duthost.facts["asic_type"] in ["cisco-8000"]: + if is_cisco_device(duthost): asichost.config_ip_intf(crm_interface[0], host, "remove") # Remove neighbor asichost.shell(neighbor_del_cmd) - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_neighbor_stats, duthost, crm_stats_neighbor_used, + crm_stats_neighbor_available, ">=", "==") + pytest_assert(crm_stats_checker, + "\"crm_stats_ipv4_neighbor_used\" counter was not decremented or " + "\"crm_stats_ipv4_neighbor_available\" counter was not incremented".format( + ip_ver, ip_ver)) # Get new "crm_stats_ipv[4/6]_neighbor" used and available counter value new_crm_stats_neighbor_used, new_crm_stats_neighbor_available = get_crm_stats(get_neighbor_stats, duthost) - - # Verify "crm_stats_ipv4_neighbor_used" counter was decremented - pytest_assert(new_crm_stats_neighbor_used - crm_stats_neighbor_used >= 0, \ - "\"crm_stats_ipv4_neighbor_used\" counter was not decremented") - # Verify "crm_stats_ipv4_neighbor_available" counter was incremented - pytest_assert(new_crm_stats_neighbor_available - crm_stats_neighbor_available == 0, \ - "\"crm_stats_ipv4_neighbor_available\" counter was not incremented") - used_percent = get_used_percent(new_crm_stats_neighbor_used, new_crm_stats_neighbor_available) if used_percent < 1: # Add 3k neighbors instead of 1 percentage for Cisco-8000 devices - neighbours_num = get_entries_num(new_crm_stats_neighbor_used, new_crm_stats_neighbor_available) if duthost.facts["asic_type"] not in ["cisco-8000"] else CISCO_8000_ADD_NEIGHBORS - + neighbours_num = CISCO_8000_ADD_NEIGHBORS if is_cisco_device(duthost) else get_entries_num(new_crm_stats_neighbor_used, new_crm_stats_neighbor_available) + # Add new neighbor entries to correctly calculate used CRM resources in percentage configure_neighbors(amount=neighbours_num, interface=crm_interface[0], ip_ver=ip_ver, asichost=asichost, 
test_name="test_crm_neighbor") @@ -692,14 +701,11 @@ def test_crm_neighbor(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_ # Make sure SONIC configure expected entries time.sleep(SONIC_RES_UPDATE_TIME) - # Get new "crm_stats_ipv[4/6]_neighbor" used and available counter value - new_crm_stats_neighbor_used, new_crm_stats_neighbor_available = get_crm_stats(get_neighbor_stats, duthost) - RESTORE_CMDS["wait"] = SONIC_RES_UPDATE_TIME # Verify thresholds for "IPv[4/6] neighbor" CRM resource - verify_thresholds(duthost, asichost, crm_cli_res="ipv{ip_ver} neighbor".format(ip_ver=ip_ver), crm_used=new_crm_stats_neighbor_used, - crm_avail=new_crm_stats_neighbor_available) + verify_thresholds(duthost, asichost, crm_cli_res="ipv{ip_ver} neighbor".format(ip_ver=ip_ver), + crm_cmd=get_neighbor_stats) @pytest.mark.parametrize("group_member,network", [(False, "2.2.2.0/24"), (True, "2.2.2.0/24")]) @@ -710,7 +716,7 @@ def test_crm_nexthop_group(duthosts, enum_rand_one_per_hwsku_frontend_hostname, nhg_del_template=""" %s ip -4 {{ns_prefix}} route del 3.3.3.0/24 dev {{iface}} - ip -4 {{ns_prefix}} route del 4.4.4.0/24 dev {{iface2}} + ip -4 {{ns_prefix}} route del 4.4.4.0/24 dev {{iface2}} ip {{ns_prefix}} neigh del 3.3.3.1 lladdr 11:22:33:44:55:66 dev {{iface}} ip {{ns_prefix}} neigh del 4.4.4.1 lladdr 77:22:33:44:55:66 dev {{iface2}} ip -4 {{ns_prefix}} route del {{prefix}} nexthop via 3.3.3.1 nexthop via 4.4.4.1""" %(NS_PREFIX_TEMPLATE) @@ -736,7 +742,7 @@ def test_crm_nexthop_group(duthosts, enum_rand_one_per_hwsku_frontend_hostname, crm_stats_nexthop_group_member_available" \ .format(asichost.sonic_db_cli) - + # Get "crm_stats_nexthop_group_[member]" used and available counter value get_nexthop_group_stats = get_group_member_stats if group_member else get_group_stats get_nexthop_group_another_stats = get_group_stats if group_member else get_group_member_stats @@ -751,46 +757,38 @@ def test_crm_nexthop_group(duthosts, enum_rand_one_per_hwsku_frontend_hostname, 
logger.info("Add nexthop groups") duthost.shell(cmd) - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) - - # Get new "crm_stats_nexthop_group_[member]" used and available counter value - new_nexthop_group_used, new_nexthop_group_available = get_crm_stats(get_nexthop_group_stats, duthost) - if group_member: template_resource = 2 else: template_resource = 1 - # Verify "crm_stats_nexthop_group_[member]_used" counter was incremented - if not (new_nexthop_group_used - nexthop_group_used == template_resource): - RESTORE_CMDS["test_crm_nexthop_group"].append(del_template.render(\ - iface=crm_interface[0], iface2=crm_interface[1], prefix=network, namespace=asichost.namespace)) - pytest.fail("\"crm_stats_nexthop_group_{}used\" counter was not incremented".format("member_" if group_member else "")) - - # Verify "crm_stats_nexthop_group_[member]_available" counter was decremented - if not (nexthop_group_available - new_nexthop_group_available >= template_resource): + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_nexthop_group_stats, duthost, + nexthop_group_used + template_resource, + nexthop_group_available + template_resource, "==", "<=") + if not crm_stats_checker: RESTORE_CMDS["test_crm_nexthop_group"].append(del_template.render(\ iface=crm_interface[0], iface2=crm_interface[1], prefix=network, namespace=asichost.namespace)) - pytest.fail("\"crm_stats_nexthop_group_{}available\" counter was not decremented".format("member_" if group_member else "")) + nexthop_group_name = "member_" if group_member else "" + pytest_assert(crm_stats_checker, + "\"crm_stats_nexthop_group_{}used\" counter was not incremented or " + "\"crm_stats_nexthop_group_{}available\" counter was not decremented".format(nexthop_group_name, + nexthop_group_name)) # Remove nexthop group members logger.info("Removing nexthop groups") duthost.shell(del_template.render(iface=crm_interface[0], iface2=crm_interface[1], prefix=network, namespace=asichost.namespace)) - # Make sure 
CRM counters updated - time.sleep(CRM_UPDATE_TIME) + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_nexthop_group_stats, duthost, + nexthop_group_used, + nexthop_group_available) + nexthop_group_name = "member_" if group_member else "" + pytest_assert(crm_stats_checker, + "\"crm_stats_nexthop_group_{}used\" counter was not decremented or " + "\"crm_stats_nexthop_group_{}available\" counter was not incremented".format( + nexthop_group_name, nexthop_group_name)) # Get new "crm_stats_nexthop_group_[member]" used and available counter value new_nexthop_group_used, new_nexthop_group_available = get_crm_stats(get_nexthop_group_stats, duthost) - # Verify "crm_stats_nexthop_group_[member]_used" counter was decremented - pytest_assert(new_nexthop_group_used - nexthop_group_used == 0, \ - "\"crm_stats_nexthop_group_{}used\" counter was not decremented".format("member_" if group_member else "")) - - # Verify "crm_stats_nexthop_group_[member]_available" counter was incremented - pytest_assert(new_nexthop_group_available - nexthop_group_available == 0, \ - "\"crm_stats_nexthop_group_{}available\" counter was not incremented".format("member_" if group_member else "")) - #Preconfiguration needed for used percentage verification used_percent = get_used_percent(new_nexthop_group_used, new_nexthop_group_available) if used_percent < 1: @@ -808,81 +806,111 @@ def test_crm_nexthop_group(duthosts, enum_rand_one_per_hwsku_frontend_hostname, # Make sure SONIC configure expected entries time.sleep(SONIC_RES_UPDATE_TIME) - # Get new "crm_stats_ipv[4/6]_neighbor" used and available counter value - new_nexthop_group_used, new_nexthop_group_available = get_crm_stats(get_nexthop_group_stats, duthost) - RESTORE_CMDS["wait"] = SONIC_RES_UPDATE_TIME - verify_thresholds(duthost, asichost, crm_cli_res=redis_threshold, crm_used=new_nexthop_group_used, - crm_avail=new_nexthop_group_available) + verify_thresholds(duthost, asichost, crm_cli_res=redis_threshold, 
crm_cmd=get_nexthop_group_stats) -def test_acl_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, collector): +@pytest.fixture +def recover_acl_rule(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, collector): duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] asichost = duthost.asic_instance(enum_frontend_asic_index) - asic_collector = collector[asichost.asic_index] - - apply_acl_config(duthost, asichost, "test_acl_entry", asic_collector) - acl_tbl_key = asic_collector["acl_tbl_key"] - get_acl_entry_stats = "{db_cli} COUNTERS_DB HMGET {acl_tbl_key} \ - crm_stats_acl_entry_used \ - crm_stats_acl_entry_available"\ - .format(db_cli=asichost.sonic_db_cli, - acl_tbl_key=acl_tbl_key) base_dir = os.path.dirname(os.path.realpath(__file__)) template_dir = os.path.join(base_dir, "templates") acl_rules_template = "acl.json" - dut_tmp_dir = "/tmp" - RESTORE_CMDS["crm_threshold_name"] = "acl_entry" + dut_tmp_dir = "/tmp-{}".format(asichost.asic_index) + dut_conf_file_path = os.path.join(dut_tmp_dir, acl_rules_template) - crm_stats_acl_entry_used = 0 - crm_stats_acl_entry_available = 0 + pre_acl_rules = duthost.acl_facts()["ansible_facts"]["ansible_acl_facts"]["DATAACL"]["rules"] - # Get new "crm_stats_acl_entry" used and available counter value - new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available = get_crm_stats(get_acl_entry_stats, duthost) + yield - # Verify "crm_stats_acl_entry_used" counter was incremented - pytest_assert(new_crm_stats_acl_entry_used - crm_stats_acl_entry_used == 2, \ - "\"crm_stats_acl_entry_used\" counter was not incremented") + if pre_acl_rules: + for key, value in pre_acl_rules.items(): + if key != "DEFAULT_RULE": + seq_id = key.split('_')[1] + acl_config = json.loads(open(os.path.join(template_dir, acl_rules_template)).read()) + acl_entry_template = \ + acl_config["acl"]["acl-sets"]["acl-set"]["dataacl"]["acl-entries"]["acl-entry"]["1"] + acl_entry_config = 
acl_config["acl"]["acl-sets"]["acl-set"]["dataacl"]["acl-entries"]["acl-entry"] - crm_stats_acl_entry_available = new_crm_stats_acl_entry_available + new_crm_stats_acl_entry_used + acl_entry_config[seq_id] = copy.deepcopy(acl_entry_template) + acl_entry_config[seq_id]["config"]["sequence-id"] = seq_id + acl_entry_config[seq_id]["l2"]["config"]["ethertype"] = value["ETHER_TYPE"] + acl_entry_config[seq_id]["l2"]["config"]["vlan_id"] = value["VLAN_ID"] + acl_entry_config[seq_id]["input_interface"]["interface_ref"]["config"]["interface"] = value["IN_PORTS"] - used_percent = get_used_percent(new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available) - if used_percent < 1: - # Preconfiguration needed for used percentage verification - nexthop_group_num = get_entries_num(new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available) + with tempfile.NamedTemporaryFile(suffix=".json", prefix="acl_config", mode="w") as fp: + json.dump(acl_config, fp) + fp.flush() + logger.info("Generating config for ACL rule, ACL table - DATAACL") + duthost.template(src=fp.name, dest=dut_conf_file_path, force=True) - apply_acl_config(duthost, asichost, "test_acl_entry", asic_collector, nexthop_group_num) + logger.info("Applying {}".format(dut_conf_file_path)) + duthost.command("acl-loader update full {}".format(dut_conf_file_path)) - logger.info("Waiting {} seconds for SONiC to update resources...".format(SONIC_RES_UPDATE_TIME)) - # Make sure SONIC configure expected entries - time.sleep(SONIC_RES_UPDATE_TIME) +def test_acl_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, collector): + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + data_acl = get_data_acl(duthost) + asichost = duthost.asic_instance(enum_frontend_asic_index) + asic_collector = collector[asichost.asic_index] + try: + apply_acl_config(duthost, asichost, "test_acl_entry", asic_collector) + acl_tbl_key = asic_collector["acl_tbl_key"] + get_acl_entry_stats = "{db_cli} 
COUNTERS_DB HMGET {acl_tbl_key} \ + crm_stats_acl_entry_used \ + crm_stats_acl_entry_available"\ + .format(db_cli=asichost.sonic_db_cli, + acl_tbl_key=acl_tbl_key) + + base_dir = os.path.dirname(os.path.realpath(__file__)) + template_dir = os.path.join(base_dir, "templates") + acl_rules_template = "acl.json" + dut_tmp_dir = "/tmp" + + RESTORE_CMDS["crm_threshold_name"] = "acl_entry" + + crm_stats_acl_entry_used = 0 + crm_stats_acl_entry_available = 0 + + # Get new "crm_stats_acl_entry" used and available counter value new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available = get_crm_stats(get_acl_entry_stats, duthost) - # Verify thresholds for "ACL entry" CRM resource - verify_thresholds(duthost,asichost, crm_cli_res="acl group entry", crm_used=new_crm_stats_acl_entry_used, - crm_avail=new_crm_stats_acl_entry_available) + # Verify "crm_stats_acl_entry_used" counter was incremented + pytest_assert(new_crm_stats_acl_entry_used - crm_stats_acl_entry_used == 2, \ + "\"crm_stats_acl_entry_used\" counter was not incremented") - # Remove ACL - duthost.command("acl-loader delete") + crm_stats_acl_entry_available = new_crm_stats_acl_entry_available + new_crm_stats_acl_entry_used - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) + used_percent = get_used_percent(new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available) + if used_percent < 1: + # Preconfiguration needed for used percentage verification + nexthop_group_num = get_entries_num(new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available) + + apply_acl_config(duthost, asichost, "test_acl_entry", asic_collector, nexthop_group_num) - # Get new "crm_stats_acl_entry" used and available counter value - new_crm_stats_acl_entry_used, new_crm_stats_acl_entry_available = get_crm_stats(get_acl_entry_stats, duthost) + logger.info("Waiting {} seconds for SONiC to update resources...".format(SONIC_RES_UPDATE_TIME)) + # Make sure SONIC configure expected entries + 
time.sleep(SONIC_RES_UPDATE_TIME) - # Verify "crm_stats_acl_entry_used" counter was decremented - pytest_assert(new_crm_stats_acl_entry_used - crm_stats_acl_entry_used == 0, \ - "\"crm_stats_acl_entry_used\" counter was not decremented") + # Verify thresholds for "ACL entry" CRM resource + verify_thresholds(duthost, asichost, crm_cli_res="acl group entry", crm_cmd=get_acl_entry_stats) - # Verify "crm_stats_acl_entry_available" counter was incremented - pytest_assert(new_crm_stats_acl_entry_available - crm_stats_acl_entry_available == 0, \ - "\"crm_stats_acl_entry_available\" counter was not incremented") + # Remove ACL + duthost.command("acl-loader delete") + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_acl_entry_stats, duthost, + crm_stats_acl_entry_used, + crm_stats_acl_entry_available) + pytest_assert(crm_stats_checker, + "\"crm_stats_acl_entry_used\" counter was not decremented or " + "\"crm_stats_acl_entry_available\" counter was not incremented") + finally: + if data_acl: + RESTORE_CMDS["test_acl_entry"].append({"data_acl": data_acl}) def test_acl_counter(duthosts, enum_rand_one_per_hwsku_frontend_hostname,enum_frontend_asic_index, collector): duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] @@ -940,36 +968,30 @@ def test_acl_counter(duthosts, enum_rand_one_per_hwsku_frontend_hostname,enum_fr crm_stats_acl_counter_available = new_crm_stats_acl_counter_available + new_crm_stats_acl_counter_used # Verify thresholds for "ACL entry" CRM resource - verify_thresholds(duthost, asichost, crm_cli_res="acl group counter", crm_used=new_crm_stats_acl_counter_used, - crm_avail=new_crm_stats_acl_counter_available) + verify_thresholds(duthost, asichost, crm_cli_res="acl group counter", crm_cmd=get_acl_counter_stats) # Remove ACL duthost.command("acl-loader delete") - - # Make sure CRM counters updated - time.sleep(CRM_UPDATE_TIME) - - # Get new "crm_stats_acl_counter" used and available counter value - new_crm_stats_acl_counter_used, 
new_crm_stats_acl_counter_available = get_crm_stats(get_acl_counter_stats, duthost) - - # Verify "crm_stats_acl_counter_used" counter was decremented - pytest_assert(new_crm_stats_acl_counter_used - crm_stats_acl_counter_used == 0, \ - "\"crm_stats_acl_counter_used\" counter was not decremented") - - # Verify "crm_stats_acl_counter_available" counter was incremented - pytest_assert(new_crm_stats_acl_counter_available - crm_stats_acl_counter_available >= 0, \ - "\"crm_stats_acl_counter_available\" counter was not incremented") + crm_stats_checker = wait_until(30, 5, 0, check_crm_stats, get_acl_counter_stats, duthost, + crm_stats_acl_counter_used, + crm_stats_acl_counter_available, "==", ">=") + pytest_assert(crm_stats_checker, + "\"crm_stats_acl_counter_used\" counter was not decremented or " + "\"crm_stats_acl_counter_available\" counter was not incremented") # Verify "crm_stats_acl_counter_available" counter was equal to original value - pytest_assert(original_crm_stats_acl_counter_available - new_crm_stats_acl_counter_available == 0, \ - "\"crm_stats_acl_counter_available\" counter is not equal to original value") + _, new_crm_stats_acl_counter_available = get_crm_stats(get_acl_counter_stats, duthost) + pytest_assert(original_crm_stats_acl_counter_available - new_crm_stats_acl_counter_available == 0, + "\"crm_stats_acl_counter_available\" counter is not equal to original value") + @pytest.mark.usefixtures('disable_fdb_aging') def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, tbinfo): duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] asichost = duthost.asic_instance(enum_frontend_asic_index) - if "t0" not in tbinfo["topo"]["name"].lower(): - pytest.skip("Unsupported topology, expected to run only on 'T0*' topology") + topo_name_lower = tbinfo["topo"]["name"].lower() + if "t0" not in topo_name_lower and "m0" not in topo_name_lower: + pytest.skip("Unsupported topology, expected to run only on 'T0*' or 
'M0' topology") get_fdb_stats = "redis-cli --raw -n 2 HMGET CRM:STATS crm_stats_fdb_entry_used crm_stats_fdb_entry_available" topology = tbinfo["topo"]["properties"]["topology"] cfg_facts = duthost.config_facts(host=duthost.hostname, source="persistent")['ansible_facts'] @@ -1005,6 +1027,9 @@ def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum cmd = "fdbclear" duthost.command(cmd) time.sleep(5) + if is_cel_e1031_device(duthost): + # Sleep more time for E1031 device after fdbclear + time.sleep(10) # Get "crm_stats_fdb_entry" used and available counter value crm_stats_fdb_entry_used, crm_stats_fdb_entry_available = get_crm_stats(get_fdb_stats, duthost) @@ -1015,12 +1040,28 @@ def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum new_crm_stats_fdb_entry_used, new_crm_stats_fdb_entry_available = get_crm_stats(get_fdb_stats, duthost) # Verify "crm_stats_fdb_entry_used" counter was incremented - pytest_assert(new_crm_stats_fdb_entry_used - crm_stats_fdb_entry_used == 1, \ - "Counter 'crm_stats_fdb_entry_used' was not incremented") + # For Cisco-8000 devices, hardware FDB counter is statistical-based with +/- 1 entry tolerance. + # Hence, the used counter can increase by more than 1. + # For E1031, refer CS00012270660, SDK for Helix4 chip does not support retrieving max l2 entry, + # HW and SW CRM available counter would be out of sync and increase by more than 1. 
+ if is_cisco_device(duthost) or is_cel_e1031_device(duthost): + pytest_assert(new_crm_stats_fdb_entry_used - crm_stats_fdb_entry_used >= 1, \ + "Counter 'crm_stats_fdb_entry_used' was not incremented") + else: + pytest_assert(new_crm_stats_fdb_entry_used - crm_stats_fdb_entry_used == 1, \ + "Counter 'crm_stats_fdb_entry_used' was not incremented") # Verify "crm_stats_fdb_entry_available" counter was decremented - pytest_assert(crm_stats_fdb_entry_available - new_crm_stats_fdb_entry_available == 1, \ - "Counter 'crm_stats_fdb_entry_available' was not incremented") + # For Cisco-8000 devices, hardware FDB counter is statistical-based with +/- 1 entry tolerance. + # Hence, the available counter can decrease by more than 1. + # For E1031, refer CS00012270660, SDK for Helix4 chip does not support retrieving max l2 entry, + # HW and SW CRM available counter would be out of sync and decrease by more than 1. + if is_cisco_device(duthost) or is_cel_e1031_device(duthost): + pytest_assert(crm_stats_fdb_entry_available - new_crm_stats_fdb_entry_available >= 1, \ + "Counter 'crm_stats_fdb_entry_available' was not decremented") + else: + pytest_assert(crm_stats_fdb_entry_available - new_crm_stats_fdb_entry_available == 1, \ + "Counter 'crm_stats_fdb_entry_available' was not decremented") used_percent = get_used_percent(new_crm_stats_fdb_entry_used, new_crm_stats_fdb_entry_available) if used_percent < 1: @@ -1034,14 +1075,11 @@ def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum logger.info("Waiting {} seconds for SONiC to update resources...".format(SONIC_RES_UPDATE_TIME)) # Make sure SONIC configure expected entries time.sleep(SONIC_RES_UPDATE_TIME) - # Get new "crm_stats_fdb_entry" used and available counter value - new_crm_stats_fdb_entry_used, new_crm_stats_fdb_entry_available = get_crm_stats(get_fdb_stats, duthost) RESTORE_CMDS["wait"] = SONIC_RES_UPDATE_TIME # Verify thresholds for "FDB entry" CRM resource - verify_thresholds(duthost, 
asichost, crm_cli_res="fdb", crm_used=new_crm_stats_fdb_entry_used, - crm_avail=new_crm_stats_fdb_entry_available) + verify_thresholds(duthost, asichost, crm_cli_res="fdb", crm_cmd=get_fdb_stats) # Remove FDB entry cmd = "fdbclear" @@ -1049,7 +1087,7 @@ def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum # Make sure CRM counters updated time.sleep(CRM_UPDATE_TIME) - # Timeout for asyc fdb clear + # Timeout for asyc fdb clear FDB_CLEAR_TIMEOUT = 10 while FDB_CLEAR_TIMEOUT > 0: # Get new "crm_stats_fdb_entry" used and available counter value @@ -1064,5 +1102,8 @@ def test_crm_fdb_entry(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum Used == {}".format(new_crm_stats_fdb_entry_used)) # Verify "crm_stats_fdb_entry_available" counter was incremented - pytest_assert(new_crm_stats_fdb_entry_available - crm_stats_fdb_entry_available >= 0, \ - "Counter 'crm_stats_fdb_entry_available' was not incremented") + # For E1031, refer CS00012270660, SDK for Helix4 chip does not support retrieving max l2 entry, HW and + # SW CRM available counter would be out of sync, so this is not applicable for e1031 device + if not is_cel_e1031_device(duthost): + pytest_assert(new_crm_stats_fdb_entry_available - crm_stats_fdb_entry_available >= 0, \ + "Counter 'crm_stats_fdb_entry_available' was not incremented") diff --git a/tests/decap/test_decap.py b/tests/decap/test_decap.py index 57e973c5b46..f51d6e890b2 100644 --- a/tests/decap/test_decap.py +++ b/tests/decap/test_decap.py @@ -1,3 +1,12 @@ +''' +IPinIP Decap configs for different ASICs: +Table Name in APP_DB: TUNNEL_DECAP_TABLE:IPINIP_TUNNEL + +Config Mellanox <= [201911] Mellanox >= [202012] Broadcom <= [201911] Broadcom >= [202012] +dscp_mode uniform uniform pipe uniform +ecn_mode standard standard copy_from_outer copy_from_outer +ttl_mode pipe pipe pipe pipe +''' import json import logging from datetime import datetime @@ -28,18 +37,6 @@ pytest.mark.topology('any') ] - -@pytest.fixture -def 
ttl_dscp_params(duthost, supported_ttl_dscp_params): - if "uniform" in supported_ttl_dscp_params.values() and ("201811" in duthost.os_version or "201911" in duthost.os_version): - pytest.skip('uniform ttl/dscp mode is available from 202012. Current version is %s' % duthost.os_version) - - if supported_ttl_dscp_params['dscp'] == 'pipe' and duthost.facts['asic_type'] in ['cisco-8000']: - pytest.skip('dscp pipe mode is currently not supported for Cisco 8000 platform') - - return supported_ttl_dscp_params - - def remove_default_decap_cfg(duthosts): for duthost in duthosts: logger.info('Remove default decap cfg on {}'.format(duthost.hostname)) @@ -148,24 +145,6 @@ def apply_decap_cfg(duthosts, ip_ver, loopback_ips, ttl_mode, dscp_mode, ecn_mod duthost.shell_cmds(cmds=cmds) duthost.shell('rm /tmp/decap_conf_{}.json'.format(op)) - -@pytest.fixture -def decap_config(duthosts, ttl_dscp_params, ip_ver, loopback_ips): - ecn_mode = "copy_from_outer" - ttl_mode = ttl_dscp_params['ttl'] - dscp_mode = ttl_dscp_params['dscp'] - if duthosts[0].facts['asic_type'] in ['mellanox']: - ecn_mode = 'standard' - - # Add test decap configuration - apply_decap_cfg(duthosts, ip_ver, loopback_ips, ttl_mode, dscp_mode, ecn_mode, 'SET') - - yield ttl_mode, dscp_mode - - # Remove test decap configuration - apply_decap_cfg(duthosts, ip_ver, loopback_ips, ttl_mode, dscp_mode, ecn_mode, 'DEL') - - def set_mux_side(tbinfo, mux_server_url, side): if 'dualtor' in tbinfo['topo']['name']: res = requests.post(mux_server_url, json={"active_side": side}) @@ -173,40 +152,50 @@ def set_mux_side(tbinfo, mux_server_url, side): return res.json() # Response is new mux_status of all mux Y-cables. 
return {} - @pytest.fixture def set_mux_random(tbinfo, mux_server_url): return set_mux_side(tbinfo, mux_server_url, 'random') - -def test_decap(tbinfo, duthosts, ptfhost, setup_teardown, decap_config, mux_server_url, set_mux_random): - +def test_decap(tbinfo, duthosts, ptfhost, setup_teardown, mux_server_url, set_mux_random, supported_ttl_dscp_params, ip_ver, loopback_ips): setup_info = setup_teardown + asic_type = duthosts[0].facts["asic_type"] + ecn_mode = "copy_from_outer" + ttl_mode = supported_ttl_dscp_params['ttl'] + dscp_mode = supported_ttl_dscp_params['dscp'] + if duthosts[0].facts['asic_type'] in ['mellanox']: + ecn_mode = 'standard' - ttl_mode, dscp_mode = decap_config - - if 'dualtor' in tbinfo['topo']['name']: - wait(30, 'Wait some time for mux active/standby state to be stable after toggled mux state') - - log_file = "/tmp/decap.{}.log".format(datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) - ptf_runner(ptfhost, - "ptftests", - "IP_decap_test.DecapPacketTest", - platform_dir="ptftests", - params={"outer_ipv4": setup_info["outer_ipv4"], - "outer_ipv6": setup_info["outer_ipv6"], - "inner_ipv4": setup_info["inner_ipv4"], - "inner_ipv6": setup_info["inner_ipv6"], - "lo_ips": setup_info["lo_ips"], - "lo_ipv6s": setup_info["lo_ipv6s"], - "router_macs": setup_info["router_macs"], - "ttl_mode": ttl_mode, - "dscp_mode": dscp_mode, - "ignore_ttl": setup_info["ignore_ttl"], - "max_internal_hops": setup_info["max_internal_hops"], - "fib_info_files": setup_info["fib_info_files"], - "single_fib_for_duts": setup_info["single_fib_for_duts"], - "ptf_test_port_map": ptf_test_port_map(ptfhost, tbinfo, duthosts, mux_server_url) - }, - qlen=PTFRUNNER_QLEN, - log_file=log_file) + try: + apply_decap_cfg(duthosts, ip_ver, loopback_ips, ttl_mode, dscp_mode, ecn_mode, 'SET') + + if 'dualtor' in tbinfo['topo']['name']: + wait(30, 'Wait some time for mux active/standby state to be stable after toggled mux state') + + log_file = 
"/tmp/decap.{}.log".format(datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) + ptf_runner(ptfhost, + "ptftests", + "IP_decap_test.DecapPacketTest", + platform_dir="ptftests", + params={"outer_ipv4": setup_info["outer_ipv4"], + "outer_ipv6": setup_info["outer_ipv6"], + "inner_ipv4": setup_info["inner_ipv4"], + "inner_ipv6": setup_info["inner_ipv6"], + "lo_ips": setup_info["lo_ips"], + "lo_ipv6s": setup_info["lo_ipv6s"], + "router_macs": setup_info["router_macs"], + "ttl_mode": ttl_mode, + "dscp_mode": dscp_mode, + "asic_type": asic_type, + "ignore_ttl": setup_info["ignore_ttl"], + "max_internal_hops": setup_info["max_internal_hops"], + "fib_info_files": setup_info["fib_info_files"], + "single_fib_for_duts": setup_info["single_fib_for_duts"], + "ptf_test_port_map": ptf_test_port_map(ptfhost, tbinfo, duthosts, mux_server_url) + }, + qlen=PTFRUNNER_QLEN, + log_file=log_file) + except Exception as detail: + raise Exception(detail) + finally: + # Remove test decap configuration + apply_decap_cfg(duthosts, ip_ver, loopback_ips, ttl_mode, dscp_mode, ecn_mode, 'DEL') diff --git a/tests/dhcp_relay/test_dhcp_pkt_fwd.py b/tests/dhcp_relay/test_dhcp_pkt_fwd.py index 4268824cca6..1dbabc91081 100644 --- a/tests/dhcp_relay/test_dhcp_pkt_fwd.py +++ b/tests/dhcp_relay/test_dhcp_pkt_fwd.py @@ -10,7 +10,7 @@ from socket import INADDR_ANY pytestmark = [ - pytest.mark.topology("t1") + pytest.mark.topology("t1", "m0") ] logger = logging.getLogger(__name__) @@ -79,8 +79,9 @@ def dutPorts(self, duthosts, rand_one_dut_hostname, tbinfo): dict: contains downstream/upstream ports information """ duthost = duthosts[rand_one_dut_hostname] - if "t1" not in tbinfo["topo"]["name"]: - pytest.skip("Unsupported topology") + topo_name = tbinfo["topo"]["name"] + if "t1" not in topo_name and topo_name != "m0": + pytest.skip("Unsupported topology: {}".format(topo_name)) downstreamPorts = [] upstreamPorts = [] @@ -88,9 +89,9 @@ def dutPorts(self, duthosts, rand_one_dut_hostname, tbinfo): mgFacts = 
duthost.get_extended_minigraph_facts(tbinfo) for dutPort, neigh in mgFacts["minigraph_neighbors"].items(): - if "T0" in neigh["name"]: + if "t1" in topo_name and "T0" in neigh["name"] or topo_name == "m0" and "MX" in neigh["name"]: downstreamPorts.append(dutPort) - elif "T2" in neigh["name"]: + elif "t1" in topo_name and "T2" in neigh["name"] or topo_name == "m0" and "M1" in neigh["name"]: upstreamPorts.append(dutPort) yield {"upstreamPorts": upstreamPorts, "downstreamPorts": downstreamPorts} @@ -206,8 +207,8 @@ def createDhcpRequestRelayedPacket(self, dutMac): packet: DHCP Request packet """ ether = scapy.Ether(dst=dutMac, src=self.DHCP_RELAY["mac"], type=0x0800) - ip = scapy.IP(src=self.DHCP_RELAY["loopback"], dst=self.DHCP_SERVER["ip"], len=336, ttl=64) - udp = scapy.UDP(sport=self.DHCP_SERVER["port"], dport=self.DHCP_SERVER["port"], len=316) + ip = scapy.IP(src=self.DHCP_RELAY["loopback"], dst=self.DHCP_SERVER["ip"], len=328, ttl=64) + udp = scapy.UDP(sport=self.DHCP_SERVER["port"], dport=self.DHCP_SERVER["port"], len=308) bootp = scapy.BOOTP( op=1, htype=1, diff --git a/tests/dhcp_relay/test_dhcp_relay.py b/tests/dhcp_relay/test_dhcp_relay.py index 3d4636f26f4..e5976b46c97 100644 --- a/tests/dhcp_relay/test_dhcp_relay.py +++ b/tests/dhcp_relay/test_dhcp_relay.py @@ -3,6 +3,7 @@ import random import time import logging +import re from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import change_mac_addresses # lgtm[py/unused-import] @@ -14,10 +15,10 @@ from tests.common.utilities import skip_release from tests.common import config_reload from tests.common.platform.processes_utils import wait_critical_processes -from tests.common.utilities import wait_until +from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer, LogAnalyzerError pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.device_type('vs') ] @@ -92,7 +93,7 @@ def 
dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): for iface_name, neighbor_info_dict in mg_facts['minigraph_neighbors'].items(): if neighbor_info_dict['name'] in mg_facts['minigraph_devices']: neighbor_device_info_dict = mg_facts['minigraph_devices'][neighbor_info_dict['name']] - if 'type' in neighbor_device_info_dict and neighbor_device_info_dict['type'] == 'LeafRouter': + if 'type' in neighbor_device_info_dict and neighbor_device_info_dict['type'] in ['LeafRouter', 'MgmtLeafRouter']: # If this uplink's physical interface is a member of a portchannel interface, # we record the name of the portchannel interface here, as this is the actual # interface the DHCP relay will listen on. @@ -108,9 +109,17 @@ def dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): uplink_interfaces.append(iface_name) uplink_port_indices.append(mg_facts['minigraph_ptf_indices'][iface_name]) + other_client_ports_indices = [] + for iface_name in vlan_info_dict['members'] : + if mg_facts['minigraph_ptf_indices'][iface_name] == client_iface['port_idx']: + pass + else : + other_client_ports_indices.append(mg_facts['minigraph_ptf_indices'][iface_name]) + dhcp_relay_data = {} dhcp_relay_data['downlink_vlan_iface'] = downlink_vlan_iface dhcp_relay_data['client_iface'] = client_iface + dhcp_relay_data['other_client_ports'] = other_client_ports_indices dhcp_relay_data['uplink_interfaces'] = uplink_interfaces dhcp_relay_data['uplink_port_indices'] = uplink_port_indices dhcp_relay_data['switch_loopback_ip'] = str(switch_loopback_ip) @@ -198,16 +207,19 @@ def testing_config(request, duthosts, rand_one_dut_hostname, tbinfo): assert False, "Wrong DHCP setup on Dual ToR testbeds" yield testing_mode, duthost, 'dual_testbed' - elif tbinfo['topo']['name'] == 't0-56-po2vlan': + elif tbinfo['topo']['name'] in ('t0-54-po2vlan', 't0-56-po2vlan'): if testing_mode == SINGLE_TOR_MODE: if subtype_exist and subtype_value == 'DualToR': - assert False, "Wrong DHCP setup on 
t0-56-vlan2po testbeds" + assert False, "Wrong DHCP setup on po2vlan testbeds" yield testing_mode, duthost, 'single_testbed' if testing_mode == DUAL_TOR_MODE: - pytest.skip("skip DUAL_TOR_MODE tests on t0-56-vlan2po testbeds") + pytest.skip("skip DUAL_TOR_MODE tests on po2vlan testbeds") else: + if testing_mode == DUAL_TOR_MODE: + pytest.skip("skip DUAL_TOR_MODE tests on Single ToR testbeds") + if testing_mode == SINGLE_TOR_MODE: if subtype_exist: duthost.shell('redis-cli -n 4 HDEL "DEVICE_METADATA|localhost" "subtype"') @@ -243,37 +255,114 @@ def test_interface_binding(duthosts, rand_one_dut_hostname, dut_dhcp_relay_data) for iface in dhcp_relay['uplink_interfaces']: assert "{}:67".format(iface) in output, "{} is not found in {}".format("{}:67".format(iface), output) -def test_dhcp_relay_default(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config, toggle_all_simulator_ports_to_rand_selected_tor_m): + +def start_dhcp_monitor_debug_counter(duthost): + program_name = "dhcpmon" + program_pid_list = [] + program_list = duthost.shell("ps aux | grep {}".format(program_name)) + matches = re.findall(r'/usr/sbin/dhcpmon.*', program_list["stdout"]) + + for program_info in program_list["stdout_lines"]: + if program_name in program_info: + program_pid = int(program_info.split()[1]) + program_pid_list.append(program_pid) + + for program_pid in program_pid_list: + kill_cmd_result = duthost.shell("sudo kill {} || true".format(program_pid), module_ignore_errors=True) + # Get the exit code of 'kill' command + exit_code = kill_cmd_result["rc"] + if exit_code != 0: + stderr = kill_cmd_result.get("stderr", "") + if "No such process" not in stderr: + pytest.fail("Failed to stop program '{}' before test. 
Error: {}".format(program_name, stderr)) + + if matches: + for dhcpmon_cmd in matches: + if "-D" not in dhcpmon_cmd: + dhcpmon_cmd += " -D" + duthost.shell("docker exec -d dhcp_relay %s" % dhcpmon_cmd) + else: + assert False, "Failed to start dhcpmon in debug counter mode\n" + + +def test_dhcp_relay_default(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config, + rand_unselected_dut, toggle_all_simulator_ports_to_rand_selected_tor_m): # noqa F811 """Test DHCP relay functionality on T0 topology. For each DHCP relay agent running on the DuT, verify DHCP packets are relayed properly """ + testing_mode, duthost, testbed_mode = testing_config if testing_mode == DUAL_TOR_MODE: skip_release(duthost, ["201811", "201911"]) - for dhcp_relay in dut_dhcp_relay_data: - # Run the DHCP relay test on the PTF host - ptf_runner(ptfhost, - "ptftests", - "dhcp_relay_test.DHCPTest", - platform_dir="ptftests", - params={"hostname": duthost.hostname, - "client_port_index": dhcp_relay['client_iface']['port_idx'], - "client_iface_alias": str(dhcp_relay['client_iface']['alias']), - "leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), - "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), - "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'][0]), - "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), - "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), - "relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), - "dest_mac_address": BROADCAST_MAC, - "client_udp_src_port": DEFAULT_DHCP_CLIENT_PORT, - "switch_loopback_ip": dhcp_relay['switch_loopback_ip'], - "uplink_mac": str(dhcp_relay['uplink_mac']), - "testbed_mode": testbed_mode, - "testing_mode": testing_mode}, - log_file="/tmp/dhcp_relay_test.DHCPTest.log") + skip_dhcpmon = any(vers in duthost.os_version for vers in ["201811", "201911", "202111"]) + + try: + for dhcp_relay in dut_dhcp_relay_data: + if not skip_dhcpmon: + 
dhcp_server_num = len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']) + if testing_mode == DUAL_TOR_MODE: + standby_duthost = rand_unselected_dut + start_dhcp_monitor_debug_counter(standby_duthost) + expected_standby_agg_counter_message = ( + r".*dhcp_relay#dhcpmon\[[0-9]+\]: " + r"\[\s*Agg-%s\s*-[\sA-Za-z0-9]+\s*rx/tx\] " + r"Discover: +0/ +0, Offer: +0/ +0, Request: +0/ +0, ACK: +0/ +0+" + ) % (dhcp_relay['downlink_vlan_iface']['name']) + loganalyzer_standby = LogAnalyzer(ansible_host=standby_duthost, marker_prefix="dhcpmon counter") + marker_standby = loganalyzer_standby.init() + loganalyzer_standby.expect_regex = [expected_standby_agg_counter_message] + start_dhcp_monitor_debug_counter(duthost) + expected_agg_counter_message = ( + r".*dhcp_relay#dhcpmon\[[0-9]+\]: " + r"\[\s*Agg-%s\s*-[\sA-Za-z0-9]+\s*rx/tx\] " + r"Discover: +1/ +%d, Offer: +1/ +1, Request: +3/ +%d, ACK: +1/ +1+" + ) % (dhcp_relay['downlink_vlan_iface']['name'], dhcp_server_num, dhcp_server_num * 3) + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="dhcpmon counter") + marker = loganalyzer.init() + loganalyzer.expect_regex = [expected_agg_counter_message] + + # Run the DHCP relay test on the PTF host + ptf_runner(ptfhost, + "ptftests", + "dhcp_relay_test.DHCPTest", + platform_dir="ptftests", + params={"hostname": duthost.hostname, + "client_port_index": dhcp_relay['client_iface']['port_idx'], + # This port is introduced to test DHCP relay packet received + # on other client port + "other_client_port": repr(dhcp_relay['other_client_ports']), + "client_iface_alias": str(dhcp_relay['client_iface']['alias']), + "leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), + "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), + "server_ip": dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'], + "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), + "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), + 
"relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), + "dest_mac_address": BROADCAST_MAC, + "client_udp_src_port": DEFAULT_DHCP_CLIENT_PORT, + "switch_loopback_ip": dhcp_relay['switch_loopback_ip'], + "uplink_mac": str(dhcp_relay['uplink_mac']), + "testbed_mode": testbed_mode, + "testing_mode": testing_mode}, + log_file="/tmp/dhcp_relay_test.DHCPTest.log") + if not skip_dhcpmon: + time.sleep(18) # dhcpmon debug counter prints every 18 seconds + loganalyzer.analyze(marker) + if testing_mode == DUAL_TOR_MODE: + loganalyzer_standby.analyze(marker_standby) + except LogAnalyzerError as err: + logger.error("Unable to find expected log in syslog") + raise err + + if not skip_dhcpmon: + # Clean up - Restart DHCP relay service on DUT to recover original dhcpmon setting + restart_dhcp_service(duthost) + if testing_mode == DUAL_TOR_MODE: + restart_dhcp_service(standby_duthost) + pytest_assert(wait_until(120, 5, 0, check_interface_status, standby_duthost)) + pytest_assert(wait_until(120, 5, 0, check_interface_status, duthost)) def test_dhcp_relay_after_link_flap(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config): @@ -315,7 +404,7 @@ def test_dhcp_relay_after_link_flap(ptfhost, dut_dhcp_relay_data, validate_dut_r "client_iface_alias": str(dhcp_relay['client_iface']['alias']), "leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), - "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'][0]), + "server_ip": dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'], "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), "relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), @@ -378,7 +467,7 @@ def test_dhcp_relay_start_with_uplinks_down(ptfhost, dut_dhcp_relay_data, valida "client_iface_alias": str(dhcp_relay['client_iface']['alias']), 
"leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), - "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'][0]), + "server_ip": dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'], "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), "relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), @@ -414,7 +503,7 @@ def test_dhcp_relay_unicast_mac(ptfhost, dut_dhcp_relay_data, validate_dut_route "client_iface_alias": str(dhcp_relay['client_iface']['alias']), "leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), - "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'][0]), + "server_ip": dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'], "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), "relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), @@ -449,7 +538,7 @@ def test_dhcp_relay_random_sport(ptfhost, dut_dhcp_relay_data, validate_dut_rout "client_iface_alias": str(dhcp_relay['client_iface']['alias']), "leaf_port_indices": repr(dhcp_relay['uplink_port_indices']), "num_dhcp_servers": len(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs']), - "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'][0]), + "server_ip": dhcp_relay['downlink_vlan_iface']['dhcp_server_addrs'], "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), "relay_iface_netmask": str(dhcp_relay['downlink_vlan_iface']['mask']), diff --git a/tests/dhcp_relay/test_dhcpv6_relay.py b/tests/dhcp_relay/test_dhcpv6_relay.py index 44e56262af9..a3201254d45 100644 --- a/tests/dhcp_relay/test_dhcpv6_relay.py +++ 
b/tests/dhcp_relay/test_dhcpv6_relay.py @@ -13,14 +13,53 @@ from tests.common.platform.processes_utils import wait_critical_processes from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m # noqa F401 + pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.device_type('vs') ] +SINGLE_TOR_MODE = 'single' +DUAL_TOR_MODE = 'dual' + logger = logging.getLogger(__name__) + +def wait_all_bgp_up(duthost): + config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] + bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {}) + if not wait_until(60, 10, 0, duthost.check_bgp_session_state, bgp_neighbors.keys()): + pytest.fail("not all bgp sessions are up after config change") + + +@pytest.fixture(scope="module", params=[SINGLE_TOR_MODE, DUAL_TOR_MODE]) +def testing_config(request, duthosts, rand_one_dut_hostname, tbinfo): + testing_mode = request.param + duthost = duthosts[rand_one_dut_hostname] + subtype_exist, subtype_value = get_subtype_from_configdb(duthost) + + if 'dualtor' in tbinfo['topo']['name']: + if testing_mode == SINGLE_TOR_MODE: + pytest.skip("skip SINGLE_TOR_MODE tests on Dual ToR testbeds") + + if testing_mode == DUAL_TOR_MODE: + if not subtype_exist or subtype_value != 'DualToR': + assert False, "Wrong DHCP setup on Dual ToR testbeds" + + yield testing_mode, duthost, 'dual_testbed' + else: + yield testing_mode, duthost, 'single_testbed' + +def get_subtype_from_configdb(duthost): + # HEXISTS returns 1 if the key exists, otherwise 0 + subtype_exist = int(duthost.shell('redis-cli -n 4 HEXISTS "DEVICE_METADATA|localhost" "subtype"')["stdout"]) + subtype_value = "" + if subtype_exist: + subtype_value = duthost.shell('redis-cli -n 4 HGET "DEVICE_METADATA|localhost" "subtype"')["stdout"] + return subtype_exist, subtype_value + 
@pytest.fixture(scope="module") def dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): """ Fixture which returns a list of dictionaries where each dictionary contains @@ -30,7 +69,7 @@ def dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): """ duthost = duthosts[rand_one_dut_hostname] dhcp_relay_data_list = [] - uplink_interface_link_local = "" + downlink_interface_link_local = "" mg_facts = duthost.get_extended_minigraph_facts(tbinfo) @@ -65,7 +104,7 @@ def dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): for iface_name, neighbor_info_dict in mg_facts['minigraph_neighbors'].items(): if neighbor_info_dict['name'] in mg_facts['minigraph_devices']: neighbor_device_info_dict = mg_facts['minigraph_devices'][neighbor_info_dict['name']] - if 'type' in neighbor_device_info_dict and neighbor_device_info_dict['type'] == 'LeafRouter': + if 'type' in neighbor_device_info_dict and neighbor_device_info_dict['type'] in ['LeafRouter', 'MgmtLeafRouter']: # If this uplink's physical interface is a member of a portchannel interface, # we record the name of the portchannel interface here, as this is the actual # interface the DHCP relay will listen on. 
@@ -80,18 +119,27 @@ def dut_dhcp_relay_data(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): if not iface_is_portchannel_member: uplink_interfaces.append(iface_name) uplink_port_indices.append(mg_facts['minigraph_ptf_indices'][iface_name]) - if uplink_interface_link_local == "": - command = "ip addr show {} | grep inet6 | grep 'scope link' | awk '{{print $2}}' | cut -d '/' -f1".format(uplink_interfaces[0]) + if downlink_interface_link_local == "": + command = "ip addr show {} | grep inet6 | grep 'scope link' | awk '{{print $2}}' | cut -d '/' -f1"\ + .format(downlink_vlan_iface['name']) res = duthost.shell(command) if res['stdout'] != "": - uplink_interface_link_local = res['stdout'] + downlink_interface_link_local = res['stdout'] dhcp_relay_data = {} dhcp_relay_data['downlink_vlan_iface'] = downlink_vlan_iface dhcp_relay_data['client_iface'] = client_iface dhcp_relay_data['uplink_interfaces'] = uplink_interfaces dhcp_relay_data['uplink_port_indices'] = uplink_port_indices - dhcp_relay_data['uplink_interface_link_local'] = uplink_interface_link_local + dhcp_relay_data['downlink_interface_link_local'] = downlink_interface_link_local + dhcp_relay_data['loopback_ipv6'] = mg_facts['minigraph_lo_interfaces'][1]['addr'] + if 'dualtor' in tbinfo['topo']['name']: + dhcp_relay_data['is_dualtor'] = True + else: + dhcp_relay_data['is_dualtor'] = False + + res = duthost.shell('cat /sys/class/net/{}/address'.format(uplink_interfaces[0])) + dhcp_relay_data['uplink_mac'] = res['stdout'] dhcp_relay_data_list.append(dhcp_relay_data) @@ -111,11 +159,13 @@ def validate_dut_routes_exist(duthosts, rand_one_dut_hostname, dut_dhcp_relay_da rtInfo = duthost.get_ip_route_info(ipaddress.ip_address(dhcp_server)) assert len(rtInfo["nexthops"]) > 0, "Failed to find route to DHCP server '{0}'".format(dhcp_server) + def check_interface_status(duthost): if ":547" in duthost.shell("docker exec -it dhcp_relay ss -nlp | grep dhcp6relay")["stdout"].encode("utf-8"): return True return False + def 
test_interface_binding(duthosts, rand_one_dut_hostname, dut_dhcp_relay_data): duthost = duthosts[rand_one_dut_hostname] skip_release(duthost, ["201911", "202106"]) @@ -126,9 +176,13 @@ def test_interface_binding(duthosts, rand_one_dut_hostname, dut_dhcp_relay_data) output = duthost.shell("docker exec -it dhcp_relay ss -nlp | grep dhcp6relay")["stdout"].encode("utf-8") logger.info(output) for dhcp_relay in dut_dhcp_relay_data: - assert "*:{}".format(dhcp_relay['downlink_vlan_iface']['name']) in output, "{} is not found in {}".format("*:{}".format(dhcp_relay['downlink_vlan_iface']['name']), output) + assert ("*:{}".format(dhcp_relay['downlink_vlan_iface']['name']) or "*:*" in output, "{} is not found in {}"\ + .format("*:{}".format(dhcp_relay['downlink_vlan_iface']['name']), output)) or \ + ("*:*" in output, "dhcp6relay socket is not properly binded") + -def test_dhcpv6_relay_counter(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_relay_data): +def test_dhcpv6_relay_counter(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_relay_data, + toggle_all_simulator_ports_to_rand_selected_tor_m): # noqa F811 """ Test DHCPv6 Counter """ duthost = duthosts[rand_one_dut_hostname] skip_release(duthost, ["201911", "202106"]) @@ -153,21 +207,35 @@ def test_dhcpv6_relay_counter(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcpv6_server_addrs'][0]), "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), - "relay_link_local": str(dhcp_relay['uplink_interface_link_local']), - "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr'])}, + "dut_mac": str(dhcp_relay['uplink_mac']), + "relay_link_local": str(dhcp_relay['downlink_interface_link_local']), + "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), + "loopback_ipv6": str(dhcp_relay['loopback_ipv6']), + "is_dualtor": str(dhcp_relay['is_dualtor'])}, 
log_file="/tmp/dhcpv6_relay_test.DHCPCounterTest.log") for message in messages: - get_message = 'sonic-db-cli STATE_DB hget "DHCPv6_COUNTER_TABLE|{}" {}'.format(dhcp_relay['downlink_vlan_iface']['name'], message) - message_count = duthost.shell(get_message)['stdout'] - assert int(message_count) > 0, "Missing {} count".format(message) - -def test_dhcp_relay_default(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_relay_data, validate_dut_routes_exist): + if message == "Relay-Reply" and dhcp_relay['is_dualtor']: + get_message = 'sonic-db-cli STATE_DB hget "DHCPv6_COUNTER_TABLE|Loopback0" {}'.format(message) + message_count = duthost.shell(get_message)['stdout'] + assert int(message_count) > 0, "Missing {} count".format(message) + else: + get_message = 'sonic-db-cli STATE_DB hget "DHCPv6_COUNTER_TABLE|{}" {}'\ + .format(dhcp_relay['downlink_vlan_iface']['name'], message) + message_count = duthost.shell(get_message)['stdout'] + assert int(message_count) > 0, "Missing {} count".format(message) + + +def test_dhcp_relay_default(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config, + toggle_all_simulator_ports_to_rand_selected_tor_m): # noqa F811 """Test DHCP relay functionality on T0 topology. 
For each DHCP relay agent running on the DuT, verify DHCP packets are relayed properly """ - duthost = duthosts[rand_one_dut_hostname] - skip_release(duthost, ["201811", "201911", "202106"]) + testing_mode, duthost, testbed_mode = testing_config + skip_release(duthost, ["201811", "201911", "202106"]) #TO-DO: delete skip release on 201811 and 201911 + + if testing_mode == DUAL_TOR_MODE: + skip_release(duthost, ["201811", "201911"]) for dhcp_relay in dut_dhcp_relay_data: # Run the DHCP relay test on the PTF host @@ -182,19 +250,24 @@ def test_dhcp_relay_default(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_r "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcpv6_server_addrs'][0]), "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), - "relay_link_local": str(dhcp_relay['uplink_interface_link_local']), - "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr'])}, + "relay_link_local": str(dhcp_relay['downlink_interface_link_local']), + "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), + "uplink_mac": str(dhcp_relay['uplink_mac']), + "loopback_ipv6": str(dhcp_relay['loopback_ipv6']), + "is_dualtor": str(dhcp_relay['is_dualtor'])}, log_file="/tmp/dhcpv6_relay_test.DHCPTest.log") - -def test_dhcp_relay_after_link_flap(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_relay_data, validate_dut_routes_exist): +def test_dhcp_relay_after_link_flap(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config): """Test DHCP relay functionality on T0 topology after uplinks flap For each DHCP relay agent running on the DuT, with relay agent running, flap the uplinks, then test whether the DHCP relay agent relays packets properly. 
""" - duthost = duthosts[rand_one_dut_hostname] + testing_mode, duthost, testbed_mode = testing_config skip_release(duthost, ["201811", "201911", "202106"]) + if testbed_mode == 'dual_testbed': + pytest.skip("skip the link flap testcase on dual tor testbeds") + for dhcp_relay in dut_dhcp_relay_data: # Bring all uplink interfaces down for iface in dhcp_relay['uplink_interfaces']: @@ -208,7 +281,7 @@ def test_dhcp_relay_after_link_flap(ptfhost, duthosts, rand_one_dut_hostname, du duthost.shell('ifconfig {} up'.format(iface)) # Sleep a bit to ensure uplinks are up - time.sleep(20) + wait_all_bgp_up(duthost) # Run the DHCP relay test on the PTF host ptf_runner(ptfhost, @@ -222,20 +295,26 @@ def test_dhcp_relay_after_link_flap(ptfhost, duthosts, rand_one_dut_hostname, du "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcpv6_server_addrs'][0]), "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), - "relay_link_local": str(dhcp_relay['uplink_interface_link_local']), - "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr'])}, + "relay_link_local": str(dhcp_relay['downlink_interface_link_local']), + "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), + "uplink_mac": str(dhcp_relay['uplink_mac']), + "loopback_ipv6": str(dhcp_relay['loopback_ipv6']), + "is_dualtor": str(dhcp_relay['is_dualtor'])}, log_file="/tmp/dhcpv6_relay_test.DHCPTest.log") -def test_dhcp_relay_start_with_uplinks_down(ptfhost, duthosts, rand_one_dut_hostname, dut_dhcp_relay_data, validate_dut_routes_exist): +def test_dhcp_relay_start_with_uplinks_down(ptfhost, dut_dhcp_relay_data, validate_dut_routes_exist, testing_config): """Test DHCP relay functionality on T0 topology when relay agent starts with uplinks down For each DHCP relay agent running on the DuT, bring the uplinks down, then restart the relay agent while the uplinks are still down. Then test whether the DHCP relay agent relays packets properly. 
""" - duthost = duthosts[rand_one_dut_hostname] + testing_mode, duthost, testbed_mode = testing_config skip_release(duthost, ["201811", "201911", "202106"]) + if testbed_mode == 'dual_testbed': + pytest.skip("skip the uplinks down testcase on dual tor testbeds") + for dhcp_relay in dut_dhcp_relay_data: # Bring all uplink interfaces down for iface in dhcp_relay['uplink_interfaces']: @@ -259,7 +338,7 @@ def test_dhcp_relay_start_with_uplinks_down(ptfhost, duthosts, rand_one_dut_host duthost.shell('ifconfig {} up'.format(iface)) # Sleep a bit to ensure uplinks are up - time.sleep(20) + wait_all_bgp_up(duthost) # Run the DHCP relay test on the PTF host ptf_runner(ptfhost, @@ -273,6 +352,9 @@ def test_dhcp_relay_start_with_uplinks_down(ptfhost, duthosts, rand_one_dut_host "server_ip": str(dhcp_relay['downlink_vlan_iface']['dhcpv6_server_addrs'][0]), "relay_iface_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), "relay_iface_mac": str(dhcp_relay['downlink_vlan_iface']['mac']), - "relay_link_local": str(dhcp_relay['uplink_interface_link_local']), - "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr'])}, + "relay_link_local": str(dhcp_relay['downlink_interface_link_local']), + "vlan_ip": str(dhcp_relay['downlink_vlan_iface']['addr']), + "uplink_mac": str(dhcp_relay['uplink_mac']), + "loopback_ipv6": str(dhcp_relay['loopback_ipv6']), + "is_dualtor": str(dhcp_relay['is_dualtor'])}, log_file="/tmp/dhcpv6_relay_test.DHCPTest.log") diff --git a/tests/drop_packets/test_configurable_drop_counters.py b/tests/drop_packets/test_configurable_drop_counters.py index 5c87cc1b8e9..a9ef244308a 100644 --- a/tests/drop_packets/test_configurable_drop_counters.py +++ b/tests/drop_packets/test_configurable_drop_counters.py @@ -59,6 +59,21 @@ def vlan_mac(duthost): dut_vlan_mac = duthost.facts['router_mac'] return dut_vlan_mac + +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exception(duthosts, rand_one_dut_hostname, loganalyzer): + if loganalyzer: + ignore_regex_list = 
[ + ".*ERR swss[0-9]*#orchagent.*meta_sai_validate_fdb_entry.*object key SAI_OBJECT_TYPE_FDB_ENTRY.*doesn't exist.*", + ".*ERR swss[0-9]*#orchagent.*removeFdbEntry: FdbOrch RemoveFDBEntry: Failed to remove FDB entry. mac=.*, bv_id=.*", + ".*ERR swss[0-9]*#orchagent.*handleSaiRemoveStatus: Encountered failure in remove operation, exiting orchagent, SAI API: SAI_API_FDB, status: SAI_STATUS_INVALID_PARAMETER.*", + ".*ERR syncd[0-9]*#syncd.*SAI_API_DEBUG_COUNTER:_brcm_sai_debug_counter_value_get.*No debug_counter at index.*found.*", + ".*ERR syncd[0-9]*#syncd.*collectPortDebugCounters: Failed to get stats of port.*" + ] + duthost = duthosts[rand_one_dut_hostname] + loganalyzer[duthost.hostname].ignore_regex.extend(ignore_regex_list) + + def apply_fdb_config(duthost, vlan_id, iface, mac_address, op, type): """ Generate FDB config file to apply it using 'swssconfig' tool. Generated config file template: @@ -83,7 +98,7 @@ def apply_fdb_config(duthost, vlan_id, iface, mac_address, op, type): fdb_config_json.append(fdb_entry_json) with tempfile.NamedTemporaryFile(suffix=".json", prefix="fdb_config") as fp: - logging.info("Generating FDB config") + logging.info("Generating FDB config: {}".format(fdb_config_json)) json.dump(fdb_config_json, fp) fp.flush() @@ -167,7 +182,7 @@ def test_neighbor_link_down(testbed_params, setup_counters, duthosts, rand_one_d # FIXME: Add config reload on t0-backend as a workaround to keep DUT healthy because the following # drop packet testcases will suffer from the brcm_sai_get_port_stats errors flooded in syslog if "backend" in tbinfo["topo"]["name"]: - config_reload(duthost) + config_reload(duthost, safe_reload=True) @pytest.mark.parametrize("drop_reason", ["DIP_LINK_LOCAL"]) diff --git a/tests/dualtor/conftest.py b/tests/dualtor/conftest.py index ea7850a411c..9a642a5b819 100644 --- a/tests/dualtor/conftest.py +++ b/tests/dualtor/conftest.py @@ -79,22 +79,35 @@ def common_setup_teardown(rand_selected_dut, request, tbinfo, vmhost): if 'dualtor' 
in tbinfo['topo']['name']: request.getfixturevalue('run_garp_service') - -@pytest.fixture(scope="module") -def run_arp_responder_ipv6(rand_selected_dut, ptfhost, tbinfo, apply_mock_dual_tor_tables, copy_arp_responder_py): - """Run arp_responder to enable ptf to respond neighbor solicitation messages""" +def _setup_arp_responder(rand_selected_dut, ptfhost, tbinfo, ip_type): + logging.info('Setup ARP responder in the PTF container {}'\ + .format(ptfhost.hostname)) duthost = rand_selected_dut mg_facts = duthost.get_extended_minigraph_facts(tbinfo) minigraph_ptf_indices = mg_facts['minigraph_ptf_indices'] mux_config = mux_cable_server_ip(duthost) - - arp_responder_conf = {"eth%s" % minigraph_ptf_indices[port]: [config["server_ipv6"].split("/")[0]] for port, config in mux_config.items()} + if ip_type == 'ipv4': + arp_responder_conf = {"eth%s" % minigraph_ptf_indices[port]: [config["server_ipv4"].split("/")[0]] for port, config in mux_config.items()} + else: + arp_responder_conf = {"eth%s" % minigraph_ptf_indices[port]: [config["server_ipv6"].split("/")[0]] for port, config in mux_config.items()} ptfhost.copy(content=json.dumps(arp_responder_conf, indent=4), dest="/tmp/from_t1.json") + ptfhost.host.options["variable_manager"].extra_vars.update({"arp_responder_args": ""}) ptfhost.template(src="templates/arp_responder.conf.j2", dest="/etc/supervisor/conf.d/arp_responder.conf") ptfhost.shell('supervisorctl reread && supervisorctl update') ptfhost.shell('supervisorctl restart arp_responder') +@pytest.fixture(scope="module") +def run_arp_responder_ipv6(rand_selected_dut, ptfhost, tbinfo, apply_mock_dual_tor_tables): + """Run arp_responder to enable ptf to respond neighbor solicitation messages""" + _setup_arp_responder(rand_selected_dut, ptfhost, tbinfo, 'ipv6') yield ptfhost.shell('supervisorctl stop arp_responder') + +@pytest.fixture(scope="module") +def run_arp_responder(rand_selected_dut, ptfhost, tbinfo): + _setup_arp_responder(rand_selected_dut, ptfhost, tbinfo, 'ipv4') 
+ yield + + ptfhost.shell('supervisorctl stop arp_responder') \ No newline at end of file diff --git a/tests/dualtor/test_ipinip.py b/tests/dualtor/test_ipinip.py index 0de4d5c02f8..8d7f6b851aa 100644 --- a/tests/dualtor/test_ipinip.py +++ b/tests/dualtor/test_ipinip.py @@ -10,6 +10,7 @@ import random import time import contextlib +import scapy from ptf import mask from ptf import testutils @@ -19,8 +20,10 @@ from tests.common.dualtor.dual_tor_utils import rand_selected_interface from tests.common.dualtor.dual_tor_utils import get_ptf_server_intf_index from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_unselected_tor # lgtm[py/unused-import] from tests.common.dualtor.tunnel_traffic_utils import tunnel_traffic_monitor -from tests.common.utilities import is_ipv4_address +from tests.common.helpers.assertions import pytest_require +from tests.common.utilities import is_ipv4_address, wait_until from tests.common.fixtures.ptfhost_utils import run_icmp_responder from tests.common.fixtures.ptfhost_utils import run_garp_service from tests.common.fixtures.ptfhost_utils import change_mac_addresses @@ -31,6 +34,7 @@ pytest.mark.topology("t0") ] +logger = logging.getLogger(__name__) @pytest.fixture(scope="module", autouse=True) def mock_common_setup_teardown( @@ -126,8 +130,7 @@ def stop_garp(ptfhost): def test_decap_standby_tor( - apply_standby_state_to_orchagent, - build_encapsulated_packet, + build_encapsulated_packet, request, rand_selected_interface, ptfadapter, tbinfo, rand_selected_dut, tunnel_traffic_monitor ): @@ -140,6 +143,11 @@ def verify_downstream_packet_to_server(ptfadapter, port, exp_pkt): return True return False + if is_t0_mocked_dualtor(tbinfo): + request.getfixturevalue('apply_standby_state_to_orchagent') + else: + request.getfixturevalue('toggle_all_simulator_ports_to_rand_unselected_tor') + tor = rand_selected_dut 
encapsulated_packet = build_encapsulated_packet iface, _ = rand_selected_interface @@ -153,3 +161,103 @@ def verify_downstream_packet_to_server(ptfadapter, port, exp_pkt): testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), encapsulated_packet, count=10) time.sleep(2) verify_downstream_packet_to_server(ptfadapter, exp_ptf_port_index, exp_pkt) + + +def _wait_portchannel_up(duthost, portchannel): + def _check_lag_status(): + cmd = "show interface portchannel | grep {}".format(portchannel) + return '(Up)' in duthost.shell(cmd)['stdout'] + + if not wait_until(300, 10, 30, _check_lag_status): + pytest.fail("PortChannel didn't startup") + # Wait another 60 seconds for routes announcement + time.sleep(60) + + +@pytest.fixture +def setup_uplink(rand_selected_dut, tbinfo): + """ + Function level fixture. + 1. Only keep 1 uplink up. Shutdown others to force the bounced back traffic is egressed from monitor port of mirror session + 2. If there are more than 1 member in the LAG, update the LAG to have only one member + """ + pytest_require("dualtor" in tbinfo['topo']['name'], "Only run on dualtor testbed") + mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) + portchannels = mg_facts['minigraph_portchannels'].keys() + up_portchannel = random.choice(portchannels) + logger.info("Select uplink {} for testing".format(up_portchannel)) + # Shutdown other uplinks except for the selected one + for pc in portchannels: + if pc != up_portchannel: + cmd = "config interface shutdown {}".format(pc) + rand_selected_dut.shell(cmd) + # Update the LAG if it has more than one member + pc_members = mg_facts['minigraph_portchannels'][up_portchannel]['members'] + if len(pc_members) > 1: + cmds = [ + "sonic-db-cli CONFIG_DB hset 'PORTCHANNEL|{}' 'min_links' 1".format(up_portchannel), # Update min_links + "config portchannel member del {} {}".format(up_portchannel, pc_members[len(pc_members) - 1]), # Remove 1 portchannel member + "systemctl unmask teamd", # Unmask the service + 
"systemctl restart teamd" # Resart teamd + ] + rand_selected_dut.shell_cmds(cmds=cmds) + _wait_portchannel_up(rand_selected_dut, up_portchannel) + up_member = pc_members[0] + + yield mg_facts['minigraph_ptf_indices'][up_member] + + # Startup the uplinks that were shutdown + for pc in portchannels: + if pc != up_portchannel: + cmd = "config interface startup {}".format(pc) + rand_selected_dut.shell(cmd) + # Restore the LAG + if len(pc_members) > 1: + cmds = [ + "sonic-db-cli CONFIG_DB hset 'PORTCHANNEL|{}' 'min_links' 2".format(up_portchannel), # Update min_links + "config portchannel member add {} {}".format(up_portchannel, pc_members[1]), # Add back portchannel member + "systemctl unmask teamd", # Unmask the service + "systemctl restart teamd" # Resart teamd + ] + rand_selected_dut.shell_cmds(cmds=cmds) + _wait_portchannel_up(rand_selected_dut, up_portchannel) + + +@pytest.fixture +def setup_mirror_session(rand_selected_dut, setup_uplink): + """ + A function level fixture to add/remove a dummy mirror session. + The mirror session is to trigger the issue. No packet is mirrored actually. + """ + session_name = "dummy_session" + cmd = "config mirror_session add {} 25.192.243.243 20.2.214.125 8 100 1234 0".format(session_name) + rand_selected_dut.shell(cmd=cmd) + uplink_port_id = setup_uplink + yield uplink_port_id + + cmd = "config mirror_session remove {}".format(session_name) + rand_selected_dut.shell(cmd=cmd) + + +@pytest.mark.disable_loganalyzer +def test_encap_with_mirror_session(rand_selected_dut, rand_selected_interface, ptfadapter, tbinfo, setup_mirror_session, toggle_all_simulator_ports_to_rand_unselected_tor, tunnel_traffic_monitor): + """ + A test case to verify the bounced back packet from Standby ToR to T1 doesn't have an unexpected vlan id (4095) + The issue can happen if the bounced back packets egressed from the monitor port of mirror session + Find more details in CSP CS00012263713. 
+ """ + # Since we have only 1 uplink, the source port is also the dest port + src_port_id = setup_mirror_session + _, server_ip = rand_selected_interface + # Construct the packet to server + pkt_to_server = testutils.simple_tcp_packet( + eth_dst=rand_selected_dut.facts["router_mac"], + ip_src="1.1.1.1", + ip_dst=server_ip['server_ipv4'].split('/')[0] + ) + logging.info("Sending packet from ptf t1 interface {}".format(src_port_id)) + inner_packet = pkt_to_server[scapy.all.IP].copy() + inner_packet[IP].ttl -= 1 + with tunnel_traffic_monitor(rand_selected_dut, inner_packet=inner_packet, check_items=()): + testutils.send(ptfadapter, src_port_id, pkt_to_server) + diff --git a/tests/dualtor/test_orch_stress.py b/tests/dualtor/test_orch_stress.py index 2e7fed2ffb5..b1934fc68ec 100644 --- a/tests/dualtor/test_orch_stress.py +++ b/tests/dualtor/test_orch_stress.py @@ -147,7 +147,6 @@ def config_crm_polling_interval(rand_selected_dut): def test_change_mux_state( - require_mocked_dualtor, apply_mock_dual_tor_tables, apply_mock_dual_tor_kernel_configs, rand_selected_dut, @@ -215,7 +214,6 @@ def add_neighbors(dut, neighbors, interface): def test_flap_neighbor_entry_active( - require_mocked_dualtor, apply_mock_dual_tor_tables, apply_mock_dual_tor_kernel_configs, rand_selected_dut, @@ -249,7 +247,6 @@ def test_flap_neighbor_entry_active( def test_flap_neighbor_entry_standby( - require_mocked_dualtor, apply_mock_dual_tor_tables, apply_mock_dual_tor_kernel_configs, rand_selected_dut, diff --git a/tests/dualtor/test_orchagent_active_tor_downstream.py b/tests/dualtor/test_orchagent_active_tor_downstream.py index 995578540a6..6979af2bd07 100644 --- a/tests/dualtor/test_orchagent_active_tor_downstream.py +++ b/tests/dualtor/test_orchagent_active_tor_downstream.py @@ -3,6 +3,7 @@ import pytest import random +from ipaddress import ip_address from ptf import testutils from tests.common.dualtor.dual_tor_mock import * from tests.common.dualtor.dual_tor_utils import dualtor_info @@ -19,6 
+20,8 @@ from tests.common.fixtures.ptfhost_utils import run_icmp_responder from tests.common.fixtures.ptfhost_utils import run_garp_service from tests.common.fixtures.ptfhost_utils import change_mac_addresses +from tests.common.helpers.assertions import pytest_assert +from tests.common.utilities import wait_until pytestmark = [ @@ -43,6 +46,7 @@ def testbed_setup(ip_version, ptfhost, rand_selected_dut, rand_unselected_dut, t test_port = testbed_params["selected_port"] if ip_version == "ipv4": server_ip = testbed_params["target_server_ip"] + request.getfixturevalue("run_arp_responder") elif ip_version == "ipv6": server_ip = testbed_params["target_server_ipv6"] # setup arp_responder to answer ipv6 neighbor solicitation messages @@ -52,10 +56,16 @@ def testbed_setup(ip_version, ptfhost, rand_selected_dut, rand_unselected_dut, t return test_port, server_ip, ip_version +def neighbor_reachable(duthost, neighbor_ip): + neigh_table = duthost.switch_arptable()['ansible_facts']['arptable'] + ip_version = 'v4' if ip_address(neighbor_ip).version == 4 else 'v6' + neigh_status = neigh_table[ip_version][neighbor_ip]['state'].lower() + return "reachable" in neigh_status or "permanent" in neigh_status + + def test_active_tor_remove_neighbor_downstream_active( conn_graph_facts, ptfadapter, ptfhost, testbed_setup, - rand_selected_dut, tbinfo, - require_mocked_dualtor, set_crm_polling_interval, + rand_selected_dut, tbinfo, set_crm_polling_interval, tunnel_traffic_monitor, vmhost ): """ @@ -67,60 +77,68 @@ def test_active_tor_remove_neighbor_downstream_active( """ @contextlib.contextmanager - def remove_neighbor(ptfhost, duthost, server_ip, ip_version): + def remove_neighbor(ptfhost, duthost, server_ip, ip_version, neighbor_details): # restore ipv4 neighbor since it is statically configured - if ip_version == "ipv4": - restore = True - neighbor_advertise_process = "garp_service" - elif ip_version == "ipv6": - restore = False - neighbor_advertise_process = "arp_responder" - else: - 
raise ValueError("Unknown IP version '%s'" % ip_version) - flush_neighbor_ct = flush_neighbor(duthost, server_ip, restore=restore) + flush_neighbor_ct = flush_neighbor(duthost, server_ip, restore=ip_version == "ipv4" or "ipv6") try: - ptfhost.shell("supervisorctl stop %s" % neighbor_advertise_process) - with flush_neighbor_ct: + ptfhost.shell("supervisorctl stop arp_responder") + # stop garp_service since there is no equivalent in production + ptfhost.shell("supervisorctl stop garp_service") + with flush_neighbor_ct as flushed_neighbor: + neighbor_details.update(flushed_neighbor) yield finally: - ptfhost.shell("supervisorctl start %s" % neighbor_advertise_process) - - tor = rand_selected_dut - test_port, server_ip, ip_version = testbed_setup - - pkt, exp_pkt = build_packet_to_server(tor, ptfadapter, server_ip) - ptf_t1_intf = random.choice(get_t1_ptf_ports(tor, tbinfo)) - logging.info("send traffic to server %s from ptf t1 interface %s", server_ip, ptf_t1_intf) - server_traffic_monitor = ServerTrafficMonitor( - tor, ptfhost, vmhost, tbinfo, test_port, - conn_graph_facts, exp_pkt, existing=True, is_mocked=is_mocked_dualtor(tbinfo) - ) - tunnel_monitor = tunnel_traffic_monitor(tor, existing=False) - with crm_neighbor_checker(tor), tunnel_monitor, server_traffic_monitor: - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) - - logging.info("send traffic to server %s after removing neighbor entry", server_ip) - server_traffic_monitor = ServerTrafficMonitor( - tor, ptfhost, vmhost, tbinfo, test_port, - conn_graph_facts, exp_pkt, existing=False, is_mocked=is_mocked_dualtor(tbinfo) - ) - remove_neighbor_ct = remove_neighbor(ptfhost, tor, server_ip, ip_version) - with crm_neighbor_checker(tor), remove_neighbor_ct, tunnel_monitor, server_traffic_monitor: - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) - - logging.info("send traffic to server %s after neighbor entry is restored", server_ip) - server_traffic_monitor = 
ServerTrafficMonitor( - tor, ptfhost, vmhost, tbinfo, test_port, - conn_graph_facts, exp_pkt, existing=True, is_mocked=is_mocked_dualtor(tbinfo) - ) - with crm_neighbor_checker(tor), tunnel_monitor, server_traffic_monitor: - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) + ptfhost.shell("supervisorctl start arp_responder") + duthost.shell("docker exec -it swss supervisorctl restart arp_update") + + try: + removed_neighbor = {} + tor = rand_selected_dut + test_port, server_ip, ip_version = testbed_setup + + pkt, exp_pkt = build_packet_to_server(tor, ptfadapter, server_ip) + ptf_t1_intf = random.choice(get_t1_ptf_ports(tor, tbinfo)) + logging.info("send traffic to server %s from ptf t1 interface %s", server_ip, ptf_t1_intf) + server_traffic_monitor = ServerTrafficMonitor( + tor, ptfhost, vmhost, tbinfo, test_port, + conn_graph_facts, exp_pkt, existing=True, is_mocked=is_mocked_dualtor(tbinfo) + ) + tunnel_monitor = tunnel_traffic_monitor(tor, existing=False) + with crm_neighbor_checker(tor, ip_version, expect_change=ip_version == "ipv6"), tunnel_monitor, server_traffic_monitor: + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) + + logging.info("send traffic to server %s after removing neighbor entry", server_ip) + server_traffic_monitor = ServerTrafficMonitor( + tor, ptfhost, vmhost, tbinfo, test_port, + conn_graph_facts, exp_pkt, existing=False, is_mocked=is_mocked_dualtor(tbinfo) + ) + remove_neighbor_ct = remove_neighbor(ptfhost, tor, server_ip, ip_version, removed_neighbor) + with crm_neighbor_checker(tor, ip_version, expect_change=ip_version == "ipv6"), remove_neighbor_ct, tunnel_monitor, server_traffic_monitor: + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) + # wait up to a minute for the neighbor entry to become reachable due to performance limitation on some testbeds/lab servers + pytest_assert(wait_until(60, 5, 0, lambda: neighbor_reachable(tor, server_ip))) + + logging.info("send 
traffic to server %s after neighbor entry is restored", server_ip) + server_traffic_monitor = ServerTrafficMonitor( + tor, ptfhost, vmhost, tbinfo, test_port, + conn_graph_facts, exp_pkt, existing=True, is_mocked=is_mocked_dualtor(tbinfo) + ) + with crm_neighbor_checker(tor, ip_version, expect_change=ip_version == "ipv6"), tunnel_monitor, server_traffic_monitor: + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=10) + finally: + # try to recover the removed neighbor so test_downstream_ecmp_nexthops could have a healthy mocked device + if removed_neighbor: + if ip_version == "ipv4": + cmd = 'ip -4 neigh replace {} lladdr {} dev {}'.format(server_ip, removed_neighbor['lladdr'], removed_neighbor['dev']) + else: + cmd = 'ip -6 neigh replace {} lladdr {} dev {}'.format(server_ip, removed_neighbor['lladdr'], removed_neighbor['dev']) + tor.shell(cmd) + ptfhost.shell("supervisorctl start garp_service") def test_downstream_ecmp_nexthops( ptfadapter, rand_selected_dut, tbinfo, - require_mocked_dualtor, toggle_all_simulator_ports, - tor_mux_intfs, ip_version + toggle_all_simulator_ports, tor_mux_intfs, ip_version ): nexthops_count = 4 set_mux_state(rand_selected_dut, tbinfo, 'active', tor_mux_intfs, toggle_all_simulator_ports) diff --git a/tests/dualtor/test_orchagent_mac_move.py b/tests/dualtor/test_orchagent_mac_move.py index 1fb6cf8ac5e..692f69616d5 100644 --- a/tests/dualtor/test_orchagent_mac_move.py +++ b/tests/dualtor/test_orchagent_mac_move.py @@ -81,7 +81,6 @@ def enable_garp(duthost): def test_mac_move( - require_mocked_dualtor, announce_new_neighbor, apply_active_state_to_orchagent, conn_graph_facts, ptfadapter, ptfhost, rand_selected_dut, set_crm_polling_interval, diff --git a/tests/dualtor/test_orchagent_slb.py b/tests/dualtor/test_orchagent_slb.py index bced43051ae..169703843e7 100644 --- a/tests/dualtor/test_orchagent_slb.py +++ b/tests/dualtor/test_orchagent_slb.py @@ -200,9 +200,18 @@ def verify_traffic(duthost, connection, route, 
is_duthost_active=True, is_route_ is_tunnel_traffic_existed = is_route_existed and not is_duthost_active is_server_traffic_existed = is_route_existed and is_duthost_active - tunnel_innner_pkt = pkt[scapyall.IP].copy() - tunnel_innner_pkt[scapyall.IP].ttl -= 1 - tunnel_monitor = tunnel_traffic_monitor(duthost, existing=is_tunnel_traffic_existed, inner_packet=tunnel_innner_pkt) + if isinstance(prefix, ipaddress.IPv4Network): + tunnel_innner_pkt = pkt[scapyall.IP].copy() + tunnel_innner_pkt[scapyall.IP].ttl -= 1 + else: + tunnel_innner_pkt = pkt[scapyall.IPv6].copy() + tunnel_innner_pkt[scapyall.IPv6].hlim -= 1 + tunnel_monitor = tunnel_traffic_monitor( + duthost, + existing=is_tunnel_traffic_existed, + inner_packet=tunnel_innner_pkt, + check_items=["ttl", "queue"] + ) server_traffic_monitor = ServerTrafficMonitor( duthost, ptfhost, vmhost, tbinfo, connection["test_intf"], conn_graph_facts, exp_pkt, existing=is_server_traffic_existed diff --git a/tests/dualtor/test_orchagent_standby_tor_downstream.py b/tests/dualtor/test_orchagent_standby_tor_downstream.py index 8808fbaa50e..df369f03270 100644 --- a/tests/dualtor/test_orchagent_standby_tor_downstream.py +++ b/tests/dualtor/test_orchagent_standby_tor_downstream.py @@ -4,10 +4,10 @@ import logging import ipaddress import contextlib -import time from ptf import testutils from tests.common.dualtor.dual_tor_mock import * +from tests.common.dualtor.dual_tor_mock import set_mux_state, is_t0_mocked_dualtor, is_mocked_dualtor from tests.common.dualtor.dual_tor_utils import dualtor_info from tests.common.dualtor.dual_tor_utils import check_tunnel_balance from tests.common.dualtor.dual_tor_utils import flush_neighbor @@ -15,14 +15,16 @@ from tests.common.dualtor.dual_tor_utils import build_packet_to_server from tests.common.dualtor.dual_tor_utils import crm_neighbor_checker from tests.common.dualtor.dual_tor_utils import add_nexthop_routes, remove_static_routes -from tests.common.fixtures.ptfhost_utils import 
copy_ptftests_directory -from tests.common.fixtures.ptfhost_utils import change_mac_addresses -from tests.common.fixtures.ptfhost_utils import run_garp_service -from tests.common.fixtures.ptfhost_utils import run_icmp_responder # lgtm[py/unused-import] -from tests.common.helpers.assertions import pytest_require as pt_require -from tests.common.dualtor.tunnel_traffic_utils import tunnel_traffic_monitor +from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # noqa: F401 +from tests.common.fixtures.ptfhost_utils import change_mac_addresses # noqa: F401 +from tests.common.fixtures.ptfhost_utils import run_garp_service # noqa: F401 +from tests.common.fixtures.ptfhost_utils import run_icmp_responder # noqa: F401 # lgtm[py/unused-import] +from tests.common.helpers.assertions import pytest_assert as pt_assert +from tests.common.dualtor.tunnel_traffic_utils import tunnel_traffic_monitor # noqa: F401 from tests.common.dualtor.server_traffic_utils import ServerTrafficMonitor -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports # noqa: F401 +from tests.common.dualtor.tor_failure_utils import shutdown_bgp_sessions # noqa: F401 +from tests.common.utilities import wait_until pytestmark = [ @@ -51,12 +53,15 @@ def setup_testbed_ipv6(ip_version, request): if ip_version == "ipv6": request.getfixturevalue("run_arp_responder_ipv6") +@pytest.fixture(scope='module') +def get_function_completeness_level(pytestconfig): + return pytestconfig.getoption("--completeness_level") @pytest.fixture -def get_testbed_params(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ip_version, setup_testbed_ipv6): +def get_testbed_params(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, ip_version, setup_testbed_ipv6, get_function_completeness_level): """Return a function to get testbed params.""" def _get_testbed_params(): - params = dualtor_info(ptfhost, 
rand_selected_dut, rand_unselected_dut, tbinfo) + params = dualtor_info(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, get_function_completeness_level) params["check_ipv6"] = (ip_version == "ipv6") return params @@ -125,7 +130,7 @@ def shutdown_one_bgp_session(rand_selected_dut): startup_bgp_session(rand_selected_dut, bgp_shutdown) -def test_standby_tor_downstream(rand_selected_dut, require_mocked_dualtor, get_testbed_params): +def test_standby_tor_downstream(rand_selected_dut, get_testbed_params): """ Verify tunnel traffic to active ToR is distributed equally across nexthops, and no traffic is forwarded to server from standby ToR @@ -135,8 +140,7 @@ def test_standby_tor_downstream(rand_selected_dut, require_mocked_dualtor, get_t def test_standby_tor_downstream_t1_link_recovered( - rand_selected_dut, require_mocked_dualtor, - verify_crm_nexthop_counter_not_increased, tbinfo, get_testbed_params + rand_selected_dut, verify_crm_nexthop_counter_not_increased, tbinfo, get_testbed_params ): """ Verify traffic is distributed evenly after t1 link is recovered; @@ -164,7 +168,7 @@ def test_standby_tor_downstream_t1_link_recovered( def test_standby_tor_downstream_bgp_recovered( - rand_selected_dut, require_mocked_dualtor, verify_crm_nexthop_counter_not_increased, + rand_selected_dut, verify_crm_nexthop_counter_not_increased, get_testbed_params, tbinfo ): """ @@ -172,8 +176,6 @@ def test_standby_tor_downstream_bgp_recovered( Verify traffic is distributed evenly after BGP session is recovered; Verify CRM that no new nexthop created """ - # require real dualtor, because for mocked testbed, the route to standby is mocked. 
- pt_require('dualtor' in tbinfo['topo']['name'], "Only run on dualtor testbed") PAUSE_TIME = 30 down_bgp = shutdown_random_one_bgp_session(rand_selected_dut) @@ -191,28 +193,81 @@ def test_standby_tor_downstream_bgp_recovered( check_tunnel_balance(**params) -def test_standby_tor_downstream_loopback_route_readded(rand_selected_dut, get_testbed_params, tbinfo): +def route_matches_expected_state(duthost, route_ip, expect_route): + get_route_cmd = "ip route | grep -w {}".format(route_ip) + rc = int(duthost.shell(get_route_cmd, module_ignore_errors=True)['rc']) + return rc == 0 if expect_route else 1 + + +@pytest.fixture +def remove_peer_loopback_route(rand_selected_dut, rand_unselected_dut, shutdown_bgp_sessions, get_testbed_params): # noqa: F811 + """ + Remove routes to peer ToR loopback IP by shutting down BGP sessions on the peer + """ + + def _remove_peer_loopback_route(): + if rand_unselected_dut is None: + # mocked testbed, remove the static route installed by + # apply_dual_tor_peer_switch_route from kernel + remove_static_routes(rand_selected_dut, active_tor_loopback0) + else: + shutdown_bgp_sessions(rand_unselected_dut) + # We need to maintain the expected active/standby state for the test + rand_unselected_dut.shell("config mux mode active all") + + active_tor_loopback0 = get_testbed_params()['active_tor_ip'] + + yield _remove_peer_loopback_route + + if rand_unselected_dut is None: + # mocked testbed, need to add back the static route to kernel + add_nexthop_routes(rand_selected_dut, active_tor_loopback0) + else: + # The `shutdown_bgp_sessions` fixture already restores BGP sessions during teardown so we + # don't need to do it here + rand_unselected_dut.shell("config mux mode auto all") + + +def test_standby_tor_downstream_loopback_route_readded( + rand_selected_dut, rand_unselected_dut, get_testbed_params, + tbinfo, remove_peer_loopback_route +): """ Verify traffic is equally distributed via loopback route """ - pt_require('dualtor' in 
tbinfo['topo']['name'], "Only run on dualtor testbed") params = get_testbed_params() active_tor_loopback0 = params['active_tor_ip'] - # Remove loopback routes and verify traffic is equally distributed - remove_static_routes(rand_selected_dut, active_tor_loopback0) + remove_peer_loopback_route() + pt_assert( + wait_until( + 10, 1, 0, + lambda: route_matches_expected_state(rand_selected_dut, active_tor_loopback0, expect_route=False)), + "Unexpected route {} found on {}".format(active_tor_loopback0, rand_selected_dut) + ) + # Verify traffic is equally distributed check_tunnel_balance(**params) # Readd loopback routes and verify traffic is equally distributed - add_nexthop_routes(rand_selected_dut, active_tor_loopback0) + if rand_unselected_dut is None: + # mocked testbed, need to add back the static route to kernel + add_nexthop_routes(rand_selected_dut, active_tor_loopback0) + else: + rand_unselected_dut.shell("config bgp start all") + pt_assert( + wait_until( + 10, 1, 0, + lambda: route_matches_expected_state(rand_selected_dut, active_tor_loopback0, expect_route=True)), + "Expected route {} not found on {}".format(active_tor_loopback0, rand_selected_dut) + ) check_tunnel_balance(**params) def test_standby_tor_remove_neighbor_downstream_standby( conn_graph_facts, ptfadapter, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, - require_mocked_dualtor, set_crm_polling_interval, - tunnel_traffic_monitor, vmhost, get_testbed_params, + set_crm_polling_interval, tunnel_traffic_monitor, # noqa: F811 + vmhost, get_testbed_params, ip_version ): """ @@ -268,8 +323,8 @@ def stop_neighbor_advertiser(ptfhost, ip_version): def test_downstream_standby_mux_toggle_active( conn_graph_facts, ptfadapter, ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo, - require_mocked_dualtor, tunnel_traffic_monitor, - vmhost, toggle_all_simulator_ports, tor_mux_intfs, + tunnel_traffic_monitor, vmhost, # noqa: F811 + toggle_all_simulator_ports, tor_mux_intfs, # noqa: F811 ip_version, 
get_testbed_params ): # set rand_selected_dut as standby and rand_unselected_dut to active tor diff --git a/tests/dualtor/test_standby_tor_upstream_mux_toggle.py b/tests/dualtor/test_standby_tor_upstream_mux_toggle.py index 10b0f67c9b6..ae94c62dab3 100644 --- a/tests/dualtor/test_standby_tor_upstream_mux_toggle.py +++ b/tests/dualtor/test_standby_tor_upstream_mux_toggle.py @@ -31,7 +31,7 @@ def test_cleanup(rand_selected_dut): def test_standby_tor_upstream_mux_toggle( rand_selected_dut, tbinfo, ptfadapter, rand_selected_interface, - require_mocked_dualtor, toggle_all_simulator_ports, set_crm_polling_interval): + toggle_all_simulator_ports, set_crm_polling_interval): itfs, ip = rand_selected_interface PKT_NUM = 100 # Step 1. Set mux state to standby and verify traffic is dropped by ACL rule and drop counters incremented @@ -77,5 +77,3 @@ def test_standby_tor_upstream_mux_toggle( crm_facts1 = rand_selected_dut.get_crm_facts() unmatched_crm_facts = compare_crm_facts(crm_facts0, crm_facts1) pt_assert(len(unmatched_crm_facts)==0, 'Unmatched CRM facts: {}'.format(json.dumps(unmatched_crm_facts, indent=4))) - - diff --git a/tests/dualtor/test_toggle_mux.py b/tests/dualtor/test_toggle_mux.py index 7c57cee4418..4f1dfb3bcc3 100644 --- a/tests/dualtor/test_toggle_mux.py +++ b/tests/dualtor/test_toggle_mux.py @@ -1,11 +1,10 @@ -import json import logging - +import json import pytest from tests.common.dualtor.constants import UPPER_TOR, LOWER_TOR -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports -from tests.common.dualtor.mux_simulator_control import get_mux_status +from tests.common.dualtor.mux_simulator_control import check_mux_status, validate_check_result +from tests.common.dualtor.dual_tor_utils import update_linkmgrd_probe_interval from tests.common.utilities import wait_until @@ -15,6 +14,8 @@ logger = logging.getLogger(__name__) +DEFAUL_INTERVAL_V4 = 100 + @pytest.fixture(scope="module", autouse=True) def check_topo(tbinfo): @@ -29,89 
+30,54 @@ def restore_mux_auto_mode(duthosts): duthosts.shell('config muxcable mode auto all') -def check_mux_status(duthosts, active_side): - """Verify that status of muxcables are expected - - This function runs "show muxcable status --json" on both ToRs. Before call this function, active side of all - mux cables must be toggled to one side of the ToR. Active side ToR should be indicated in argument "active_side". - - This function will ensure that on one ToR, all the mux cables are active. On the other ToR, all the mux cable - should be standby. - - Args: - duthosts (list): List of duthost objects - active_side (str): Active side of all mux cables, either UPPER_TOR or LOWER_TOR - - Returns: - bool: True if check passed. Otherwise, return False. - """ - if active_side == UPPER_TOR: - mux_active_dut = duthosts[0] - mux_standby_dut = duthosts[1] +@pytest.fixture(scope="module") +def get_interval_v4(duthosts): + mux_linkmgr_output = duthosts.shell('sonic-cfggen -d --var-json MUX_LINKMGR') + mux_linkmgr = list(mux_linkmgr_output.values())[0]['stdout'] + if len(mux_linkmgr) != 0: + cur_interval_v4 = json.loads(mux_linkmgr)['LINK_PROBER']['interval_v4'] + return cur_interval_v4 else: - mux_active_dut = duthosts[1] - mux_standby_dut = duthosts[0] + return None - active_side_muxstatus = json.loads(mux_active_dut.shell("show muxcable status --json")['stdout']) - standby_side_muxstatus = json.loads(mux_standby_dut.shell("show muxcable status --json")['stdout']) - active_side_active_muxcables = [intf for intf, muxcable in active_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'active'] - active_side_standby_muxcables = [intf for intf, muxcable in active_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'standby'] +@pytest.fixture(scope="module") +def reset_link_prober_interval_v4(duthosts, get_interval_v4, tbinfo): + cur_interval_v4 = get_interval_v4 + if cur_interval_v4 is not None: + update_linkmgrd_probe_interval(duthosts, tbinfo, 
DEFAUL_INTERVAL_V4) - standby_side_active_muxcables = [intf for intf, muxcable in standby_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'active'] - standby_side_standby_muxcables = [intf for intf, muxcable in standby_side_muxstatus['MUX_CABLE'].items() if muxcable['STATUS'] == 'standby'] + # NOTE: as there is no icmp_responder running, the device is stucked in consistently probing + # the mux status. If there is a previous case that has fixture run_icmp_responder called, the + # link prober interval is changed into 1000ms, the mux probing interval could be 384s at most. + # So after a hardware mux change, SONiC is only able to learn the change after 384s in worst case. + # To accelerate this, let's restarting linkmgrd to break out from the probing loop firstly and + # change the the probing interval back to 100ms to reduce the future probing interval maximum + # down to 38.4s. + duthosts.shell("docker exec mux supervisorctl restart linkmgrd") - if len(active_side_active_muxcables) > 0 and \ - len(active_side_standby_muxcables) == 0 and \ - len(standby_side_active_muxcables) == 0 and \ - len(standby_side_standby_muxcables) > 0 and \ - set(active_side_active_muxcables) == set(standby_side_standby_muxcables): - logger.info('Check mux status on DUTs passed') - return True - else: - logger.info('Unexpected mux status. active_side={}'.format(active_side)) - logger.info('Active side active muxcables: {}'.format(active_side_active_muxcables)) - logger.info('Active side standby muxcables: {}'.format(active_side_standby_muxcables)) - logger.info('Standby side active muxcables: {}'.format(standby_side_active_muxcables)) - logger.info('Standby side standby muxcables: {}'.format(standby_side_standby_muxcables)) - logger.info('Check mux status on DUTs failed') - return False - - -def validate_check_result(check_result, duthosts): - """If check_result is False, collect some log and fail the test. 
- - Args: - check_result (bool): Check result - duthosts (list): List of duthost objects. - """ - if not check_result: - duthosts.shell('show muxcable config') - duthosts.shell('show muxcable status') - simulator_muxstatus = get_mux_status() - if simulator_muxstatus is not None: - logger.info('Mux status from mux simulator: {}'.format(json.dumps(simulator_muxstatus))) - else: - logger.error('Failed to get mux status from mux simulator') - pytest.fail('Toggle mux from simulator test failed') + yield + + if cur_interval_v4 is not None: + update_linkmgrd_probe_interval(duthosts, tbinfo, cur_interval_v4) @pytest.mark.parametrize("active_side", [UPPER_TOR, LOWER_TOR]) -def test_toggle_mux_from_simulator(duthosts, active_side, toggle_all_simulator_ports, get_mux_status, restore_mux_auto_mode): +def test_toggle_mux_from_simulator(duthosts, active_side, toggle_all_simulator_ports, + get_mux_status, reset_link_prober_interval_v4, restore_mux_auto_mode): logger.info('Set all muxcable to manual mode on all ToRs') duthosts.shell('config muxcable mode manual all') logger.info('Toggle mux active side from mux simulator') toggle_all_simulator_ports(active_side) - check_result = wait_until(10, 2, 2, check_mux_status, duthosts, active_side) - - validate_check_result(check_result, duthosts) + check_result = wait_until(60, 5, 2, check_mux_status, duthosts, active_side) + validate_check_result(check_result, duthosts, get_mux_status) @pytest.mark.parametrize("active_side", [UPPER_TOR, LOWER_TOR]) -def test_toggle_mux_from_cli(duthosts, active_side, restore_mux_auto_mode): - +def test_toggle_mux_from_cli(duthosts, active_side, get_mux_status, + reset_link_prober_interval_v4, restore_mux_auto_mode): logger.info('Reset muxcable mode to auto for all ports on all DUTs') duthosts.shell('config muxcable mode auto all') @@ -122,6 +88,5 @@ def test_toggle_mux_from_cli(duthosts, active_side, restore_mux_auto_mode): mux_active_dut = duthosts[1] mux_active_dut.shell('config muxcable mode active 
all') - check_result = wait_until(10, 2, 2, check_mux_status, duthosts, active_side) - - validate_check_result(check_result, duthosts) + check_result = wait_until(60, 5, 2, check_mux_status, duthosts, active_side) + validate_check_result(check_result, duthosts, get_mux_status) diff --git a/tests/dualtor/test_tor_ecn.py b/tests/dualtor/test_tor_ecn.py index 37e5ec802ff..5f2149ce8ff 100644 --- a/tests/dualtor/test_tor_ecn.py +++ b/tests/dualtor/test_tor_ecn.py @@ -28,12 +28,16 @@ from tests.common.fixtures.ptfhost_utils import run_garp_service from tests.common.fixtures.ptfhost_utils import change_mac_addresses from tests.common.utilities import dump_scapy_packet_show_output -from tests.common.dualtor.tunnel_traffic_utils import derive_queue_id_from_dscp +from tests.common.dualtor.tunnel_traffic_utils import derive_queue_id_from_dscp, derive_out_dscp_from_inner_dscp +from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled pytestmark = [ - pytest.mark.topology("t0") + pytest.mark.topology("dualtor") ] +# The packet number for test +PACKET_NUM = 100 + @contextlib.contextmanager def stop_garp(ptfhost): """Temporarily stop garp service.""" @@ -69,12 +73,12 @@ def setup_dualtor_tor_standby( else: request.getfixturevalue('toggle_all_simulator_ports_to_rand_selected_tor') -@pytest.fixture(scope="function") + def build_encapsulated_ip_packet( - rand_selected_interface, - ptfadapter, - rand_selected_dut, - tunnel_traffic_monitor + inner_dscp, + rand_selected_interface, + ptfadapter, + rand_selected_dut ): """ Build the encapsulated packet to be sent from T1 to ToR. 
@@ -92,9 +96,14 @@ def build_encapsulated_ip_packet( if is_ipv4_address(addr.split("/")[0])][0] tor_ipv4_address = tor_ipv4_address.split("/")[0] - inner_dscp = random.choice(range(0, 33)) - inner_ttl = random.choice(range(3, 65)) - inner_ecn = random.choice(range(0,3)) + inner_ttl = random.choice(list(range(3, 65))) + inner_ecn = random.choice(list(range(0, 3))) + if is_tunnel_qos_remap_enabled(tor): + outer_dscp = derive_out_dscp_from_inner_dscp(tor, inner_dscp) + outer_ecn = inner_ecn + + logging.info("Inner DSCP: {0:06b}, Inner ECN: {1:02b}".format(inner_dscp, inner_ecn)) + logging.info("Outer DSCP: {0:06b}, Outer ECN: {1:02b}".format(outer_dscp, outer_ecn)) inner_packet = testutils.simple_ip_packet( ip_src="1.1.1.1", @@ -108,21 +117,21 @@ def build_encapsulated_ip_packet( eth_src=ptfadapter.dataplane.get_mac(0, 0), ip_src=peer_ipv4_address, ip_dst=tor_ipv4_address, - ip_dscp=inner_dscp, + ip_dscp=outer_dscp, ip_ttl=255, - ip_ecn=inner_ecn, + ip_ecn=outer_ecn, inner_frame=inner_packet ) logging.info("the encapsulated packet to send:\n%s", dump_scapy_packet_show_output(packet)) return packet -@pytest.fixture(scope="function") + def build_non_encapsulated_ip_packet( - rand_selected_interface, - ptfadapter, - rand_selected_dut, - tunnel_traffic_monitor + dscp, + rand_selected_interface, + ptfadapter, + rand_selected_dut ): """ Build the regular (non encapsulated) packet to be sent from T1 to ToR. 
@@ -140,9 +149,9 @@ def build_non_encapsulated_ip_packet( if is_ipv4_address(addr.split("/")[0])][0] tor_ipv4_address = tor_ipv4_address.split("/")[0] - dscp = random.choice(range(0, 33)) ttl = random.choice(range(3, 65)) ecn = random.choice(range(0,3)) + logging.info("DSCP: {0:06b}, ECN: {1:02b}".format(dscp, ecn)) packet = testutils.simple_ip_packet( eth_dst=tor.facts["router_mac"], @@ -158,8 +167,8 @@ def build_non_encapsulated_ip_packet( return packet def get_ptf_server_intf_index( - tor, - tbinfo, + tor, + tbinfo, iface ): """ @@ -187,41 +196,45 @@ def build_expected_packet_to_server( return exp_pkt -def get_queue_id_of_received_packet( - duthosts, - rand_one_dut_hostname, - rand_selected_interface +def check_received_packet_on_expected_queue( + duthosts, + rand_one_dut_hostname, + rand_selected_interface, + expected_queue ): """ - Get queue id of the packet received on destination + Check if received expected number of packets on expected queue """ duthost = duthosts[rand_one_dut_hostname] queue_counter = duthost.shell('show queue counters {} | grep "UC"'.format(rand_selected_interface[0]))['stdout'] logging.info('queue_counter:\n{}'.format(queue_counter)) - """ + """ regex search will look for following pattern in queue_counter o/p for interface ----------------------------------------------------------------------------_--- Port TxQ Counter/pkts Counter/bytes Drop/pkts Drop/bytes ----------- ----- -------------- --------------- ----------- -------------- - Ethernet124 UC1 10 1000 0 0 + Ethernet124 UC1 100 1000 0 0 """ - result = re.search(r'\S+\s+UC\d\s+10+\s+\S+\s+\S+\s+\S+', queue_counter) - - if result is not None: - output = result.group(0) - output_list = output.split() - queue = int(output_list[1][2]) + # In case of other noise packets + DIFF = 0.1 + result = re.findall(r'\S+\s+UC%d\s+(\d+)+\s+\S+\s+\S+\s+\S+' % expected_queue, queue_counter) + + if result: + for number in result: + if int(number) <= PACKET_NUM * (1 + DIFF) and int(number) >= 
PACKET_NUM: + logging.info("the expected Queue : {} received expected numbers of packet {}".format(expected_queue, number)) + return True + logging.debug("the expected Queue : {} did not receive expected numbers of packet : {}".format(expected_queue, PACKET_NUM)) + return False else: - logging.info("Error occured while fetching queue counters from DUT") - return None - - return queue + logging.debug("Could not find expected queue counter matches.") + return False def verify_ecn_on_received_packet( - ptfadapter, - exp_pkt, - exp_ptf_port_index, + ptfadapter, + exp_pkt, + exp_ptf_port_index, exp_ecn ): """ @@ -239,18 +252,18 @@ def verify_ecn_on_received_packet( else: logging.info("the expected ECN: {0:02b} matching with received ECN: {0:02b}".format(exp_ecn, rec_ecn)) +@pytest.mark.parametrize("inner_dscp", [3, 4, 2, 6]) #lossless queue is 3 or 4 or 2 or 6. def test_dscp_to_queue_during_decap_on_active( - ptfhost, setup_dualtor_tor_active, - build_encapsulated_ip_packet, request, - rand_selected_interface, ptfadapter, - tbinfo, rand_selected_dut, tunnel_traffic_monitor, + inner_dscp, ptfhost, setup_dualtor_tor_active, + request, rand_selected_interface, ptfadapter, + tbinfo, rand_selected_dut, tunnel_traffic_monitor, duthosts, rand_one_dut_hostname ): """ Test if DSCP to Q mapping for inner header is matching with outer header during decap on active """ tor = rand_selected_dut - encapsulated_packet = build_encapsulated_ip_packet + encapsulated_packet = build_encapsulated_ip_packet(inner_dscp, rand_selected_interface, ptfadapter, rand_selected_dut) iface, _ = rand_selected_interface exp_ptf_port_index = get_ptf_server_intf_index(tor, tbinfo, iface) @@ -265,23 +278,23 @@ def test_dscp_to_queue_during_decap_on_active( ptfadapter.dataplane.flush() ptf_t1_intf = random.choice(get_t1_ptf_ports(tor, tbinfo)) logging.info("send encapsulated packet from ptf t1 interface %s", ptf_t1_intf) - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), encapsulated_packet, 
count=10) + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), encapsulated_packet, count=PACKET_NUM) exp_tos = encapsulated_packet[IP].payload[IP].tos exp_dscp = exp_tos >> 2 - exp_queue = derive_queue_id_from_dscp(exp_dscp) + exp_queue = derive_queue_id_from_dscp(duthost, exp_dscp, False) _, rec_pkt = testutils.verify_packet_any_port(ptfadapter, exp_pkt, ports=[exp_ptf_port_index], timeout=10) rec_pkt = Ether(rec_pkt) logging.info("received decap packet:\n%s", dump_scapy_packet_show_output(rec_pkt)) time.sleep(10) - rec_queue = get_queue_id_of_received_packet(duthosts, rand_one_dut_hostname, rand_selected_interface) + check_result = check_received_packet_on_expected_queue(duthosts, rand_one_dut_hostname, rand_selected_interface, exp_queue) - if rec_queue == None or rec_queue != exp_queue: - pytest.fail("the expected Queue : {} not matching with received Queue : {}".format(exp_queue, rec_queue)) + if not check_result: + pytest.fail("the expected Queue : {} did not receive expected numbers of packet : {}".format(exp_queue, PACKET_NUM)) else: - logging.info("the expected Queue : {} matching with received Queue : {}".format(exp_queue, rec_queue)) + logging.info("the expected Queue : {} received expected numbers of packet {}".format(exp_queue, PACKET_NUM)) @pytest.fixture(scope='module') def write_standby(rand_selected_dut): @@ -295,14 +308,15 @@ def runcmd(): except: pytest.skip('file {} not found'.format(file)) +@pytest.mark.parametrize("dscp", [3, 4, 2, 6]) #lossless queue is 3 or 4 or 2 or 6. 
def test_dscp_to_queue_during_encap_on_standby( + dscp, setup_dualtor_tor_standby, - build_non_encapsulated_ip_packet, rand_selected_interface, ptfadapter, - tbinfo, - rand_selected_dut, - tunnel_traffic_monitor, - duthosts, + tbinfo, + rand_selected_dut, + tunnel_traffic_monitor, + duthosts, rand_one_dut_hostname, write_standby ): @@ -312,7 +326,7 @@ def test_dscp_to_queue_during_encap_on_standby( write_standby() tor = rand_selected_dut - non_encapsulated_packet = build_non_encapsulated_ip_packet + non_encapsulated_packet = build_non_encapsulated_ip_packet(dscp, rand_selected_interface, ptfadapter, rand_selected_dut) iface, _ = rand_selected_interface exp_ptf_port_index = get_ptf_server_intf_index(tor, tbinfo, iface) @@ -325,20 +339,20 @@ def test_dscp_to_queue_during_encap_on_standby( ptfadapter.dataplane.flush() ptf_t1_intf = random.choice(get_t1_ptf_ports(tor, tbinfo)) logging.info("send IP packet from ptf t1 interface %s", ptf_t1_intf) - with tunnel_traffic_monitor(tor, existing=True): - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), non_encapsulated_packet, count=10) + with tunnel_traffic_monitor(tor, existing=True, packet_count=PACKET_NUM): + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), non_encapsulated_packet, count=PACKET_NUM) +@pytest.mark.parametrize("inner_dscp", [3, 4, 2, 6]) #lossless queue is 3 or 4 or 2 or 6. 
def test_ecn_during_decap_on_active( - ptfhost, setup_dualtor_tor_active, - build_encapsulated_ip_packet, request, - rand_selected_interface, ptfadapter, + inner_dscp, ptfhost, setup_dualtor_tor_active, + request, rand_selected_interface, ptfadapter, tbinfo, rand_selected_dut, tunnel_traffic_monitor ): """ Test if the ECN stamping on inner header is matching with outer during decap on active """ tor = rand_selected_dut - encapsulated_packet = build_encapsulated_ip_packet + encapsulated_packet = build_encapsulated_ip_packet(inner_dscp, rand_selected_interface, ptfadapter, rand_selected_dut) iface, _ = rand_selected_interface exp_ptf_port_index = get_ptf_server_intf_index(tor, tbinfo, iface) @@ -350,13 +364,17 @@ def test_ecn_during_decap_on_active( exp_tos = encapsulated_packet[IP].payload[IP].tos exp_ecn = exp_tos & 3 with stop_garp(ptfhost): + tor.shell("portstat -c") + tor.shell("show arp") ptfadapter.dataplane.flush() - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), encapsulated_packet, count=10) + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), encapsulated_packet, count=PACKET_NUM) + tor.shell("portstat -j") verify_ecn_on_received_packet(ptfadapter, exp_pkt, exp_ptf_port_index, exp_ecn) +@pytest.mark.parametrize("dscp", [3, 4, 2, 6]) #lossless queue is 3 or 4 or 2 or 6. 
def test_ecn_during_encap_on_standby( + dscp, setup_dualtor_tor_standby, - build_non_encapsulated_ip_packet, rand_selected_interface, ptfadapter, tbinfo, rand_selected_dut, tunnel_traffic_monitor, write_standby @@ -367,12 +385,12 @@ def test_ecn_during_encap_on_standby( write_standby() tor = rand_selected_dut - non_encapsulated_packet = build_non_encapsulated_ip_packet + non_encapsulated_packet = build_non_encapsulated_ip_packet(dscp, rand_selected_interface, ptfadapter, rand_selected_dut) iface, _ = rand_selected_interface exp_ptf_port_index = get_ptf_server_intf_index(tor, tbinfo, iface) ptf_t1_intf = random.choice(get_t1_ptf_ports(tor, tbinfo)) logging.info("send IP packet from ptf t1 interface %s", ptf_t1_intf) - with tunnel_traffic_monitor(tor, existing=True): - testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), non_encapsulated_packet, count=10) + with tunnel_traffic_monitor(tor, existing=True, packet_count=PACKET_NUM): + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), non_encapsulated_packet, count=PACKET_NUM) diff --git a/tests/dualtor/test_tunnel_memory_leak.py b/tests/dualtor/test_tunnel_memory_leak.py new file mode 100644 index 00000000000..3200a3b3605 --- /dev/null +++ b/tests/dualtor/test_tunnel_memory_leak.py @@ -0,0 +1,185 @@ +""" +1. On a dual ToR testbed, confirm that the tunnel packet handler service is running +in the SWSS container on active Tor (supervisorctl status tunnel_packet_handler) +2. Send a continuous stream of IPinIP packets similar to those sent from the standby +ToR to the active ToR +3. 
Check SWSS container memory consumption +""" +import pytest +import logging +import random +import time +import contextlib +from ptf import testutils +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_upper_tor # noqa: F401 +from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host # noqa: F401 +from tests.common.dualtor.server_traffic_utils import ServerTrafficMonitor +from tests.common.helpers.assertions import pytest_assert +from tests.common.dualtor.dual_tor_utils import get_t1_ptf_ports +from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip +from tests.common.dualtor.dual_tor_utils import build_packet_to_server +from tests.common.dualtor.dual_tor_utils import delete_neighbor +from tests.common.helpers.dut_utils import get_program_info +from tests.common.fixtures.ptfhost_utils import run_garp_service, run_icmp_responder # noqa: F401 +from tests.common.utilities import wait_until + + +pytestmark = [ + pytest.mark.topology("dualtor") +] + +PACKET_COUNT = 1000 +swss_mem_percent = 0 +# It's normal to see the mem usage increased a little bit +# set threshold buffer to 0.02% +MEM_THRESHOLD_BUFFER = 0.02 + + +def validate_neighbor_entry_exist(duthost, neighbor_addr): + """Validate if neighbor entry exist on duthost + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + neighbor_addr (str): neighbor's ip address + + Returns: + bool: True if neighbor exists. Otherwise, return False. + """ + command = "ip neighbor show %s" % neighbor_addr + output = [_.strip() for _ in duthost.shell(command)["stdout_lines"]] + if not output or "REACHABLE" not in output[0]: + return False + return True + + +def is_tunnel_packet_handler_running(duthost): + """Check if tunnel_packet_handler is running on duthost + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Returns: + bool: True if tunnel_packet_handler is running. Otherwise, return False. 
+ """ + status, _ = get_program_info(duthost, "swss", "tunnel_packet_handler") + return status == 'RUNNING' + + +def check_memory_leak(duthost): + """Check if it has memory leak on duthost + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Returns: + bool: True if there is memory leak. Otherwise, return False. + """ + global swss_mem_percent + stdout_lines = duthost.command("docker stats swss --no-stream")["stdout_lines"] + header = stdout_lines[0] + # Find the position of category "MEM USAGE", "MEM %" and "NET I/O" + pos1 = header.index("MEM USAGE") + pos2 = header.index("MEM %") + pos3 = header.index("NET I/O") + if len(stdout_lines) < 2: + pytest.fail("Collect swss stat failed, swss container may die.") + line = stdout_lines[-1] + # Get the value of "MEM USAGE", "LIMIT" and "MEM %" + mem_info = line[pos1:pos2].strip().split("/") + mem_usage = mem_info[0].strip() + mem_limit = mem_info[1].strip() + mem_percent = line[pos2:pos3].strip() + + logging.info("SWSS MEM USAGE:{} LIMIT:{} PERCENT:{}".format(mem_usage, mem_limit, mem_percent)) + + mem_percent = float(mem_percent[:-1]) + if not swss_mem_percent: + # Save swss mem usage at the first time. + swss_mem_percent = mem_percent + logging.info("SWSS container original MEM USAGE:{} original percent: {}%".format(mem_usage, swss_mem_percent)) + return False + elif mem_percent > swss_mem_percent + MEM_THRESHOLD_BUFFER: + logging.error("SWSS container MEM percent is increased. current percent:{}%, original percent: {}%" + .format(mem_percent, swss_mem_percent)) + return True + return False + + +def test_tunnel_memory_leak(toggle_all_simulator_ports_to_upper_tor, upper_tor_host, lower_tor_host, # noqa: F811 + ptfhost, ptfadapter, conn_graph_facts, tbinfo, vmhost, run_arp_responder): # noqa: F811 + """ + Test if there is memory leak for service tunnel_packet_handler. 
+ Send ip packets from standby TOR T1 to Server, standby TOR will + forward the packets to active TOR with tunnel, active TOR will + decapsulate the IPinIP packets, but there is no neighbor for destination + as we remove neighbor before test, tunnel_packet_handler will be + triggered and neighbor will be added. Server will receive the packets. + Check if memory usage is increased after tunnel_packet_handler's + operation. Since tunnel_packet_handler is only triggered by the + first packet, loop the process for all severs to trigger it as much + as possible. + """ + @contextlib.contextmanager + def prepare_services(ptfhost): + """ + Temporarily start arp and icmp service. Make sure to stop garp service, + otherwise, it will add neighbor entry back automatically. + It has to stop garp_service for triggering tunnel_packet_handler. + It has to start arp and icmp service for receiving packets at server side. + """ + ptfhost.shell("supervisorctl stop garp_service") + ptfhost.shell("supervisorctl start arp_responder") + ptfhost.shell("supervisorctl start icmp_responder") + yield + ptfhost.shell("supervisorctl stop arp_responder") + ptfhost.shell("supervisorctl stop icmp_responder") + + pytest_assert(is_tunnel_packet_handler_running(upper_tor_host), + "tunnel_packet_handler is not running in SWSS conainter.") + + ptf_t1_intf = random.choice(get_t1_ptf_ports(lower_tor_host, tbinfo)) + + all_servers_ips = mux_cable_server_ip(upper_tor_host) + unexpected_count = 0 + expected_count = 0 + + with prepare_services(ptfhost): + # Get the original memeory percent before test + check_memory_leak(upper_tor_host) + for iface, server_ips in all_servers_ips.items(): + server_ipv4 = server_ips["server_ipv4"].split("/")[0] + logging.info("Select DUT interface {} and server IP {} to test.".format(iface, server_ipv4)) + + pkt, exp_pkt = build_packet_to_server(lower_tor_host, ptfadapter, server_ipv4) + + pytest_assert(wait_until(10, 1, 0, delete_neighbor, upper_tor_host, server_ipv4), + 
"server ip {} hasn't been deleted from neighbor table.".format(server_ipv4)) + + server_traffic_monitor = ServerTrafficMonitor( + upper_tor_host, ptfhost, vmhost, tbinfo, iface, + conn_graph_facts, exp_pkt, existing=True, is_mocked=False + ) + try: + with server_traffic_monitor: + testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=PACKET_COUNT) + logging.info("Sent {} packets from ptf t1 interface {} on standby TOR {}" + .format(PACKET_COUNT, ptf_t1_intf, lower_tor_host.hostname)) + # Check memory usage for every operation, used for debugging if test failed + check_memory_leak(upper_tor_host) + pytest_assert(validate_neighbor_entry_exist(upper_tor_host, server_ipv4), + "The server ip {} doesn't exist in neighbor table on dut {}. \ + tunnel_packet_handler isn't triggered.".format(server_ipv4, upper_tor_host.hostname)) + except Exception as e: + logging.error("Capture exception {}, continue the process.".format(repr(e))) + if len(server_traffic_monitor.matched_packets) == 0: + logging.error("Didn't receive any expected packets for server {}.".format(server_ipv4)) + unexpected_count += 1 + else: + expected_count += 1 + logging.info("The amount of expected scenarios: {}, the amount of unexpected scenarios: {}." 
+ .format(expected_count, unexpected_count)) + # sleep 10s to wait memory usage stable, check if there is memory leak + time.sleep(10) + check_result = check_memory_leak(upper_tor_host) + pytest_assert(check_result is False, "Test failed because there is memory leak on {}" + .format(upper_tor_host.hostname)) diff --git a/tests/dualtor_io/test_heartbeat_failure.py b/tests/dualtor_io/test_heartbeat_failure.py index 2db9bc6a3ac..32be14e19e3 100644 --- a/tests/dualtor_io/test_heartbeat_failure.py +++ b/tests/dualtor_io/test_heartbeat_failure.py @@ -15,6 +15,20 @@ ] +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exception(loganalyzer, duthosts): + + ignore_errors = [ + r".* ERR monit.*: 'container_checker' status failed \(3\) -- Expected containers not running: mux" + ] + + if loganalyzer: + for duthost in duthosts: + loganalyzer[duthost.hostname].ignore_regex.extend(ignore_errors) + + return None + + def test_active_tor_heartbeat_failure_upstream( toggle_all_simulator_ports_to_upper_tor, upper_tor_host, lower_tor_host, send_server_to_t1_with_action, diff --git a/tests/dualtor_io/test_normal_op.py b/tests/dualtor_io/test_normal_op.py index 543bcae1f1b..d8e8f23a6e4 100644 --- a/tests/dualtor_io/test_normal_op.py +++ b/tests/dualtor_io/test_normal_op.py @@ -46,6 +46,7 @@ def test_normal_op_downstream_standby(upper_tor_host, lower_tor_host, expected_standby_host=lower_tor_host) +@pytest.mark.disable_loganalyzer def test_active_config_reload_upstream(upper_tor_host, lower_tor_host, send_server_to_t1_with_action, toggle_all_simulator_ports_to_upper_tor): @@ -59,6 +60,7 @@ def test_active_config_reload_upstream(upper_tor_host, lower_tor_host, expected_standby_host=upper_tor_host) +@pytest.mark.disable_loganalyzer def test_standby_config_reload_upstream(upper_tor_host, lower_tor_host, send_server_to_t1_with_action, toggle_all_simulator_ports_to_upper_tor): @@ -72,6 +74,7 @@ def test_standby_config_reload_upstream(upper_tor_host, lower_tor_host, 
expected_standby_host=lower_tor_host) +@pytest.mark.disable_loganalyzer def test_standby_config_reload_downstream_active(upper_tor_host, lower_tor_host, send_t1_to_server_with_action, @@ -86,6 +89,7 @@ def test_standby_config_reload_downstream_active(upper_tor_host, expected_standby_host=lower_tor_host) +@pytest.mark.disable_loganalyzer def test_active_config_reload_downstream_standby(upper_tor_host, lower_tor_host, send_t1_to_server_with_action, diff --git a/tests/dualtor_io/test_tor_failure.py b/tests/dualtor_io/test_tor_failure.py index 58b7eb11250..6330781759a 100644 --- a/tests/dualtor_io/test_tor_failure.py +++ b/tests/dualtor_io/test_tor_failure.py @@ -36,6 +36,7 @@ def toggle_lower_tor_pdu(lower_tor_host, get_pdu_controller): return lambda: toggle_pdu_outlet(pdu_controller) +@pytest.mark.disable_loganalyzer def test_active_tor_reboot_upstream( upper_tor_host, lower_tor_host, send_server_to_t1_with_action, toggle_all_simulator_ports_to_upper_tor, toggle_upper_tor_pdu, @@ -56,6 +57,7 @@ def test_active_tor_reboot_upstream( ) +@pytest.mark.disable_loganalyzer def test_active_tor_reboot_downstream_standby( upper_tor_host, lower_tor_host, send_t1_to_server_with_action, toggle_all_simulator_ports_to_upper_tor, toggle_upper_tor_pdu, @@ -76,6 +78,7 @@ def test_active_tor_reboot_downstream_standby( ) +@pytest.mark.disable_loganalyzer def test_standby_tor_reboot_upstream( upper_tor_host, lower_tor_host, send_server_to_t1_with_action, toggle_all_simulator_ports_to_upper_tor, toggle_lower_tor_pdu, @@ -96,6 +99,7 @@ def test_standby_tor_reboot_upstream( ) +@pytest.mark.disable_loganalyzer def test_standby_tor_reboot_downstream_active( upper_tor_host, lower_tor_host, send_t1_to_server_with_action, toggle_all_simulator_ports_to_upper_tor, toggle_lower_tor_pdu, diff --git a/tests/everflow/everflow_test_utilities.py b/tests/everflow/everflow_test_utilities.py index 3966e675b7b..7a38bfdefcb 100644 --- a/tests/everflow/everflow_test_utilities.py +++ 
b/tests/everflow/everflow_test_utilities.py @@ -15,6 +15,7 @@ from abc import abstractmethod from ptf.mask import Mask from tests.common.helpers.assertions import pytest_assert +from tests.common.helpers.constants import UPSTREAM_NEIGHBOR_MAP, DOWNSTREAM_NEIGHBOR_MAP import json # TODO: Add suport for CONFIGLET mode @@ -43,9 +44,11 @@ VLAN_BASE_MAC_PATTERN = "72060001{:04}" DOWN_STREAM = "downstream" UP_STREAM = "upstream" +# Topo that downstream neighbor of DUT are servers +DOWNSTREAM_SERVER_TOPO = ["t0", "m0_vlan"] @pytest.fixture(scope="module") -def setup_info(duthosts, rand_one_dut_hostname, tbinfo): +def setup_info(duthosts, rand_one_dut_hostname, tbinfo, topo_scenario): """ Gather all required test information. @@ -60,74 +63,47 @@ def setup_info(duthosts, rand_one_dut_hostname, tbinfo): duthost = duthosts[rand_one_dut_hostname] topo = tbinfo['topo']['name'] - # {namespace: [server ports]} - server_ports_namespace_map = defaultdict(list) - # {namespace: [T1 ports]} - t1_ports_namespace_map = defaultdict(list) - # { namespace : [tor ports] } - tor_ports_namespace_map = defaultdict(list) - # { namespace : [spine ports] } - spine_ports_namespace_map = defaultdict(list) - - # { set of namespace server ports belong } - server_ports_namespace = set() - # { set of namespace t1 ports belong} - t1_ports_namespace = set() - # { set of namespace tor ports belongs } - tor_ports_namespace = set() - # { set of namespace spine ports belongs } - spine_ports_namespace = set() - + upstream_ports_namespace_map = defaultdict(list) + downstream_ports_namespace_map = defaultdict(list) + upstream_ports_namespace = set() + downstream_ports_namespace = set() # Gather test facts mg_facts = duthost.get_extended_minigraph_facts(tbinfo) switch_capability_facts = duthost.switch_capabilities_facts()["ansible_facts"] acl_capability_facts = duthost.acl_capabilities_facts()["ansible_facts"] + topo_type = tbinfo["topo"]["type"] + if topo_type == "m0": + topo_type = "m0_vlan" if 
"m0_vlan_scenario" in topo_scenario else "m0_l3" # Get the list of T0/T2 ports for dut_port, neigh in mg_facts["minigraph_neighbors"].items(): - if "t1" in topo: - # Get the list of T0/T2 ports - if "t0" in neigh["name"].lower(): - # Add Tor ports to namespace - tor_ports_namespace_map[neigh['namespace']].append(dut_port) - tor_ports_namespace.add(neigh['namespace']) - elif "t2" in neigh["name"].lower(): - # Add Spine ports to namespace - spine_ports_namespace_map[neigh['namespace']].append(dut_port) - spine_ports_namespace.add(neigh['namespace']) - elif "t0" in topo: - # Get the list of Server/T1 ports - if "server" in neigh["name"].lower(): - # Add Server ports to namespace - server_ports_namespace_map[neigh['namespace']].append(dut_port) - server_ports_namespace.add(neigh['namespace']) - elif "t1" in neigh["name"].lower(): - # Add T1 ports to namespace - t1_ports_namespace_map[neigh['namespace']].append(dut_port) - t1_ports_namespace.add(neigh['namespace']) - else: - # Todo: Support dualtor testbed - pytest.skip("Unsupported topo") + pytest_assert(topo_type in UPSTREAM_NEIGHBOR_MAP and topo_type in DOWNSTREAM_NEIGHBOR_MAP, "Unsupported topo") + if UPSTREAM_NEIGHBOR_MAP[topo_type] in neigh["name"].lower(): + upstream_ports_namespace_map[neigh['namespace']].append(dut_port) + upstream_ports_namespace.add(neigh['namespace']) + elif DOWNSTREAM_NEIGHBOR_MAP[topo_type] in neigh["name"].lower(): + downstream_ports_namespace_map[neigh['namespace']].append(dut_port) + downstream_ports_namespace.add(neigh['namespace']) if 't1' in topo: - # Set of TOR ports only Namespace - tor_only_namespace = tor_ports_namespace.difference(spine_ports_namespace) - # Set of Spine ports only Namespace - spine_only_namespace = spine_ports_namespace.difference(tor_ports_namespace) - # Randomly choose from TOR_only Namespace if present else just use first one - tor_namespace = random.choice(tuple(tor_only_namespace)) if tor_only_namespace else tuple(tor_ports_namespace)[0] - # Randomly choose 
from Spine_only Namespace if present else just use first one - spine_namespace = random.choice(tuple(spine_only_namespace)) if spine_only_namespace else tuple(spine_ports_namespace)[0] - tor_ports = tor_ports_namespace_map[tor_namespace] - spine_ports = spine_ports_namespace_map[spine_namespace] - + # Set of downstream ports only Namespace + downstream_only_namespace = downstream_ports_namespace.difference(upstream_ports_namespace) + # Set of upstream ports only Namespace + upstream_only_namespace = upstream_ports_namespace.difference(downstream_ports_namespace) + # Randomly choose from downstream_only Namespace if present else just use first one + downstream_namespace = random.choice(tuple(downstream_only_namespace)) \ + if downstream_only_namespace else tuple(downstream_ports_namespace)[0] + # Randomly choose from upstream_only Namespace if present else just use first one + upstream_namespace = random.choice(tuple(upstream_only_namespace)) \ + if upstream_only_namespace else tuple(upstream_ports_namespace)[0] else: - # Use the default namespace for Server and T1 - server_namespace = tuple(server_ports_namespace)[0] - t1_namespace = tuple(t1_ports_namespace)[0] - server_ports = server_ports_namespace_map[server_namespace] - t1_ports = t1_ports_namespace_map[t1_namespace] + # Use the default namespace + downstream_namespace = tuple(downstream_ports_namespace)[0] + upstream_namespace = tuple(upstream_ports_namespace)[0] + + downstream_ports = downstream_ports_namespace_map[downstream_namespace] + upstream_ports = upstream_ports_namespace_map[upstream_namespace] switch_capabilities = switch_capability_facts["switch_capabilities"]["switch"] acl_capabilities = acl_capability_facts["acl_capabilities"] @@ -179,7 +155,7 @@ def get_port_info(in_port_list, out_port_list, out_port_ptf_id_list, out_port_la out_port_exclude_list.append(lag_member) out_port_ptf_id_list.append(ptf_port_id) - + setup_information = { "router_mac": duthost.facts["router_mac"], "test_mirror_v4": 
test_mirror_v4, @@ -198,89 +174,60 @@ def get_port_info(in_port_list, out_port_list, out_port_ptf_id_list, out_port_la if k in mg_facts["minigraph_ports"] }, # { ptf_port_id : namespace } - "port_index_namespace_map" : { + "port_index_namespace_map": { v: mg_facts["minigraph_neighbors"][k]['namespace'] for k, v in mg_facts["minigraph_ptf_indices"].items() if k in mg_facts["minigraph_ports"] } } - if 't0' in topo: - # Downstream traffic (T0 -> Server) - server_dest_ports = [] - server_dest_ports_ptf_id = [] - get_port_info(server_ports, server_dest_ports, server_dest_ports_ptf_id, None) - - # Upstream traffic (Server -> T0) - t1_dest_ports = [] - t1_dest_ports_ptf_id = [] - t1_dest_lag_name = [] - get_port_info(t1_ports, t1_dest_ports, t1_dest_ports_ptf_id, t1_dest_lag_name) - - setup_information.update( - { - "topo": "t0", - "server_ports": server_ports, - "server_dest_ports_ptf_id": server_dest_ports_ptf_id, - "t1_ports": t1_ports, - DOWN_STREAM: { - "src_port": t1_ports[0], - "src_port_lag_name":t1_dest_lag_name[0], - "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][t1_ports[0]]), - # Downstream traffic ingress from the first portchannel, - # and mirror packet egress from other portchannels - "dest_port": t1_ports[2:] if len(t1_dest_ports_ptf_id[0].split(',')) == 2 else t1_ports[1:], - "dest_port_ptf_id": t1_dest_ports_ptf_id[1:], - "dest_port_lag_name": t1_dest_lag_name[1:], - "namespace": server_namespace - }, - UP_STREAM: { - "src_port": server_ports[0], - "src_port_lag_name":"Not Applicable", - "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][server_ports[0]]), - "dest_port": t1_dest_ports, - "dest_port_ptf_id": t1_dest_ports_ptf_id, - "dest_port_lag_name": t1_dest_lag_name, - "namespace": t1_namespace - }, - } - ) - elif 't1' in topo: - # Downstream traffic (T1 -> T0) - tor_dest_ports = [] - tor_dest_ports_ptf_id = [] - tor_dest_lag_name = [] - get_port_info(tor_ports, tor_dest_ports, tor_dest_ports_ptf_id, tor_dest_lag_name) - - # Upstream 
traffic (T0 -> T1) - spine_dest_ports = [] - spine_dest_ports_ptf_id = [] - spine_dest_lag_name = [] - get_port_info(spine_ports, spine_dest_ports, spine_dest_ports_ptf_id, spine_dest_lag_name) + # Downstream traffic + downstream_dest_ports = [] + downstream_dest_ports_ptf_id = [] + downstream_dest_lag_name = None if topo_type in DOWNSTREAM_SERVER_TOPO else [] + get_port_info(downstream_ports, downstream_dest_ports, downstream_dest_ports_ptf_id, downstream_dest_lag_name) + + # Upstream traffic + upstream_dest_ports = [] + upstream_dest_ports_ptf_id = [] + upstream_dest_lag_name = [] + get_port_info(upstream_ports, upstream_dest_ports, upstream_dest_ports_ptf_id, upstream_dest_lag_name) + + setup_information.update( + { + "topo": topo_type, + DOWN_STREAM: { + "src_port": upstream_ports[0], + "src_port_lag_name": upstream_dest_lag_name[0], + "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][upstream_ports[0]]), + # For T0 topo, downstream traffic ingress from the first portchannel, + # and mirror packet egress from other portchannels + "dest_port": upstream_dest_ports[1:] \ + if topo_type in DOWNSTREAM_SERVER_TOPO else downstream_dest_ports, + "dest_port_ptf_id": upstream_dest_ports_ptf_id[1:] \ + if topo_type in DOWNSTREAM_SERVER_TOPO else downstream_dest_ports_ptf_id, + "dest_port_lag_name": upstream_dest_lag_name[1:] \ + if topo_type in DOWNSTREAM_SERVER_TOPO else downstream_dest_lag_name, + "namespace": downstream_namespace + }, + UP_STREAM: { + "src_port": downstream_ports[0], + # DUT whose downstream are servers doesn't have lag connect to server + "src_port_lag_name": "Not Applicable" \ + if topo_type in DOWNSTREAM_SERVER_TOPO else downstream_dest_lag_name[0], + "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][downstream_ports[0]]), + "dest_port": upstream_dest_ports, + "dest_port_ptf_id": upstream_dest_ports_ptf_id, + "dest_port_lag_name": upstream_dest_lag_name, + "namespace": upstream_namespace + }, + } + ) + if topo_type in 
DOWNSTREAM_SERVER_TOPO: setup_information.update( { - "topo": "t1", - "tor_ports": tor_ports, - "spine_ports": spine_ports, - DOWN_STREAM: { - "src_port": spine_ports[0], - "src_port_lag_name":spine_dest_lag_name[0], - "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][spine_ports[0]]), - "dest_port": tor_dest_ports, - "dest_port_ptf_id": tor_dest_ports_ptf_id, - "dest_port_lag_name": tor_dest_lag_name, - "namespace": tor_namespace - }, - UP_STREAM: { - "src_port": tor_ports[0], - "src_port_lag_name":tor_dest_lag_name[0], - "src_port_ptf_id": str(mg_facts["minigraph_ptf_indices"][tor_ports[0]]), - "dest_port": spine_dest_ports, - "dest_port_ptf_id": spine_dest_ports_ptf_id, - "dest_port_lag_name": spine_dest_lag_name, - "namespace": spine_namespace - } + "server_dest_ports_ptf_id": downstream_dest_ports_ptf_id } ) @@ -329,7 +276,7 @@ def remove_route(duthost, prefix, nexthop, namespace): @pytest.fixture(scope='module', autouse=True) def setup_arp_responder(duthost, ptfhost, setup_info): - if setup_info['topo'] != 't0': + if setup_info['topo'] not in ['t0', 'm0_vlan']: yield return ip_list = [TARGET_SERVER_IP, DEFAULT_SERVER_IP] diff --git a/tests/everflow/test_everflow_ipv6.py b/tests/everflow/test_everflow_ipv6.py index 94312427707..a00b8679109 100644 --- a/tests/everflow/test_everflow_ipv6.py +++ b/tests/everflow/test_everflow_ipv6.py @@ -9,7 +9,7 @@ from everflow_test_utilities import setup_info # noqa: F401, E501 lgtm[py/unused-import] pylint: disable=import-error pytestmark = [ - pytest.mark.topology("t0","t1") + pytest.mark.topology("t0", "t1", "m0") ] EVERFLOW_V6_RULES = "ipv6_test_rules.yaml" @@ -37,7 +37,7 @@ def setup_mirror_session_dest_ip_route(self, duthosts, rand_one_dut_hostname, tb Remove the route as part of cleanup. 
""" duthost = duthosts[rand_one_dut_hostname] - if setup_info['topo'] == 't0': + if setup_info['topo'] in ['t0', 'm0_vlan']: # On T0 testbed, the collector IP is routed to T1 namespace = setup_info[UP_STREAM]['namespace'] tx_port = setup_info[UP_STREAM]["dest_port"][0] diff --git a/tests/everflow/test_everflow_per_interface.py b/tests/everflow/test_everflow_per_interface.py index 4ed920a68b7..d83b5e2dba1 100644 --- a/tests/everflow/test_everflow_per_interface.py +++ b/tests/everflow/test_everflow_per_interface.py @@ -25,18 +25,6 @@ logger = logging.getLogger(__file__) -@pytest.fixture(scope="module", autouse=True) -def skip_if_not_supported(tbinfo, rand_selected_dut, ip_ver): - if 'dualtor' in tbinfo['topo']['name']: - pytest.skip("Skip running on dualtor testbed") - - asic_type = rand_selected_dut.facts["asic_type"] - unsupported_platforms = ["mellanox", "marvell", "barefoot", "cisco-8000"] - # Skip ipv6 test on Mellanox platform - is_mellanox_ipv4 = asic_type == 'mellanox' and ip_ver == 'ipv4' - # Skip ipv6 test on cisco-8000 platform - is_cisco_ipv4 = asic_type == 'cisco-8000' and ip_ver == 'ipv4' - pytest_require(asic_type not in unsupported_platforms or is_mellanox_ipv4 or is_cisco_ipv4, "Match 'IN_PORTS' is not supported on {} platform".format(asic_type)) def build_candidate_ports(duthost, tbinfo): """ @@ -46,6 +34,8 @@ def build_candidate_ports(duthost, tbinfo): unselected_ports = {} if tbinfo['topo']['type'] == 't0': candidate_neigh_name = 'Server' + elif tbinfo['topo']['type'] == 'm0': + candidate_neigh_name = 'MX' else: candidate_neigh_name = 'T0' mg_facts = duthost.get_extended_minigraph_facts(tbinfo) @@ -56,10 +46,10 @@ def build_candidate_ports(duthost, tbinfo): candidate_ports.update({dut_port: ptf_idx}) if len(unselected_ports) < 4 and dut_port not in candidate_ports: unselected_ports.update({dut_port: ptf_idx}) - + logger.info("Candidate testing ports are {}".format(candidate_ports)) return candidate_ports, unselected_ports - + def 
build_acl_rule_vars(candidate_ports, ip_ver): """ @@ -77,9 +67,15 @@ def apply_mirror_session(rand_selected_dut): logger.info("Applying mirror session to DUT") BaseEverflowTest.apply_mirror_config(rand_selected_dut, mirror_session_info) time.sleep(10) - cmd = 'sonic-db-cli STATE_DB hget \"MIRROR_SESSION_TABLE|{}\" \"monitor_port\"'.format(EVERFLOW_SESSION_NAME) - monitor_port = rand_selected_dut.shell(cmd=cmd)['stdout'] - pytest_assert(monitor_port != "", "Failed to retrieve monitor_port") + single_asic_cmd = 'sonic-db-cli STATE_DB hget \"MIRROR_SESSION_TABLE|{}\" \"monitor_port\"'.format(EVERFLOW_SESSION_NAME) + if rand_selected_dut.is_multi_asic: + for front_ns in rand_selected_dut.get_frontend_asic_namespace_list(): + cmd = "{} -n {}".format(single_asic_cmd, front_ns) + monitor_port = rand_selected_dut.shell(cmd=cmd)['stdout'] + pytest_assert(monitor_port != "", "Failed to retrieve monitor_port on multi-asic dut's frontend namespace: {}".format(front_ns)) + else: + monitor_port = rand_selected_dut.shell(cmd=single_asic_cmd)['stdout'] + pytest_assert(monitor_port != "", "Failed to retrieve monitor_port") yield mirror_session_info, monitor_port @@ -101,7 +97,7 @@ def apply_acl_rule(rand_selected_dut, tbinfo, apply_mirror_session, ip_ver): # Skip if EVERFLOW table doesn't exist pytest_require(len(output) > 2, "Skip test since {} dosen't exist".format(table_name)) mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) - mirror_session_info, monitor_port = apply_mirror_session + mirror_session_info, monitor_port = apply_mirror_session # Build testing port list candidate_ports, unselected_ports = build_candidate_ports(rand_selected_dut, tbinfo) pytest_require(len(candidate_ports) >= 1, "Not sufficient ports for testing") @@ -122,18 +118,22 @@ def apply_acl_rule(rand_selected_dut, tbinfo, apply_mirror_session, ip_ver): "mirror_session_info": mirror_session_info, "monitor_port": {monitor_port: mg_facts["minigraph_ptf_indices"][monitor_port]} } - + yield ret 
logger.info("Removing acl rule config from DUT") BaseEverflowTest.remove_acl_rule_config(rand_selected_dut, table_name) -def generate_testing_packet(ptfadapter, duthost, mirror_session_info, router_mac): - packet = testutils.simple_tcp_packet( - eth_src=ptfadapter.dataplane.get_mac(0, 0), - eth_dst=router_mac - ) +def generate_testing_packet(ptfadapter, duthost, mirror_session_info, router_mac, ip_ver): + if ip_ver == 'ipv4': + packet = \ + testutils.simple_tcp_packet(eth_src=ptfadapter.dataplane.get_mac(0, + 0), eth_dst=router_mac) + else: + packet = \ + testutils.simple_tcpv6_packet(eth_src=ptfadapter.dataplane.get_mac(0, + 0), eth_dst=router_mac) setup = {} setup["router_mac"] = router_mac exp_packet = BaseEverflowTest.get_expected_mirror_packet(mirror_session_info, setup, duthost, packet, False) @@ -141,13 +141,15 @@ def generate_testing_packet(ptfadapter, duthost, mirror_session_info, router_mac def get_uplink_ports(duthost, tbinfo): - """The collector IP is a destination reachable by default. + """The collector IP is a destination reachable by default. 
So we need to collect the uplink ports to do a packet capture """ uplink_ports = [] mg_facts = duthost.get_extended_minigraph_facts(tbinfo) if 't0' == tbinfo['topo']['type']: neigh_name = 'T1' + elif 'm0' == tbinfo['topo']['type']: + neigh_name = 'M1' else: neigh_name = 'T2' for dut_port, neigh in mg_facts["minigraph_neighbors"].items(): @@ -166,21 +168,21 @@ def send_and_verify_packet(ptfadapter, packet, expected_packet, tx_port, rx_port testutils.verify_no_packet_any(ptfadapter, pkt=expected_packet, ports=rx_ports) -def test_everflow_per_interface(ptfadapter, rand_selected_dut, apply_acl_rule, tbinfo): +def test_everflow_per_interface(ptfadapter, rand_selected_dut, apply_acl_rule, tbinfo, ip_ver): """Verify packet ingress from candidate ports are captured by EVERFLOW, while packets ingress from unselected ports are not captured """ everflow_config = apply_acl_rule - packet, exp_packet = generate_testing_packet(ptfadapter, rand_selected_dut, everflow_config['mirror_session_info'], rand_selected_dut.facts["router_mac"]) + packet, exp_packet = generate_testing_packet(ptfadapter, rand_selected_dut, everflow_config['mirror_session_info'], rand_selected_dut.facts["router_mac"], ip_ver) uplink_ports = get_uplink_ports(rand_selected_dut, tbinfo) # Verify that packet ingressed from INPUT_PORTS (candidate ports) are mirrored for port, ptf_idx in everflow_config['candidate_ports'].items(): logger.info("Verifying packet ingress from {} is mirrored".format(port)) send_and_verify_packet(ptfadapter, packet, exp_packet, ptf_idx, uplink_ports, True) - + # Verify that packet ingressed from unselected ports are not mirrored for port, ptf_idx in everflow_config['unselected_ports'].items(): logger.info("Verifying packet ingress from {} is not mirrored".format(port)) send_and_verify_packet(ptfadapter, packet, exp_packet, ptf_idx, uplink_ports, False) - - + + diff --git a/tests/everflow/test_everflow_testbed.py b/tests/everflow/test_everflow_testbed.py index 197b24d673d..1147ddd7674 
100644 --- a/tests/everflow/test_everflow_testbed.py +++ b/tests/everflow/test_everflow_testbed.py @@ -15,7 +15,7 @@ from tests.common.fixtures.ptfhost_utils import copy_arp_responder_py # noqa: F401, E501 lgtm[py/unused-import] pylint: disable=import-error pytestmark = [ - pytest.mark.topology("t0", "t1") + pytest.mark.topology("t0", "t1", "m0") ] @@ -77,10 +77,10 @@ def dest_port_type(self, duthosts, rand_one_dut_hostname, setup_info, setup_mirr on that. As of now cleanup is being done here. """ duthost = duthosts[rand_one_dut_hostname] - + duthost.shell(duthost.get_vtysh_cmd_for_namespace("vtysh -c \"config\" -c \"router bgp\" -c \"address-family ipv4\" -c \"redistribute static\"",setup_info[request.param]["namespace"])) yield request.param - + for index in range(0, min(3, len(setup_info[request.param]["dest_port"]))): tx_port = setup_info[request.param]["dest_port"][index] @@ -101,7 +101,7 @@ def add_dest_routes(self, duthosts, rand_one_dut_hostname, setup_info, tbinfo, d default_traffic_port_type = DOWN_STREAM if dest_port_type == UP_STREAM else UP_STREAM rx_port = setup_info[default_traffic_port_type]["dest_port"][0] nexthop_ip = everflow_utils.get_neighbor_info(duthost, rx_port, tbinfo) - + ns = setup_info[default_traffic_port_type]["namespace"] dst_mask = "30.0.0.0/28" @@ -254,7 +254,7 @@ def test_everflow_neighbor_mac_change(self, duthosts, rand_one_dut_hostname, set [tx_port_ptf_id], dest_port_type ) - + def test_everflow_remove_unused_ecmp_next_hop(self, duthosts, rand_one_dut_hostname, setup_info, setup_mirror_session, dest_port_type, ptfadapter, tbinfo): """Verify that session is still active after removal of next hop from ECMP route that was not in use.""" duthost = duthosts[rand_one_dut_hostname] @@ -429,7 +429,7 @@ def test_everflow_remove_used_ecmp_next_hop(self, duthosts, rand_one_dut_hostnam tx_port_ptf_ids, dest_port_type ) - + def test_everflow_dscp_with_policer( self, duthost, @@ -453,11 +453,16 @@ def test_everflow_dscp_with_policer( vendor = 
duthost.facts["asic_type"] hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname] + + send_time = "10" + if vendor == "mellanox": + send_time = "75" + for asic in self.MIRROR_POLICER_UNSUPPORTED_ASIC_LIST: vendorAsic = "{0}_{1}_hwskus".format(vendor, asic) if vendorAsic in hostvars.keys() and duthost.facts['hwsku'] in hostvars[vendorAsic]: pytest.skip("Skipping test since mirror policing is not supported on {0} {1} platforms".format(vendor,asic)) - if setup_info['topo'] == 't0': + if setup_info['topo'] in ['t0', 'm0_vlan']: default_tarffic_port_type = dest_port_type # Use the second portchannel as missor session nexthop tx_port = setup_info[dest_port_type]["dest_port"][1] @@ -480,7 +485,7 @@ def test_everflow_dscp_with_policer( table_type = "MIRROR_DSCP" rx_port_ptf_id = setup_info[dest_port_type]["src_port_ptf_id"] tx_port_ptf_id = setup_info[dest_port_type]["dest_port_ptf_id"][0] - if setup_info['topo'] == 't0' and self.acl_stage() == "egress": + if setup_info['topo'] in ['t0', 'm0_vlan'] and self.acl_stage() == "egress": # For T0 upstream, the EVERFLOW_DSCP table is binded to one of portchannels bind_interface = setup_info[dest_port_type]["dest_port_lag_name"][0] mirror_port_id = setup_info[dest_port_type]["dest_port_ptf_id"][1] @@ -495,7 +500,7 @@ def test_everflow_dscp_with_policer( self.apply_acl_table_config(duthost, table_name, table_type, config_method, [bind_interface]) bind_interface_namespace = self._get_port_namespace(setup_info, int(rx_port_ptf_id)) if bind_interface_namespace: - self.apply_acl_table_config(duthost, table_name, table_type, config_method, [bind_interface], bind_interface_namespace) + self.apply_acl_table_config(duthost, table_name, table_type, config_method, [bind_interface], bind_interface_namespace) # Add rule to match on DSCP self.apply_acl_rule_config(duthost, table_name, @@ -516,7 +521,7 @@ def test_everflow_dscp_with_policer( meter_type="packets", cir="100", cbs="100", - send_time="10", + 
send_time=send_time, tolerance="10") finally: # Clean up ACL rules and routes @@ -534,7 +539,7 @@ def _run_everflow_test_scenarios(self, ptfadapter, setup, mirror_session, duthos tx_port_ids = self._get_tx_port_id_list(tx_ports) target_ip = "30.0.0.10" default_ip = self.DEFAULT_DST_IP - if 't0' == setup['topo'] and direction == DOWN_STREAM: + if setup['topo'] in ['t0', 'm0_vlan'] and direction == DOWN_STREAM: target_ip = TARGET_SERVER_IP default_ip = DEFAULT_SERVER_IP diff --git a/tests/fdb/conftest.py b/tests/fdb/conftest.py index c24685ac81e..807bb69bae4 100644 --- a/tests/fdb/conftest.py +++ b/tests/fdb/conftest.py @@ -36,6 +36,9 @@ def set_polling_interval(duthost): wait(wait_time, "Waiting {} sec for CRM counters to become updated".format(wait_time)) @pytest.fixture(scope='module') -def get_function_conpleteness_level(pytestconfig): - return pytestconfig.getoption("--completeness_level") - +def get_function_conpleteness_level(pytestconfig, duthost): + asic_name = duthost.get_asic_name() + if asic_name in ['td2']: + return None + else: + return pytestconfig.getoption("--completeness_level") diff --git a/tests/fdb/test_fdb.py b/tests/fdb/test_fdb.py index 72b741e99ef..ecae7af1cfe 100644 --- a/tests/fdb/test_fdb.py +++ b/tests/fdb/test_fdb.py @@ -19,7 +19,7 @@ from utils import fdb_cleanup, send_eth, send_arp_request, send_arp_reply, send_recv_eth pytestmark = [ - pytest.mark.topology('t0', 't0-56-po2vlan'), + pytest.mark.topology('t0', 'm0'), pytest.mark.usefixtures('disable_fdb_aging') ] @@ -131,7 +131,7 @@ def send_arp_reply(ptfadapter, source_port, source_mac, dest_mac, vlan_id): testutils.send(ptfadapter, source_port, pkt) -def send_recv_eth(ptfadapter, source_ports, source_mac, dest_ports, dest_mac, src_vlan, dst_vlan): +def send_recv_eth(duthost, ptfadapter, source_ports, source_mac, dest_ports, dest_mac, src_vlan, dst_vlan): """ send ethernet packet and verify it on dest_port :param ptfadapter: PTF adapter object @@ -160,9 +160,30 @@ def 
send_recv_eth(ptfadapter, source_ports, source_mac, dest_ports, dest_mac, sr exp_pkt.set_do_not_care_scapy(scapy.Dot1Q, "prio") logger.debug('send packet src port {} smac: {} dmac: {} vlan: {} verifying on dst port {}'.format( source_ports, source_mac, dest_mac, src_vlan, dest_ports)) - testutils.send(ptfadapter, source_ports[0], pkt) - testutils.verify_packet_any_port(ptfadapter, exp_pkt, dest_ports, timeout=FDB_WAIT_EXPECTED_PACKET_TIMEOUT) + # fdb test will send lots of pkts between paired ports, it's hard to guarantee there is no congestion + # on server side during this period. So tolerant to retry 3 times before complain the assert. + + retry_count = 3 + pkt_count = 1 + for _ in range(retry_count): + try: + ptfadapter.dataplane.flush() + testutils.send(ptfadapter, source_ports[0], pkt, count=pkt_count) + if len(dest_ports) == 1: + testutils.verify_packet(ptfadapter, exp_pkt, dest_ports[0], timeout=FDB_WAIT_EXPECTED_PACKET_TIMEOUT) + else: + testutils.verify_packet_any_port(ptfadapter, exp_pkt, dest_ports, timeout=FDB_WAIT_EXPECTED_PACKET_TIMEOUT) + break + except: + # Send 10 pkts in retry to make this test case to be more tolerent of congestion on server/ptf + pkt_count = 10 + pass + else: + result = duthost.command("show mac", module_ignore_errors=True) + logger.info("Dest MAC is {}, show mac results {}".format(dest_mac, result['stdout'])) + pytest_assert(False, "Expected packet was not received on ports {}" + "Dest MAC in fdb is {}".format(dest_ports, dest_mac.lower() in result['stdout'].lower())) def setup_fdb(ptfadapter, vlan_table, router_mac, pkt_type, dummy_mac_count): """ @@ -308,7 +329,7 @@ def test_fdb(ansible_adhoc, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost src_ports = src['port_index'] dst_ports = dst['port_index'] for src_mac, dst_mac in itertools.product(fdb[src_ports[0]], fdb[dst_ports[0]]): - send_recv_eth(ptfadapter, src_ports, src_mac, dst_ports, dst_mac, src_vlan, dst_vlan) + send_recv_eth(duthost, ptfadapter, src_ports, 
src_mac, dst_ports, dst_mac, src_vlan, dst_vlan) # Should we have fdb_facts ansible module for this test? fdb_fact = duthost.fdb_facts()['ansible_facts'] @@ -316,15 +337,16 @@ def test_fdb(ansible_adhoc, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost dummy_mac_count = 0 total_mac_count = 0 - for k, v in fdb_fact.items(): - assert v['port'] in interface_table - assert v['vlan'] in interface_table[ifname] + for k, vl in fdb_fact.items(): assert validate_mac(k) == True - assert v['type'] in ['Dynamic', 'Static'] - if DUMMY_MAC_PREFIX in k.lower(): - dummy_mac_count += 1 - if "dynamic" in k.lower(): - total_mac_count += 1 + for v in vl: + assert v['port'] in interface_table + assert v['vlan'] in interface_table[v['port']] + assert v['type'] in ['Dynamic', 'Static'] + if DUMMY_MAC_PREFIX in k.lower(): + dummy_mac_count += 1 + if "dynamic" in v['type'].lower(): + total_mac_count += 1 assert vlan_member_count > 0 diff --git a/tests/fdb/test_fdb_mac_expire.py b/tests/fdb/test_fdb_mac_expire.py index 1c6fa14576c..6492edde16d 100644 --- a/tests/fdb/test_fdb_mac_expire.py +++ b/tests/fdb/test_fdb_mac_expire.py @@ -6,7 +6,7 @@ from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm [py/unused-import] pytestmark = [ - pytest.mark.topology('t0', 't0-56-po2vlan') + pytest.mark.topology('t0', 'm0') ] logger = logging.getLogger(__name__) @@ -221,9 +221,10 @@ def testFdbMacExpire(self, request, tbinfo, rand_selected_dut, ptfhost, refresh_ Returns: None """ - if "t0" not in tbinfo["topo"]["type"]: + topo_type = tbinfo["topo"]["type"] + if "t0" not in topo_type and "m0" not in topo_type: pytest.skip( - "FDB MAC Expire test case is not supported on this DUT topology '{0}'".format(tbinfo["topo"]["type"]) + "FDB MAC Expire test case is not supported on this DUT topology '{0}'".format(topo_type) ) fdbAgingTime = request.config.getoption('--fdb_aging_time') diff --git a/tests/fdb/test_fdb_mac_move.py b/tests/fdb/test_fdb_mac_move.py index 
470ba3a0741..c2761c2474d 100644 --- a/tests/fdb/test_fdb_mac_move.py +++ b/tests/fdb/test_fdb_mac_move.py @@ -1,3 +1,4 @@ +import pytest import logging import time import math @@ -20,6 +21,10 @@ logger = logging.getLogger(__name__) +pytestmark = [ + pytest.mark.topology('t0') +] + def get_fdb_dict(ptfadapter, vlan_table, dummay_mac_count): """ diff --git a/tests/fib/test_fib.py b/tests/fib/test_fib.py index 6e298e51b23..d365d2a30e5 100644 --- a/tests/fib/test_fib.py +++ b/tests/fib/test_fib.py @@ -19,6 +19,7 @@ from tests.common.fixtures.fib_utils import fib_info_files_per_function from tests.common.fixtures.fib_utils import single_fib_for_duts from tests.common.utilities import wait +from tests.common.helpers.assertions import pytest_require logger = logging.getLogger(__name__) @@ -73,7 +74,8 @@ def test_basic_fib(duthosts, ptfhost, ipv4, ipv6, mtu, # do not test load balancing for vs platform as kernel 4.9 # can only do load balance base on L3 - if duthosts[0].facts['asic_type'] in ["vs"]: + asic_type = duthosts[0].facts['asic_type'] + if asic_type in ["vs"]: test_balancing = False else: test_balancing = True @@ -93,7 +95,8 @@ def test_basic_fib(duthosts, ptfhost, ipv4, ipv6, mtu, "testbed_mtu": mtu, "test_balancing": test_balancing, "ignore_ttl": ignore_ttl, - "single_fib_for_duts": single_fib_for_duts}, + "single_fib_for_duts": single_fib_for_duts, + "asic_type": asic_type}, log_file=log_file, qlen=PTF_QLEN, socket_recv_size=16384) @@ -140,7 +143,7 @@ def hash_keys(duthost): hash_keys.remove('src-port') if 'dst-port' in hash_keys: hash_keys.remove('dst-port') - if duthost.facts['asic_type'] in ["mellanox"]: + if duthost.facts['asic_type'] in ["mellanox", "innovium", "cisco-8000"]: if 'ip-proto' in hash_keys: hash_keys.remove('ip-proto') if duthost.facts['asic_type'] in ["barefoot"]: @@ -206,7 +209,7 @@ def ipver(request): return request.param -@pytest.fixture +@pytest.fixture(scope='module') def add_default_route_to_dut(duts_running_config_facts, duthosts, 
tbinfo): """ Add a default route to the device for storage backend testbed. @@ -281,3 +284,73 @@ def test_hash(add_default_route_to_dut, duthosts, fib_info_files_per_function, s log_file=log_file, qlen=PTF_QLEN, socket_recv_size=16384) + +# The test case is to verify src-ip, dst-ip, src-port, dst-port and ip-proto of inner_frame in a IPinIP packet are +# used as hash keys +def test_ipinip_hash(add_default_route_to_dut, duthost, duthosts, fib_info_files_per_function, hash_keys, ptfhost, ipver, + tbinfo, mux_server_url, router_macs, + ignore_ttl, single_fib_for_duts): + # Skip test on none T1 testbed + pytest_require('t1' == tbinfo['topo']['type'], "The test case runs on T1 topology") + timestamp = datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + log_file = "/tmp/hash_test.IPinIPHashTest.{}.{}.log".format(ipver, timestamp) + logging.info("PTF log file: %s" % log_file) + if ipver == "ipv4": + src_ip_range = SRC_IP_RANGE + dst_ip_range = DST_IP_RANGE + else: + src_ip_range = SRC_IPV6_RANGE + dst_ip_range = DST_IPV6_RANGE + ptf_runner(ptfhost, + "ptftests", + "hash_test.IPinIPHashTest", + platform_dir="ptftests", + params={"fib_info_files": fib_info_files_per_function[:3], # Test at most 3 DUTs + "ptf_test_port_map": ptf_test_port_map(ptfhost, tbinfo, duthosts, mux_server_url), + "hash_keys": hash_keys, + "src_ip_range": ",".join(src_ip_range), + "dst_ip_range": ",".join(dst_ip_range), + "router_macs": router_macs, + "vlan_ids": VLANIDS, + "ignore_ttl":ignore_ttl, + "single_fib_for_duts": single_fib_for_duts, + "ipver": ipver + }, + log_file=log_file, + qlen=PTF_QLEN, + socket_recv_size=16384) + +# The test is to verify the hashing logic is not using unexpected field as keys +# Only inner frame length is tested at this moment +def test_ipinip_hash_negative(add_default_route_to_dut, duthosts, fib_info_files_per_function, ptfhost, ipver, + tbinfo, mux_server_url, router_macs, + ignore_ttl, single_fib_for_duts): + hash_keys = ['inner_length'] + timestamp = 
datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + log_file = "/tmp/hash_test.IPinIPHashTest.{}.{}.log".format(ipver, timestamp) + logging.info("PTF log file: %s" % log_file) + if ipver == "ipv4": + src_ip_range = SRC_IP_RANGE + dst_ip_range = DST_IP_RANGE + else: + src_ip_range = SRC_IPV6_RANGE + dst_ip_range = DST_IPV6_RANGE + ptf_runner(ptfhost, + "ptftests", + "hash_test.IPinIPHashTest", + platform_dir="ptftests", + params={"fib_info_files": fib_info_files_per_function[:3], # Test at most 3 DUTs + "ptf_test_port_map": ptf_test_port_map(ptfhost, tbinfo, duthosts, mux_server_url), + "hash_keys": hash_keys, + "src_ip_range": ",".join(src_ip_range), + "dst_ip_range": ",".join(dst_ip_range), + "router_macs": router_macs, + "vlan_ids": VLANIDS, + "ignore_ttl":ignore_ttl, + "single_fib_for_duts": single_fib_for_duts, + "ipver": ipver + }, + log_file=log_file, + qlen=PTF_QLEN, + socket_recv_size=16384) + diff --git a/tests/generic_config_updater/test_bgpl.py b/tests/generic_config_updater/test_bgpl.py index c83d851f3a5..4344f4312b2 100644 --- a/tests/generic_config_updater/test_bgpl.py +++ b/tests/generic_config_updater/test_bgpl.py @@ -9,7 +9,7 @@ from tests.generic_config_updater.gu_utils import create_checkpoint, delete_checkpoint, rollback_or_reload pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/generic_config_updater/test_cacl.py b/tests/generic_config_updater/test_cacl.py index 70232b09a46..59a3d92cf14 100644 --- a/tests/generic_config_updater/test_cacl.py +++ b/tests/generic_config_updater/test_cacl.py @@ -17,7 +17,7 @@ # SSH_ONLY CTRLPLANE SSH SSH_ONLY ingress pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/generic_config_updater/test_dhcp_relay.py b/tests/generic_config_updater/test_dhcp_relay.py index 1bf5689ab16..b91f59c8f12 100644 --- 
a/tests/generic_config_updater/test_dhcp_relay.py +++ b/tests/generic_config_updater/test_dhcp_relay.py @@ -9,7 +9,7 @@ from tests.generic_config_updater.gu_utils import create_checkpoint, delete_checkpoint, rollback_or_reload, rollback pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/generic_config_updater/test_incremental_qos.py b/tests/generic_config_updater/test_incremental_qos.py index d6894f97932..359105c219e 100644 --- a/tests/generic_config_updater/test_incremental_qos.py +++ b/tests/generic_config_updater/test_incremental_qos.py @@ -11,7 +11,7 @@ from tests.generic_config_updater.gu_utils import create_checkpoint, delete_checkpoint, rollback_or_reload pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.asic('mellanox', 'barefoot') ] diff --git a/tests/generic_config_updater/test_lo_interface.py b/tests/generic_config_updater/test_lo_interface.py index cbb6c499677..f2af4a938ed 100644 --- a/tests/generic_config_updater/test_lo_interface.py +++ b/tests/generic_config_updater/test_lo_interface.py @@ -20,7 +20,7 @@ # fe80::4a3:18ff:fec2:f9e3%Loopback0/64 N/A N/A pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/generic_config_updater/test_portchannel_interface.py b/tests/generic_config_updater/test_portchannel_interface.py index 1d7d106e507..9ba67b6ddc4 100644 --- a/tests/generic_config_updater/test_portchannel_interface.py +++ b/tests/generic_config_updater/test_portchannel_interface.py @@ -24,7 +24,7 @@ # } pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/generic_config_updater/test_vlan_interface.py b/tests/generic_config_updater/test_vlan_interface.py index 2a172bfbc92..c511159dc90 100644 --- a/tests/generic_config_updater/test_vlan_interface.py +++ 
b/tests/generic_config_updater/test_vlan_interface.py @@ -14,7 +14,7 @@ # } pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), ] logger = logging.getLogger(__name__) diff --git a/tests/iface_namingmode/test_iface_namingmode.py b/tests/iface_namingmode/test_iface_namingmode.py index c6c5161235a..665080ff34a 100644 --- a/tests/iface_namingmode/test_iface_namingmode.py +++ b/tests/iface_namingmode/test_iface_namingmode.py @@ -523,13 +523,13 @@ def test_show_queue_watermark_unicast(self, setup, setup_config_mode): for intf in setup['default_interfaces']: assert re.search(r'{}'.format(intf), show_queue_wm_ucast) is not None -# Tests to be run in t0 topology +# Tests to be run in t0/m0 topology class TestShowVlan(): @pytest.fixture(scope="class", autouse=True) def setup_check_topo(self, tbinfo): - if tbinfo['topo']['type'] != 't0': + if tbinfo['topo']['type'] not in ['t0', 'm0']: pytest.skip('Unsupported topology') @pytest.fixture() @@ -544,11 +544,18 @@ def setup_vlan(self, setup_config_mode): """ dutHostGuest, mode, ifmode = setup_config_mode logger.info('Creating a test vlan 100') - dutHostGuest.shell('SONIC_CLI_IFACE_MODE={} sudo config vlan add 100'.format(ifmode)) + res = dutHostGuest.shell('SONIC_CLI_IFACE_MODE={} sudo config vlan add 100' + .format(ifmode), module_ignore_errors=True) + if res["rc"] != 0 and "Restart service dhcp_relay failed with error" not in res["stderr"]: + pytest.fail("Add vlan failed in setup") + yield logger.info('Cleaning up the test vlan 100') - dutHostGuest.shell('SONIC_CLI_IFACE_MODE={} sudo config vlan del 100'.format(ifmode)) + res = dutHostGuest.shell('SONIC_CLI_IFACE_MODE={} sudo config vlan del 100' + .format(ifmode), module_ignore_errors=True) + if res["rc"] != 0 and "Restart service dhcp_relay failed with error" not in res["stderr"]: + pytest.fail("Del vlan failed in teardown") def test_show_vlan_brief(self, setup, setup_config_mode): """ diff --git a/tests/ip/test_ip_packet.py 
b/tests/ip/test_ip_packet.py index 42be8468a2c..e95f8223f5b 100644 --- a/tests/ip/test_ip_packet.py +++ b/tests/ip/test_ip_packet.py @@ -1,6 +1,7 @@ import random import re import time +import logging import ipaddress import ptf.testutils as testutils @@ -8,13 +9,21 @@ from ptf import mask, packet from tests.common.helpers.assertions import pytest_assert +from tests.common.portstat_utilities import parse_column_positions from tests.common.portstat_utilities import parse_portstat +pytestmark = [ + pytest.mark.topology('any') +] + +logger = logging.getLogger(__name__) + class TestIPPacket(object): PKT_NUM = 1000 PKT_NUM_MIN = PKT_NUM * 0.9 - PKT_NUM_MAX = PKT_NUM * 1.3 + # in dualtor PKT_NUM_MAX should be larger + PKT_NUM_MAX = PKT_NUM * 1.5 # a number <= PKT_NUM * 0.1 can be considered as 0 PKT_NUM_ZERO = PKT_NUM * 0.1 @@ -23,12 +32,12 @@ def skip(self, mg_facts): pytest.skip("Skipping ip packet test since can't provide enough interfaces") @staticmethod - def sum_portstat_ifaces_counts(portstat_out, ifaces, column): + def sum_ifaces_counts(counter_out, ifaces, column): if len(ifaces) == 0: return 0 if len(ifaces) == 1: - return int(portstat_out[ifaces[0]][column].replace(",", "")) - return sum(map(lambda iface: int(portstat_out[iface][column].replace(",", "")), ifaces)) + return int(counter_out[ifaces[0]][column].replace(",", "")) + return sum(map(lambda iface: int(counter_out[iface][column].replace(",", "")), ifaces)) @staticmethod def parse_interfaces(output_lines, pc_ports_map): @@ -50,13 +59,67 @@ def parse_interfaces(output_lines, pc_ports_map): elif route_target.startswith("PortChannel") and route_target in pc_ports_map: ifaces.extend(pc_ports_map[route_target]) - return ifaces + return route_targets, ifaces + + @staticmethod + def parse_rif_counters(output_lines): + '''Parse the output of "show interfaces counters rif" command + Args: + output_lines (list): The output lines of "show interfaces counters rif" command + Returns: + list: A dictionary, key is 
interface name, value is a dictionary of fields/values + ''' + + header_line = '' + separation_line = '' + separation_line_number = 0 + for idx, line in enumerate(output_lines): + if line.find('----') >= 0: + header_line = output_lines[idx-1] + separation_line = output_lines[idx] + separation_line_number = idx + break + + try: + positions = parse_column_positions(separation_line) + except Exception: + logger.error('Possibly bad command output') + return {} + + headers = [] + for pos in positions: + header = header_line[pos[0]:pos[1]].strip().lower() + headers.append(header) + + if not headers: + return {} + + results = {} + for line in output_lines[separation_line_number+1:]: + portstats = [] + for pos in positions: + portstat = line[pos[0]:pos[1]].strip() + portstats.append(portstat) + + intf = portstats[0] + results[intf] = {} + for idx in range(1, len(portstats)): # Skip the first column interface name + results[intf][headers[idx]] = portstats[idx] + + return results + @pytest.fixture(scope="class") def common_param(self, duthost, tbinfo): mg_facts = duthost.get_extended_minigraph_facts(tbinfo) self.skip(mg_facts) - pc_ports_map = {} + + # generate peer_ip and port channel pair, be like:[("10.0.0.57", "PortChannel0001")] + peer_ip_pc_pair = [(pc["peer_addr"], pc["attachto"]) for pc in mg_facts["minigraph_portchannel_interfaces"] + if + ipaddress.ip_address(pc['peer_addr']).version == 4] + pc_ports_map = {pair[1]: mg_facts["minigraph_portchannels"][pair[1]]["members"] for pair in + peer_ip_pc_pair} if len(mg_facts["minigraph_interfaces"]) >= 2: # generate peer_ip and interfaces pair, @@ -64,31 +127,36 @@ def common_param(self, duthost, tbinfo): peer_ip_ifaces_pair = [(intf["peer_addr"], [intf["attachto"]]) for intf in mg_facts["minigraph_interfaces"] if ipaddress.ip_address(intf['peer_addr']).version == 4] + selected_peer_ip_ifaces_pairs = random.sample(peer_ip_ifaces_pair, k=2) + rif_rx_ifaces = selected_peer_ip_ifaces_pairs[0][1][0] else: - # generate peer_ip 
and port channel pair, be like:[("10.0.0.57", "PortChannel0001")] - peer_ip_pc_pair = [(pc["peer_addr"], pc["attachto"]) for pc in mg_facts["minigraph_portchannel_interfaces"] - if - ipaddress.ip_address(pc['peer_addr']).version == 4] - pc_ports_map = {pair[1]: mg_facts["minigraph_portchannels"][pair[1]]["members"] for pair in - peer_ip_pc_pair} # generate peer_ip and interfaces(port channel members) pair, # be like:[("10.0.0.57", ["Ethernet48", "Ethernet52"])] peer_ip_ifaces_pair = [(pair[0], mg_facts["minigraph_portchannels"][pair[1]]["members"]) for pair in peer_ip_pc_pair] + selected_peer_ip_ifaces_pairs = random.sample(peer_ip_ifaces_pair, k=2) + rif_rx_ifaces = list(pc_ports_map.keys())[list(pc_ports_map.values()).index(selected_peer_ip_ifaces_pairs[0][1])] - selected_peer_ip_ifaces_pairs = random.sample(peer_ip_ifaces_pair, k=2) # use first port of first peer_ip_ifaces pair as input port # all ports in second peer_ip_ifaces pair will be output/forward port ptf_port_idx = mg_facts["minigraph_ptf_indices"][selected_peer_ip_ifaces_pairs[0][1][0]] + # Some platforms do not support rif counter + try: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + rif_iface = list(rif_counter_out.keys())[0] + rif_support = False if rif_counter_out[rif_iface]['rx_err'] == 'N/A' else True + except Exception as e: + logger.info("Show rif counters failed with exception: {}".format(repr(e))) + rif_support = False - yield selected_peer_ip_ifaces_pairs, ptf_port_idx, pc_ports_map, mg_facts["minigraph_ptf_indices"] + yield selected_peer_ip_ifaces_pairs, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, mg_facts["minigraph_ptf_indices"] def test_forward_ip_packet_with_0x0000_chksum(self, duthost, ptfadapter, common_param): # GIVEN a ip packet with checksum 0x0000(compute from scratch) # WHEN send the packet to DUT # THEN DUT should forward it as normal ip packet - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, 
ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -109,36 +177,45 @@ def test_forward_ip_packet_with_0x0000_chksum(self, duthost, ptfadapter, common_ exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], pc_ports_map) + logger.info("out_rif_ifaces: {}, out_ifaces: {}".format(out_rif_ifaces, out_ifaces)) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") - - pytest_assert(match_cnt == self.PKT_NUM, "Packet lost") - pytest_assert(self.PKT_NUM_MIN <= rx_ok 
<= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= tx_ok <= self.PKT_NUM_MAX, "tx_ok unexpected") - pytest_assert(rx_drp <= self.PKT_NUM_ZERO, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") - - @pytest.mark.xfail + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 + + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(tx_ok >= self.PKT_NUM_MIN, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(rx_drp, rx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt >= self.PKT_NUM_MIN, "DUT forwarded {} packets, but {} packets matched expected format, not in expected range".format(tx_ok, match_cnt)) + def test_forward_ip_packet_with_0xffff_chksum_tolerant(self, duthost, ptfadapter, common_param): # GIVEN a ip packet with checksum 0x0000(compute from scratch) # WHEN manually set checksum as 0xffff and send the packet to DUT # THEN DUT should tolerant packet with 0xffff, forward it as normal packet - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -160,36 +237,44 @@ def test_forward_ip_packet_with_0xffff_chksum_tolerant(self, duthost, ptfadapter 
exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], pc_ports_map) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 - pytest_assert(match_cnt == self.PKT_NUM, "Packet lost") - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= tx_ok <= 
self.PKT_NUM_MAX, "tx_ok unexpected") - pytest_assert(rx_drp <= self.PKT_NUM_ZERO, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(tx_ok >= self.PKT_NUM_MIN, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(rx_drp, rx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt >= self.PKT_NUM_MIN, "DUT forwarded {} packets, but {} packets matched expected format, not in expected range".format(tx_ok, match_cnt)) - @pytest.mark.xfail def test_forward_ip_packet_with_0xffff_chksum_drop(self, duthost, ptfadapter, common_param): # GIVEN a ip packet with checksum 0x0000(compute from scratch) # WHEN manually set checksum as 0xffff and send the packet to DUT # THEN DUT should drop packet with 0xffff and add drop count - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -211,28 +296,37 @@ def test_forward_ip_packet_with_0xffff_chksum_drop(self, duthost, ptfadapter, co exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], pc_ports_map) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + 
duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 - pytest_assert(match_cnt == 0, "Packet not dropped") - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= rx_drp <= self.PKT_NUM_MAX, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") - pytest_assert(tx_ok <= self.PKT_NUM_ZERO, "tx_ok unexpected") + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(max(rx_drp, rx_err) >= self.PKT_NUM_MIN, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(tx_ok <= self.PKT_NUM_ZERO, "Forwarded {} packets in tx, not in 
expected range".format(tx_ok)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt == 0, "DUT shouldn't forward packets, but forwarded {} packets, not in expected range".format(match_cnt)) def test_forward_ip_packet_recomputed_0xffff_chksum(self, duthost, ptfadapter, common_param): # GIVEN a ip packet, after forwarded(ttl-1) by DUT, @@ -242,7 +336,7 @@ def test_forward_ip_packet_recomputed_0xffff_chksum(self, duthost, ptfadapter, c # WHEN send the packet to DUT # THEN DUT recompute new checksum correctly and forward packet as expected. - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -263,35 +357,44 @@ def test_forward_ip_packet_recomputed_0xffff_chksum(self, duthost, ptfadapter, c exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.190.188")["stdout_lines"], + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.190.188")["stdout_lines"], pc_ports_map) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In 
different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 - pytest_assert(match_cnt == self.PKT_NUM, "Packet lost") - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= tx_ok <= self.PKT_NUM_MAX, "tx_ok unexpected") - pytest_assert(rx_drp <= self.PKT_NUM_ZERO, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(tx_ok >= self.PKT_NUM_MIN, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(rx_drp, rx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt >= self.PKT_NUM_MIN, "DUT forwarded {} packets, but {} packets matched expected format, not in expected range".format(tx_ok, match_cnt)) def test_forward_ip_packet_recomputed_0x0000_chksum(self, duthost, ptfadapter, common_param): # GIVEN a ip packet, after forwarded(ttl-1) by 
DUT, it's checksum will be 0x0000 after recompute from scratch # WHEN send the packet to DUT # THEN DUT recompute new checksum as 0x0000 and forward packet as expected. - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -312,34 +415,43 @@ def test_forward_ip_packet_recomputed_0x0000_chksum(self, duthost, ptfadapter, c exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces(duthost.command("show ip route 10.156.94.34")["stdout_lines"], pc_ports_map) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = 
TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 - pytest_assert(match_cnt == self.PKT_NUM, "Packet lost") - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= tx_ok <= self.PKT_NUM_MAX, "tx_ok unexpected") - pytest_assert(rx_drp <= self.PKT_NUM_ZERO, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(tx_ok >= self.PKT_NUM_MIN, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(rx_drp, rx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt >= self.PKT_NUM_MIN, "DUT forwarded {} packets, but {} packets matched expected format, not in expected range".format(tx_ok, match_cnt)) def test_forward_normal_ip_packet(self, duthost, ptfadapter, common_param): # GIVEN a random normal ip packet # WHEN send the packet to DUT # THEN DUT should forward it as normal ip packet, nothing change but ttl-1 - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -353,35 +465,44 @@ def 
test_forward_normal_ip_packet(self, duthost, ptfadapter, common_param): exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - out_ifaces = TestIPPacket.parse_interfaces( + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces( duthost.command("show ip route %s" % peer_ip_ifaces_pair[1][0])["stdout_lines"], pc_ports_map) out_ptf_indices = map(lambda iface: ptf_indices[iface], out_ifaces) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) portstat_out = parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 - pytest_assert(match_cnt == self.PKT_NUM, "Packet lost") - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - 
pytest_assert(self.PKT_NUM_MIN <= tx_ok <= self.PKT_NUM_MAX, "tx_ok unexpected") - pytest_assert(rx_drp <= self.PKT_NUM_ZERO, "rx_drp unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(tx_ok >= self.PKT_NUM_MIN, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(rx_drp, rx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) + pytest_assert(match_cnt >= self.PKT_NUM_MIN, "DUT forwarded {} packets, but {} packets matched expected format, not in expected range".format(tx_ok, match_cnt)) def test_drop_ip_packet_with_wrong_0xffff_chksum(self, duthost, ptfadapter, common_param): # GIVEN a random normal ip packet, and manually modify checksum to 0xffff # WHEN send the packet to DUT # THEN DUT should drop it and add drop count - (peer_ip_ifaces_pair, ptf_port_idx, pc_ports_map, ptf_indices) = common_param + (peer_ip_ifaces_pair, rif_rx_ifaces, rif_support, ptf_port_idx, pc_ports_map, ptf_indices) = common_param pkt = testutils.simple_ip_packet( eth_dst=duthost.facts["router_mac"], eth_src=ptfadapter.dataplane.get_mac(0, ptf_port_idx), @@ -390,23 +511,33 @@ def test_drop_ip_packet_with_wrong_0xffff_chksum(self, duthost, ptfadapter, comm pkt.payload.chksum = 0xffff - out_ifaces = TestIPPacket.parse_interfaces( + out_rif_ifaces, out_ifaces = TestIPPacket.parse_interfaces( duthost.command("show ip route %s" % peer_ip_ifaces_pair[1][0])["stdout_lines"], pc_ports_map) duthost.command("portstat -c") + if rif_support: + duthost.command("sonic-clear rifcounters") ptfadapter.dataplane.flush() + testutils.send(ptfadapter, ptf_port_idx, pkt, self.PKT_NUM) time.sleep(5) portstat_out = 
parse_portstat(duthost.command("portstat")["stdout_lines"]) + if rif_support: + rif_counter_out = TestIPPacket.parse_rif_counters(duthost.command("show interfaces counters rif")["stdout_lines"]) + # In different platforms, IP packets with specific checksum will be dropped in different layer + # We use both layer 2 counter and layer 3 counter to check where packet are dropped rx_ok = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_ok"].replace(",", "")) rx_drp = int(portstat_out[peer_ip_ifaces_pair[0][1][0]]["rx_drp"].replace(",", "")) - tx_ok = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_ok") - tx_drp = TestIPPacket.sum_portstat_ifaces_counts(portstat_out, out_ifaces, "tx_drp") - - pytest_assert(self.PKT_NUM_MIN <= rx_ok <= self.PKT_NUM_MAX, "rx_ok unexpected") - pytest_assert(self.PKT_NUM_MIN <= rx_drp <= self.PKT_NUM_MAX, "rx_drp unexpected") - pytest_assert(tx_ok <= self.PKT_NUM_ZERO, "tx_ok unexpected") - pytest_assert(tx_drp <= self.PKT_NUM_ZERO, "tx_drp unexpected") + rx_err = int(rif_counter_out[rif_rx_ifaces]["rx_err"].replace(",", "")) if rif_support else 0 + tx_ok = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_ok") + tx_drp = TestIPPacket.sum_ifaces_counts(portstat_out, out_ifaces, "tx_drp") + tx_err = TestIPPacket.sum_ifaces_counts(rif_counter_out, out_rif_ifaces, "tx_err") if rif_support else 0 + + asic_type = duthost.facts['asic_type'] + pytest_assert(rx_ok >= self.PKT_NUM_MIN, "Received {} packets in rx, not in expected range".format(rx_ok)) + pytest_assert(max(rx_drp, rx_err) >= self.PKT_NUM_MIN if asic_type not in ["marvell"] else True, "Dropped {} packets in rx, not in expected range".format(rx_err)) + pytest_assert(tx_ok <= self.PKT_NUM_ZERO, "Forwarded {} packets in tx, not in expected range".format(tx_ok)) + pytest_assert(max(tx_drp, tx_err) <= self.PKT_NUM_ZERO, "Dropped {} packets in tx, not in expected range".format(tx_err)) diff --git a/tests/ipfwd/test_dip_sip.py b/tests/ipfwd/test_dip_sip.py 
index 8a696f27f40..b46da63961f 100644 --- a/tests/ipfwd/test_dip_sip.py +++ b/tests/ipfwd/test_dip_sip.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0', 't1', 't2') + pytest.mark.topology('t0', 't1', 't2', 'm0') ] @pytest.fixture(scope="module", autouse="True") @@ -83,7 +83,7 @@ def run_test_ipv4(ptfadapter, facts): def test_dip_sip(tbinfo, ptfadapter, gather_facts, enum_frontend_asic_index): topo_type = tbinfo['topo']['type'] - if topo_type not in ('t0', 't1', 't2'): + if topo_type not in ('t0', 't1', 't2', 'm0'): pytest.skip("Unsupported topology") ptfadapter.reinit() diff --git a/tests/ipfwd/test_dir_bcast.py b/tests/ipfwd/test_dir_bcast.py index 6a45c07a4eb..2d1cb14df8f 100644 --- a/tests/ipfwd/test_dir_bcast.py +++ b/tests/ipfwd/test_dir_bcast.py @@ -1,36 +1,86 @@ import pytest +import json +import logging -from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # noqa F401 from tests.ptf_runner import ptf_runner from datetime import datetime - +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m # noqa F401 +from tests.common.utilities import get_neighbor_ptf_port_list +from tests.common.helpers.constants import UPSTREAM_NEIGHBOR_MAP pytestmark = [ - pytest.mark.topology('t0') + pytest.mark.topology('t0', 'm0', 'mx') ] -def test_dir_bcast(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): +logger = logging.getLogger(__name__) + +PTF_TEST_PORT_MAP = '/root/ptf_test_port_map.json' + + +def get_ptf_src_ports(tbinfo, duthost): + # Source ports are upstream ports + upstream_neightbor_name = UPSTREAM_NEIGHBOR_MAP[tbinfo["topo"]["type"]] + ptf_src_ports = get_neighbor_ptf_port_list(duthost, upstream_neightbor_name, tbinfo) + return ptf_src_ports + + +def get_ptf_dst_ports(duthost, mg_facts, testbed_type): + if "dualtor" in testbed_type: + # In 
dualtor, only active port in active tor could be dst port + mux_status_out = duthost.show_and_parse("show mux status") + mux_active_ports = [] + for mux_info in mux_status_out: + if mux_info['status'] == 'active': + mux_active_ports.append(mux_info['port']) + + vlan_ip_port_pair = {} + for vlan_intf in mg_facts['minigraph_vlan_interfaces']: + vlan_subnet = vlan_intf["subnet"] + vlan_name = vlan_intf["attachto"] + + ptf_dst_ports = [] + for member in mg_facts['minigraph_vlans'][vlan_name]['members']: + if "Ethernet" in member: + if "dualtor" not in testbed_type: + ptf_dst_ports.append(mg_facts['minigraph_port_indices'][member]) + elif member in mux_active_ports: + ptf_dst_ports.append(mg_facts['minigraph_port_indices'][member]) + + if ptf_dst_ports: + vlan_ip_port_pair[vlan_subnet] = ptf_dst_ports + + return vlan_ip_port_pair + + +def ptf_test_port_map(duthost, ptfhost, mg_facts, testbed_type, tbinfo): + ptf_test_port_map = {} + ptf_src_ports = get_ptf_src_ports(tbinfo, duthost) + vlan_ip_port_pair = get_ptf_dst_ports(duthost, mg_facts, testbed_type) + + ptf_test_port_map = { + 'ptf_src_ports': ptf_src_ports, + 'vlan_ip_port_pair': vlan_ip_port_pair + } + ptfhost.copy(content=json.dumps(ptf_test_port_map), dest=PTF_TEST_PORT_MAP) + + +def test_dir_bcast(duthosts, rand_one_dut_hostname, ptfhost, tbinfo, + toggle_all_simulator_ports_to_rand_selected_tor_m): # noqa F811 duthost = duthosts[rand_one_dut_hostname] - support_testbed_types = frozenset(['t0', 't0-16', 't0-52', 't0-56', 't0-64', 't0-64-32', 't0-116']) testbed_type = tbinfo['topo']['name'] - if testbed_type not in support_testbed_types: - pytest.skip("Not support given test bed type %s" % testbed_type) + logger.info("tbinfo: {}".format(tbinfo)) # Copy VLAN information file to PTF-docker mg_facts = duthost.get_extended_minigraph_facts(tbinfo) - extra_vars = { - 'minigraph_vlan_interfaces': mg_facts['minigraph_vlan_interfaces'], - 'minigraph_vlans': mg_facts['minigraph_vlans'], - 'minigraph_port_indices': 
mg_facts['minigraph_ptf_indices'], - 'minigraph_portchannels': mg_facts['minigraph_portchannels'] - } - ptfhost.host.options['variable_manager'].extra_vars.update(extra_vars) - ptfhost.template(src="../ansible/roles/test/templates/fdb.j2", dest="/root/vlan_info.txt") + logger.info("mg_facts: {}".format(mg_facts)) + + ptf_test_port_map(duthost, ptfhost, mg_facts, testbed_type, tbinfo) # Start PTF runner params = { 'testbed_type': testbed_type, 'router_mac': duthost.facts['router_mac'], - 'vlan_info': '/root/vlan_info.txt' + 'ptf_test_port_map': PTF_TEST_PORT_MAP } log_file = "/tmp/dir_bcast.BcastTest.{}.log".format(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) ptf_runner( diff --git a/tests/ipfwd/test_nhop_count.py b/tests/ipfwd/test_nhop_count.py index e7cdd4c4202..6390f784bb6 100644 --- a/tests/ipfwd/test_nhop_count.py +++ b/tests/ipfwd/test_nhop_count.py @@ -7,10 +7,13 @@ from collections import namedtuple from tests.common.helpers.assertions import pytest_assert +from tests.common.cisco_data import is_cisco_device from tests.common.mellanox_data import is_mellanox_device from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer from tests.common.utilities import skip_release +CISCO_NHOP_GROUP_FILL_PERCENTAGE = 0.92 + pytestmark = [ pytest.mark.topology('t1', 't2') ] @@ -267,11 +270,19 @@ def test_nhop(request, duthost, tbinfo): - Add IP route and nexthop - check CRM resource - clean up - - Verify no erros and crash + - Verify no errors and crash """ skip_release(duthost, ["201811", "201911"]) - default_max_nhop_paths = 32 + # Set of parameters for Cisco-8000 devices + if is_cisco_device(duthost): + default_max_nhop_paths = 2 + polling_interval = 1 + sleep_time = 380 + else: + default_max_nhop_paths = 32 + polling_interval = 10 + sleep_time = 120 nhop_group_limit = 1024 # program more than the advertised limit extra_nhops = 10 @@ -286,7 +297,12 @@ def test_nhop(request, duthost, tbinfo): switch_capability = dict(zip(it, it)) max_nhop = 
switch_capability.get("MAX_NEXTHOP_GROUP_COUNT") max_nhop = nhop_group_limit if max_nhop == None else int(max_nhop) - nhop_group_count = min(max_nhop, nhop_group_limit) + extra_nhops + if is_cisco_device(duthost): + crm_stat = get_crm_info(duthost, asic) + nhop_group_count = crm_stat["available"] + nhop_group_count = int(nhop_group_count * CISCO_NHOP_GROUP_FILL_PERCENTAGE) + else: + nhop_group_count = min(max_nhop, nhop_group_limit) + extra_nhops # find out an active IP port ip_ifaces = asic.get_active_ip_interfaces(tbinfo).keys() @@ -294,7 +310,10 @@ def test_nhop(request, duthost, tbinfo): eth_if = ip_ifaces[0] # Generate ARP entries - arp_count = 40 + if is_cisco_device(duthost): + arp_count = 257 + else: + arp_count = 40 arplist = Arp(duthost, asic, arp_count, eth_if) arplist.arps_add() @@ -318,7 +337,7 @@ def test_nhop(request, duthost, tbinfo): crm_before = get_crm_info(duthost, asic) # increase CRM polling time - asic.command("crm config polling interval 10") + asic.command("crm config polling interval {}".format(polling_interval)) logging.info("Adding {} next hops on {}".format(nhop_group_count, eth_if)) @@ -336,7 +355,7 @@ def test_nhop(request, duthost, tbinfo): nhop.program_routes() # wait for routes to be synced and programmed - time.sleep(120) + time.sleep(sleep_time) crm_after = get_crm_info(duthost, asic) finally: @@ -351,10 +370,20 @@ def test_nhop(request, duthost, tbinfo): # verify the test used up all the NHOP group resources # skip this check on Mellanox as ASIC resources are shared - if not is_mellanox_device(duthost): + if is_cisco_device(duthost): + pytest_assert( + crm_after["available"] + nhop_group_count == crm_before["available"], + "Unused NHOP group resource:{}, used:{}, nhop_group_count:{}, Unused NHOP group resource before:{}".format( + crm_after["available"], crm_after["used"], nhop_group_count, crm_before["available"] + ) + ) + elif is_mellanox_device(duthost): + logging.info("skip this check on Mellanox as ASIC resources are 
shared") + else: pytest_assert( crm_after["available"] == 0, - "Unused NHOP group resource: {}, used:{}".format( + "Unused NHOP group resource:{}, used:{}".format( crm_after["available"], crm_after["used"] ) ) + diff --git a/tests/ixia/README.md b/tests/ixia/README.md new file mode 100644 index 00000000000..04a4b88bc20 --- /dev/null +++ b/tests/ixia/README.md @@ -0,0 +1,5 @@ +# Important Note for Contributors + +Test cases in this directory have been migrated to /tests/snappi_tests. **Since December 2022, Snappi has become our primary API (through `snappi`) for interacting with hardware traffic generators**, while we are deprecating the existing `ixia` API from our test repository. Any new test cases from contributors requiring the use of any *hardware traffic generators* should utilise the `snappi` API, and any new pull requests which still calls the `ixia` API will be disregarded. + +You can find more information on the `snappi` API such as its usage [here](https://github.com/open-traffic-generator/snappi-ixnetwork) or by viewing the code in the [tests/snappi](https://github.com/sonic-net/sonic-mgmt/tree/master/tests/snappi_tests) directory of this repository. 
diff --git a/tests/ixia/ecn/conftest.py b/tests/ixia/ecn/conftest.py index ecbd784a105..6b88da097a3 100644 --- a/tests/ixia/ecn/conftest.py +++ b/tests/ixia/ecn/conftest.py @@ -1,4 +1,8 @@ from args.ecn_args import add_ecn_args +from tests.common.devices.ptf import PTFHost +from tests.common.system_utils import docker + +import pytest def pytest_addoption(parser): ''' @@ -9,3 +13,50 @@ def pytest_addoption(parser): None ''' add_ecn_args(parser) + +@pytest.fixture(scope="module", autouse=True) +def ptfhost(ansible_adhoc, request): + ptf_name = request.config.getoption("--ixia_ptf_name") + if ptf_name: + + return PTFHost(ansible_adhoc, ptf_name) + else: + print("No ixia_ptf_name argument is given, No ptf access will work.") + return + +@pytest.fixture(scope="module", autouse=True) +def prepare_ptf(ptfhost): + if not ptfhost: + yield + else: + ptfhost.copy(src="ptftests", dest="/root/ixia_ptftests") + ptfhost.copy(src="saitests", dest="/root/ixia_saitests") + + yield + ptfhost.file(path='/root/ixia_ptftests', state="absent") + ptfhost.file(path='/root/ixia_saitests', state="absent") + +# Pulled from qos_sai_base.py +@pytest.fixture(scope='module', autouse=True) +def swapSyncd(request, ptfhost, duthosts, rand_one_dut_hostname, creds): + """ + Swap syncd on DUT host + + Args: + request (Fixture): pytest request object + duthost (AnsibleHost): Device Under Test (DUT) + Returns: + None + """ + duthost = duthosts[rand_one_dut_hostname] + if not ptfhost: + yield + else: + swapSyncd = request.config.getoption("--qos_swap_syncd") + try: + if swapSyncd: + docker.swap_syncd(duthost, creds) + yield + finally: + if swapSyncd: + docker.restore_default_syncd(duthost, creds) diff --git a/tests/ixia/ecn/test_dequeue_ecn.py b/tests/ixia/ecn/test_dequeue_ecn.py index 45cffa3bf7e..f12a81cee4f 100644 --- a/tests/ixia/ecn/test_dequeue_ecn.py +++ b/tests/ixia/ecn/test_dequeue_ecn.py @@ -8,6 +8,8 @@ from tests.common.ixia.qos_fixtures import prio_dscp_map, lossless_prio_list from 
files.helper import run_ecn_test, is_ecn_marked +from tests.common.cisco_data import get_markings_dut, setup_markings_dut +from tests.ixia.ptf_utils import get_sai_attributes pytestmark = [ pytest.mark.topology('tgen') ] @@ -17,6 +19,8 @@ def test_dequeue_ecn(request, conn_graph_facts, fanout_graph_facts, duthosts, + ptfhost, + localhost, rand_one_dut_hostname, rand_one_dut_portname_oper_up, rand_one_dut_lossless_prio, @@ -51,6 +55,7 @@ def test_dequeue_ecn(request, testbed_config, port_config_list = ixia_testbed_config duthost = duthosts[rand_one_dut_hostname] lossless_prio = int(lossless_prio) + cisco_platform = (duthost.facts['asic_type'] == "cisco-8000") kmin = 50000 kmax = 51000 @@ -58,7 +63,22 @@ def test_dequeue_ecn(request, pkt_size = 1024 pkt_cnt = 100 - ip_pkts = run_ecn_test(api=ixia_api, + if cisco_platform: + original_ecn_markings = get_markings_dut(duthost) + setup_markings_dut(duthost, localhost, ecn_dequeue_marking = True, ecn_latency_marking = False) + oq_cell_count = 100 # Number of cells in OQ for this lossless priority + cell_size = 384 + cell_per_pkt = (pkt_size + cell_size - 1) // cell_size + margin_cells = 25 + margin = margin_cells // cell_per_pkt + pkt_to_oq = (oq_cell_count//cell_per_pkt) + margin # Packets forwarded to OQ + pkt_to_check = pkt_to_oq + 1 + get_sai_attributes(duthost, ptfhost, dut_port, [], clear_only=True) + else: + pkt_to_check = 0 + + try: + ip_pkts = run_ecn_test(api=ixia_api, testbed_config=testbed_config, port_config_list=port_config_list, conn_data=conn_graph_facts, @@ -75,13 +95,19 @@ def test_dequeue_ecn(request, iters=1)[0] - """ Check if we capture all the packets """ - pytest_assert(len(ip_pkts) == pkt_cnt, - 'Only capture {}/{} IP packets'.format(len(ip_pkts), pkt_cnt)) + """ Check if we capture all the packets """ + pytest_assert(len(ip_pkts) == pkt_cnt, + 'Only capture {}/{} IP packets'.format(len(ip_pkts), pkt_cnt)) + + """ Check if the first packet is marked """ + 
pytest_assert(is_ecn_marked(ip_pkts[pkt_to_check]), "The first packet should be marked") - """ Check if the first packet is marked """ - pytest_assert(is_ecn_marked(ip_pkts[0]), "The first packet should be marked") + """ Check if the last packet is not marked """ + pytest_assert(not is_ecn_marked(ip_pkts[-1]), + "The last packet should not be marked") + if cisco_platform: + print(get_sai_attributes(duthost, ptfhost, dut_port, ["SAI_QUEUE_STAT_PACKETS","SAI_QUEUE_STAT_WRED_ECN_MARKED_PACKETS"], clear_only=False)) - """ Check if the last packet is not marked """ - pytest_assert(not is_ecn_marked(ip_pkts[-1]), - "The last packet should not be marked") + finally: + if cisco_platform: + setup_markings_dut(duthost, localhost, **original_ecn_markings) diff --git a/tests/ixia/ecn/test_red_accuracy.py b/tests/ixia/ecn/test_red_accuracy.py index 3c280c9bf43..681dbb10c8e 100644 --- a/tests/ixia/ecn/test_red_accuracy.py +++ b/tests/ixia/ecn/test_red_accuracy.py @@ -9,6 +9,7 @@ from tests.common.ixia.qos_fixtures import prio_dscp_map, lossless_prio_list from files.helper import run_ecn_test, is_ecn_marked +from tests.common.cisco_data import get_markings_dut, setup_markings_dut pytestmark = [ pytest.mark.topology('tgen') ] @@ -18,6 +19,7 @@ def test_red_accuracy(request, conn_graph_facts, fanout_graph_facts, duthosts, + localhost, rand_one_dut_hostname, rand_one_dut_portname_oper_up, rand_one_dut_lossless_prio, @@ -53,6 +55,7 @@ def test_red_accuracy(request, testbed_config, port_config_list = ixia_testbed_config duthost = duthosts[rand_one_dut_hostname] lossless_prio = int(lossless_prio) + cisco_platform = (duthost.facts['asic_type'] == "cisco-8000") kmin = 500000 kmax = 2000000 @@ -62,7 +65,12 @@ def test_red_accuracy(request, iters = 100 result_file_name = 'result.txt' - ip_pkts_list = run_ecn_test(api=ixia_api, + if cisco_platform: + original_ecn_markings = get_markings_dut(duthost) + setup_markings_dut(duthost, localhost, dequeue = True, latency = False) + + try: + 
ip_pkts_list = run_ecn_test(api=ixia_api, testbed_config=testbed_config, port_config_list=port_config_list, conn_data=conn_graph_facts, @@ -78,31 +86,35 @@ def test_red_accuracy(request, prio_dscp_map=prio_dscp_map, iters=iters) - """ Check if we capture packets of all the rounds """ - pytest_assert(len(ip_pkts_list) == iters, - 'Only capture {}/{} rounds of packets'.format(len(ip_pkts_list), iters)) - - queue_mark_cnt = {} - for i in range(pkt_cnt): - queue_len = (pkt_cnt - i) * pkt_size - queue_mark_cnt[queue_len] = 0 - - for i in range(iters): - ip_pkts = ip_pkts_list[i] - """ Check if we capture all the packets in each round """ - pytest_assert(len(ip_pkts) == pkt_cnt, - 'Only capture {}/{} packets in round {}'.format(len(ip_pkts), pkt_cnt, i)) - - for j in range(pkt_cnt): - ip_pkt = ip_pkts[j] - queue_len = (pkt_cnt - j) * pkt_size - - if is_ecn_marked(ip_pkt): - queue_mark_cnt[queue_len] += 1 - - """ Dump queue length vs. ECN marking probability into a file """ - queue_mark_cnt = collections.OrderedDict(sorted(queue_mark_cnt.items())) - f = open(result_file_name, 'w') - for queue, mark_cnt in queue_mark_cnt.iteritems(): - f.write('{} {}\n'.format(queue, float(mark_cnt)/iters)) - f.close() + """ Check if we capture packets of all the rounds """ + pytest_assert(len(ip_pkts_list) == iters, + 'Only capture {}/{} rounds of packets'.format(len(ip_pkts_list), iters)) + + queue_mark_cnt = {} + for i in range(pkt_cnt): + queue_len = (pkt_cnt - i) * pkt_size + queue_mark_cnt[queue_len] = 0 + + for i in range(iters): + ip_pkts = ip_pkts_list[i] + """ Check if we capture all the packets in each round """ + pytest_assert(len(ip_pkts) == pkt_cnt, + 'Only capture {}/{} packets in round {}'.format(len(ip_pkts), pkt_cnt, i)) + + for j in range(pkt_cnt): + ip_pkt = ip_pkts[j] + queue_len = (pkt_cnt - j) * pkt_size + + if is_ecn_marked(ip_pkt): + queue_mark_cnt[queue_len] += 1 + + """ Dump queue length vs. 
ECN marking probability into a file """ + queue_mark_cnt = collections.OrderedDict(sorted(queue_mark_cnt.items())) + f = open(result_file_name, 'w') + for queue, mark_cnt in queue_mark_cnt.iteritems(): + f.write('{} {}\n'.format(queue, float(mark_cnt)/iters)) + f.close() + finally: + if cisco_platform: + setup_markings_dut(duthost, localhost, **original_ecn_markings) + diff --git a/tests/ixia/pfc/files/pfc_congestion_helper.py b/tests/ixia/pfc/files/pfc_congestion_helper.py new file mode 100644 index 00000000000..34df4d425a4 --- /dev/null +++ b/tests/ixia/pfc/files/pfc_congestion_helper.py @@ -0,0 +1,290 @@ +import time + +from tests.common.helpers.assertions import pytest_assert +from tests.common.ixia.ixia_helpers import get_dut_port_id +from tests.common.ixia.common_helpers import \ + stop_pfcwd, disable_packet_aging +from tests.common.ixia.port import select_ports + +from abstract_open_traffic_generator.flow import ( + TxRx, Flow, Header, Size, Rate, Duration, FixedSeconds, PortTxRx) +from abstract_open_traffic_generator.flow_ipv4 import Priority, Dscp +from abstract_open_traffic_generator.flow import Pattern as FieldPattern +from abstract_open_traffic_generator.flow import Ipv4 as Ipv4Header +from abstract_open_traffic_generator.flow import Ethernet as EthernetHeader +from abstract_open_traffic_generator.control import ( + State, ConfigState, FlowTransmitState) +from abstract_open_traffic_generator.result import FlowRequest + +LOSSLESS_FLOW_NAME = 'Test Flow' +FLOW_RATE_PERCENT = 80 +LOSSY_FLOW_NAME = 'Lossy Flow' +DATA_PKT_SIZE = 1024 +DATA_FLOW_DURATION_SEC = 5 +IXIA_POLL_DELAY_SEC = 2 + + +def run_pfc_congestion( + api, + testbed_config, + port_config_list, + conn_data, + fanout_data, + duthost, + dut_port, + lossless_prio_list, + lossy_prio_list, + prio_dscp_map): + """ + Run a PFC congestion test. + - Inject both lossless and lossy traffic with combined rate above the + line rate + - The combined rate is above linerate. 
+ - The individual rate is below the linerate. + - Expect the lossless traffic to not have any drops. + - The lossy traffic should be dropped depending on the rate. + + Args: + api (obj): IXIA session + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + conn_data (dict): the dictionary returned by conn_graph_fact. + fanout_data (dict): the dictionary returned by fanout_graph_fact. + duthost (Ansible host instance): device under test + dut_port (str): DUT port to test + lossless_prio_list (list): priorities of test flows + prio_dscp_map (dict): Priority vs. DSCP map (key = priority). + + Returns: + N/A + """ + + pytest_assert( + testbed_config is not None, + 'Fail to get L2/3 testbed config') + + stop_pfcwd(duthost) + disable_packet_aging(duthost) + + """ Get the ID of the port to test """ + port_id = get_dut_port_id(dut_hostname=duthost.hostname, + dut_port=dut_port, + conn_data=conn_data, + fanout_data=fanout_data) + + pytest_assert(port_id is not None, + 'Fail to get ID for port {}'.format(dut_port)) + + """ Rate percent must be an integer """ + flow_rate_percent = int(FLOW_RATE_PERCENT) + + pkt_size = DATA_PKT_SIZE + + """ Generate traffic config """ + flows = __gen_traffic(testbed_config=testbed_config, + port_config_list=port_config_list, + port_id=port_id, + lossless_flow_name=LOSSLESS_FLOW_NAME, + lossless_prio_list=lossless_prio_list, + flow_rate_percent=flow_rate_percent, + lossy_flow_name=LOSSY_FLOW_NAME, + lossy_flow_prio_list=lossy_prio_list, + data_flow_dur_sec=DATA_FLOW_DURATION_SEC, + data_pkt_size=pkt_size, + prio_dscp_map=prio_dscp_map) + + """ Tgen config = testbed config + flow config """ + config = testbed_config + config.flows = flows + + all_flow_names = [flow.name for flow in flows] + + """ Run traffic """ + flow_stats = __run_traffic(api=api, + config=config, + all_flow_names=all_flow_names, + exp_dur_sec=DATA_FLOW_DURATION_SEC) + + """ Verify experiment results """ + 
__verify_results(rows=flow_stats, + lossless_flow_name=LOSSLESS_FLOW_NAME, + lossy_flow_name=LOSSY_FLOW_NAME) + + +def sec_to_nanosec(x): + return (x * 1e9) + + +def __gen_traffic(testbed_config, + port_config_list, + port_id, + lossless_flow_name, + lossless_prio_list, + flow_rate_percent, + lossy_flow_name, + lossy_flow_prio_list, + data_flow_dur_sec, + data_pkt_size, + prio_dscp_map): + """ + Generate configurations of flows, including test flows, and background + flows. Test flows and background flows are also known as data flows. + + Args: + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + port_id (int): ID of DUT port to test + lossless_flow_name (str): name of test flows + lossless_prio_list (list): priorities of test flows + flow_rate_percent (int): rate percentage for each test flow + lossy_flow_name (str): name of background flows + lossy_flow_prio_list (list): priorities of background flows + data_flow_dur_sec (int): duration of data flows in second + data_pkt_size (int): packet size of data flows in byte + prio_dscp_map (dict): Priority vs. DSCP map (key = priority). + + Returns: + flows configurations (list): the list should have configurations of + len(lossless_prio_list) test flow, len(lossy_flow_prio_list) background + flows. 
+ """ + + result = list() + + rx_port_id = port_id + tx_port_id_list, rx_port_id_list = select_ports( + port_config_list=port_config_list, + pattern="many to one", + rx_port_id=rx_port_id) + + pytest_assert(len(tx_port_id_list) > 0, "Cannot find any TX ports") + + rx_port_config = next( + (x for x in port_config_list if x.id == rx_port_id), None) + + """ Test flows """ + tx_port_count = 0 + n_tx_port = len(tx_port_id_list) + n_of_lossless_prio = len(lossless_prio_list) + for prio in lossless_prio_list + lossy_flow_prio_list: + tx_port_id = tx_port_id_list[tx_port_count % n_tx_port] + tx_port_config = \ + next((x for x in port_config_list if x.id == tx_port_id), None) + tx_port_count += 1 + tx_mac = tx_port_config.mac + if tx_port_config.gateway == rx_port_config.gateway and \ + tx_port_config.prefix_len == rx_port_config.prefix_len: + """ If source and destination port are in the same subnet """ + rx_mac = rx_port_config.mac + else: + rx_mac = tx_port_config.gateway_mac + + data_endpoint = PortTxRx( + tx_port_name=testbed_config.ports[tx_port_id].name, + rx_port_name=testbed_config.ports[rx_port_id].name) + + eth_hdr = EthernetHeader(src=FieldPattern(tx_mac), + dst=FieldPattern(rx_mac), + pfc_queue=FieldPattern([prio])) + + ip_prio = Priority( + Dscp(phb=FieldPattern(choice=prio_dscp_map[prio]), + ecn=FieldPattern(choice=Dscp.ECN_CAPABLE_TRANSPORT_1))) + + ipv4_hdr = Ipv4Header(src=FieldPattern(tx_port_config.ip), + dst=FieldPattern(rx_port_config.ip), + priority=ip_prio) + + lossless = prio in lossless_prio_list + result.append(Flow( + name='{} Prio {}'.format( + lossless_flow_name if lossless else lossy_flow_name, prio), + tx_rx=TxRx(data_endpoint), + packet=[Header(choice=eth_hdr), Header(choice=ipv4_hdr)], + size=Size(data_pkt_size), + rate=Rate( + 'line', + flow_rate_percent/n_of_lossless_prio + if lossless else flow_rate_percent), + duration=Duration(FixedSeconds(seconds=data_flow_dur_sec)))) + + return result + + +def __run_traffic(api, + config, + 
all_flow_names, + exp_dur_sec): + + """ + Run traffic and dump per-flow statistics + + Args: + api (obj): IXIA session + config (obj): experiment config (testbed config + flow config) + all_flow_names (list): list of names of all the flows + exp_dur_sec (int): experiment duration in second + + Returns: + per-flow statistics (list) + """ + api.set_state(State(ConfigState(config=config, state='set'))) + api.set_state(State(FlowTransmitState(state='start'))) + time.sleep(exp_dur_sec) + + attempts = 0 + max_attempts = 20 + + while attempts < max_attempts: + rows = api.get_flow_results(FlowRequest(flow_names=all_flow_names)) + + """ If all the data flows have stopped """ + transmit_states = [row['transmit'] for row in rows] + if len(rows) == len(all_flow_names) and\ + list(set(transmit_states)) == ['stopped']: + time.sleep(IXIA_POLL_DELAY_SEC) + break + else: + time.sleep(1) + attempts += 1 + + pytest_assert(attempts < max_attempts, + "Flows do not stop in {} seconds".format(max_attempts)) + + """ Dump per-flow statistics """ + time.sleep(5) + rows = api.get_flow_results(FlowRequest(flow_names=all_flow_names)) + api.set_state(State(FlowTransmitState(state='stop'))) + + return rows + + +def __verify_results(rows, + lossless_flow_name, + lossy_flow_name): + + """ + Verify if we get expected experiment results + + Args: + rows (list): per-flow statistics + lossless_flow_name (str): name of test flows + lossy_flow_name (str): name of background flows + + Returns: + N/A + """ + + """ Check background flows """ + for row in rows: + tx_frames = row['frames_tx'] + rx_frames = row['frames_rx'] + + if lossy_flow_name in row['name']: + pytest_assert( + tx_frames != rx_frames, + '{} should have dropped packet'.format(row['name'])) + else: + pytest_assert( + tx_frames == rx_frames, + '{} should not have any dropped packet'.format(row['name'])) diff --git a/tests/ixia/pfc/test_pfc_congestion.py b/tests/ixia/pfc/test_pfc_congestion.py new file mode 100644 index 
00000000000..c27be448a8a --- /dev/null +++ b/tests/ixia/pfc/test_pfc_congestion.py @@ -0,0 +1,72 @@ +import pytest + +from files.pfc_congestion_helper import run_pfc_congestion +from tests.common.cisco_data import is_cisco_device # noqa: F401 +from tests.common.helpers.assertions import pytest_require # noqa: F401 +from tests.common.fixtures.conn_graph_facts import ( # noqa: F401 + conn_graph_facts, + fanout_graph_facts) +from tests.common.ixia.ixia_fixtures import ( # noqa: F401 + ixia_api_serv_ip, + ixia_api_serv_port, + ixia_api_serv_user, + ixia_api_serv_passwd, + ixia_api, + ixia_testbed_config) +from tests.common.ixia.qos_fixtures import ( # noqa: F401 + prio_dscp_map, + all_prio_list, + lossless_prio_list) + +pytestmark = [pytest.mark.topology('tgen')] + + +def test_pfc_congestion(ixia_api, # noqa: F811 + ixia_testbed_config, # noqa: F811 + conn_graph_facts, # noqa: F811 + fanout_graph_facts, # noqa: F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + lossless_prio_list, # noqa: F811 + all_prio_list, # noqa: F811 + prio_dscp_map): # noqa: F811 + """ + Test if Lossless Traffic is not dropped when there is congestion. + + Args: + ixia_api (pytest fixture): IXIA session + ixia_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 'dut|eth1' + lossless_prio_list : list of lossless priorities + all_prio_list (pytest fixture): list of all the priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). 
+ + Returns: + N/A + """ + + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname, + "Priority and port are not mapped to the expected DUT") + + testbed_config, port_config_list = ixia_testbed_config + duthost = duthosts[rand_one_dut_hostname] + lossy_prio_list = \ + list(set([p for p in all_prio_list]) - set(lossless_prio_list)) + + run_pfc_congestion(api=ixia_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + lossless_prio_list=lossless_prio_list, + lossy_prio_list=lossy_prio_list, + prio_dscp_map=prio_dscp_map + ) diff --git a/tests/ixia/pfcwd/conftest.py b/tests/ixia/pfcwd/conftest.py new file mode 100644 index 00000000000..5e91779a334 --- /dev/null +++ b/tests/ixia/pfcwd/conftest.py @@ -0,0 +1,11 @@ +import pytest +from tests.common.cisco_data import is_cisco_device +@pytest.fixture +def setup_cgm_alpha_cisco(duthost): + if not is_cisco_device(duthost): + return + duthost.shell("mmuconfig -p pg_lossless_100000_300m_profile -a -6") + yield + duthost.shell("mmuconfig -p pg_lossless_100000_300m_profile -a -2") + + diff --git a/tests/ixia/pfcwd/files/pfcwd_multi_node_helper.py b/tests/ixia/pfcwd/files/pfcwd_multi_node_helper.py index eedb70e6bc7..ba0b4a9dd8a 100644 --- a/tests/ixia/pfcwd/files/pfcwd_multi_node_helper.py +++ b/tests/ixia/pfcwd/files/pfcwd_multi_node_helper.py @@ -10,6 +10,7 @@ from tests.common.ixia.common_helpers import pfc_class_enable_vector,\ start_pfcwd, enable_packet_aging, get_pfcwd_poll_interval, get_pfcwd_detect_time from tests.common.ixia.port import select_ports +from tests.common.cisco_data import is_cisco_device from abstract_open_traffic_generator.flow import TxRx, Flow, Header,Size, Rate,\ Duration, FixedSeconds, FixedPackets, PortTxRx, PfcPause @@ -144,7 +145,8 @@ def run_pfcwd_multi_node_test(api, data_pkt_size=DATA_PKT_SIZE, 
trigger_pfcwd=trigger_pfcwd, pause_port_id=port_id, - tolerance=TOLERANCE_THRESHOLD) + tolerance=TOLERANCE_THRESHOLD, + duthost=duthost) def __data_flow_name(name_prefix, src_id, dst_id, prio): """ @@ -514,7 +516,8 @@ def __verify_results(rows, data_pkt_size, trigger_pfcwd, pause_port_id, - tolerance): + tolerance, + duthost): """ Verify if we get expected experiment results @@ -531,6 +534,7 @@ def __verify_results(rows, trigger_pfcwd (bool): if PFC watchdog is expected to be triggered pause_port_id (int): ID of the port to send PFC pause frames tolerance (float): maximum allowable deviation + duthost (obj): AnsibleHost object for dut. Returns: N/A @@ -565,8 +569,10 @@ def __verify_results(rows, exp_test_flow_rx_pkts = test_flow_rate_percent / 100.0 * speed_gbps \ * 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size - if trigger_pfcwd and\ - (src_port_id == pause_port_id or dst_port_id == pause_port_id): + ports_to_check = [dst_port_id, src_port_id] + if is_cisco_device(duthost): + ports_to_check = [dst_port_id] + if trigger_pfcwd and pause_port_id in ports_to_check: """ Once PFC watchdog is triggered, it will impact bi-directional traffic """ pytest_assert(tx_frames > rx_frames, '{} should have dropped packets'.format(flow_name)) diff --git a/tests/ixia/pfcwd/test_pfcwd_a2a.py b/tests/ixia/pfcwd/test_pfcwd_a2a.py index d92261be6c0..efa84a5747c 100644 --- a/tests/ixia/pfcwd/test_pfcwd_a2a.py +++ b/tests/ixia/pfcwd/test_pfcwd_a2a.py @@ -13,6 +13,7 @@ pytestmark = [ pytest.mark.topology('tgen') ] + @pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_all_to_all(ixia_api, ixia_testbed_config, @@ -21,6 +22,7 @@ def test_pfcwd_all_to_all(ixia_api, duthosts, rand_one_dut_hostname, rand_one_dut_portname_oper_up, + setup_cgm_alpha_cisco, rand_one_dut_lossless_prio, lossy_prio_list, prio_dscp_map, diff --git a/tests/ixia/pfcwd/test_pfcwd_basic.py b/tests/ixia/pfcwd/test_pfcwd_basic.py index bf5c6036257..60a419fd3d9 100644 --- 
a/tests/ixia/pfcwd/test_pfcwd_basic.py +++ b/tests/ixia/pfcwd/test_pfcwd_basic.py @@ -1,3 +1,4 @@ +import logging import pytest from tests.common.helpers.assertions import pytest_require, pytest_assert diff --git a/tests/ixia/pfcwd/test_pfcwd_burst_storm.py b/tests/ixia/pfcwd/test_pfcwd_burst_storm.py index 9ac618cb344..00f473abc85 100644 --- a/tests/ixia/pfcwd/test_pfcwd_burst_storm.py +++ b/tests/ixia/pfcwd/test_pfcwd_burst_storm.py @@ -1,3 +1,4 @@ +import logging import pytest from tests.common.helpers.assertions import pytest_require diff --git a/tests/ixia/pfcwd/test_pfcwd_m2o.py b/tests/ixia/pfcwd/test_pfcwd_m2o.py index 1b1170900d0..199b4a79513 100644 --- a/tests/ixia/pfcwd/test_pfcwd_m2o.py +++ b/tests/ixia/pfcwd/test_pfcwd_m2o.py @@ -13,6 +13,7 @@ pytestmark = [ pytest.mark.topology('tgen') ] + @pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_many_to_one(ixia_api, ixia_testbed_config, @@ -20,6 +21,7 @@ def test_pfcwd_many_to_one(ixia_api, fanout_graph_facts, duthosts, rand_one_dut_hostname, + setup_cgm_alpha_cisco, rand_one_dut_portname_oper_up, rand_one_dut_lossless_prio, lossy_prio_list, diff --git a/tests/ixia/ptf_utils.py b/tests/ixia/ptf_utils.py new file mode 100644 index 00000000000..2fb5fa803fe --- /dev/null +++ b/tests/ixia/ptf_utils.py @@ -0,0 +1,13 @@ + +def get_sai_attributes(duthost, ptfhost, dut_port, sai_values, clear_only=False): + if ptfhost is None: + return + cmd = '''ptf --test-dir ixia_saitests/saitests sai_rpc_caller.RPC_Caller --platform-dir ixia_ptftests/ptftests/ --platform remote -t 'dutport={};port_map="0@0";server="{}";sai_values=[{}];clear_only={}' --relax --debug info --log-file log_file'''.format(int(dut_port[8:]), duthost.host.options['inventory_manager'].get_host(duthost.hostname).vars['ansible_host'], ",".join(['"{}"'.format(x) for x in sai_values]), clear_only) + + result = ptfhost.shell(cmd, chdir="/root", module_ignore_errors=True) + + if result['rc']: + raise RuntimeError("Ptf runner is failing. 
Pls check if the DUT is running syncd-rpc image. (check netstat -an | grep 9092) :{}".format(result)) + else: + print ("Got the values: {}".format(result['stdout_lines'])) + return result['stdout_lines'] diff --git a/tests/kvmtest.sh b/tests/kvmtest.sh index 319ab298021..816664d4879 100755 --- a/tests/kvmtest.sh +++ b/tests/kvmtest.sh @@ -88,7 +88,8 @@ RUNTEST_CLI_COMMON_OPTS="\ -q 1 \ -a False \ -O \ --r" +-r \ +-e --allow_recover" if [ -n "$exit_on_error" ]; then RUNTEST_CLI_COMMON_OPTS="$RUNTEST_CLI_COMMON_OPTS -E" @@ -100,62 +101,67 @@ test_t0() { tgname=1vlan if [ x$section == x"part-1" ]; then tests="\ - monit/test_monit_status.py \ - platform_tests/test_advanced_reboot.py::test_warm_reboot \ - test_interfaces.py \ - arp/test_arp_dualtor.py \ + arp/test_arp_extended.py \ + arp/test_neighbor_mac.py \ + arp/test_neighbor_mac_noptf.py\ bgp/test_bgp_fact.py \ bgp/test_bgp_gr_helper.py::test_bgp_gr_helper_routes_perserved \ bgp/test_bgp_speaker.py \ + bgp/test_bgpmon.py \ bgp/test_bgp_update_timer.py \ - cacl/test_ebtables_application.py \ + container_checker/test_container_checker.py \ cacl/test_cacl_application.py \ cacl/test_cacl_function.py \ + cacl/test_ebtables_application.py \ dhcp_relay/test_dhcp_relay.py \ dhcp_relay/test_dhcpv6_relay.py \ + iface_namingmode/test_iface_namingmode.py \ lldp/test_lldp.py \ + monit/test_monit_status.py \ ntp/test_ntp.py \ pc/test_po_cleanup.py \ pc/test_po_update.py \ + platform_tests/test_advanced_reboot.py::test_warm_reboot \ + platform_tests/test_cpu_memory_usage.py \ route/test_default_route.py \ route/test_static_route.py \ - arp/test_neighbor_mac.py \ - arp/test_neighbor_mac_noptf.py\ snmp/test_snmp_cpu.py \ + snmp/test_snmp_default_route.py \ snmp/test_snmp_interfaces.py \ snmp/test_snmp_lldp.py \ + snmp/test_snmp_loopback.py \ snmp/test_snmp_pfc_counters.py \ snmp/test_snmp_queue.py \ - snmp/test_snmp_loopback.py \ - snmp/test_snmp_default_route.py \ - tacacs/test_rw_user.py \ - tacacs/test_ro_user.py \ - 
tacacs/test_ro_disk.py \ - tacacs/test_jit_user.py \ + ssh/test_ssh_ciphers.py \ + syslog/test_syslog.py\ + tacacs/test_accounting.py \ tacacs/test_authorization.py \ - tacacs/test_accounting.py" + tacacs/test_jit_user.py \ + tacacs/test_ro_disk.py \ + tacacs/test_ro_user.py \ + tacacs/test_rw_user.py \ + telemetry/test_telemetry.py \ + test_features.py \ + test_interfaces.py \ + test_procdockerstatsd.py" pushd $SONIC_MGMT_DIR/tests ./run_tests.sh $RUNTEST_CLI_COMMON_OPTS -c "$tests" -p logs/$tgname popd else tests="\ - ssh/test_ssh_stress.py \ - ssh/test_ssh_ciphers.py \ - syslog/test_syslog.py\ - telemetry/test_telemetry.py \ - test_features.py \ - test_procdockerstatsd.py \ - iface_namingmode/test_iface_namingmode.py \ - platform_tests/test_cpu_memory_usage.py \ - bgp/test_bgpmon.py \ - container_checker/test_container_checker.py \ - process_monitoring/test_critical_process_monitoring.py \ - system_health/test_system_status.py \ + generic_config_updater/test_aaa.py \ + generic_config_updater/test_bgpl.py \ + generic_config_updater/test_cacl.py \ + generic_config_updater/test_dhcp_relay.py \ + generic_config_updater/test_ipv6.py \ generic_config_updater/test_lo_interface.py \ - generic_config_updater/test_vlan_interface.py \ generic_config_updater/test_portchannel_interface.py \ - show_techsupport/test_techsupport_no_secret.py" + generic_config_updater/test_syslog.py \ + generic_config_updater/test_vlan_interface.py \ + process_monitoring/test_critical_process_monitoring.py \ + show_techsupport/test_techsupport_no_secret.py \ + system_health/test_system_status.py" pushd $SONIC_MGMT_DIR/tests ./run_tests.sh $RUNTEST_CLI_COMMON_OPTS -c "$tests" -p logs/$tgname @@ -209,25 +215,25 @@ test_t2() { test_t1_lag() { tgname=t1_lag tests="\ - monit/test_monit_status.py \ - test_interfaces.py \ - bgp/test_bgp_fact.py \ bgp/test_bgp_allow_list.py \ - bgp/test_bgp_multipath_relax.py \ bgp/test_bgp_bbr.py \ bgp/test_bgp_bounce.py \ + bgp/test_bgp_fact.py \ + 
bgp/test_bgp_multipath_relax.py \ bgp/test_bgp_update_timer.py \ + bgp/test_bgpmon.py \ bgp/test_traffic_shift.py \ + container_checker/test_container_checker.py \ http/test_http_copy.py \ ipfwd/test_mtu.py \ lldp/test_lldp.py \ - route/test_default_route.py \ + monit/test_monit_status.py \ + pc/test_lag_2.py \ platform_tests/test_cpu_memory_usage.py \ - bgp/test_bgpmon.py \ - container_checker/test_container_checker.py \ process_monitoring/test_critical_process_monitoring.py \ + route/test_default_route.py \ scp/test_scp_copy.py \ - pc/test_lag_2.py" + test_interfaces.py" pushd $SONIC_MGMT_DIR/tests ./run_tests.sh $RUNTEST_CLI_COMMON_OPTS -c "$tests" -p logs/$tgname @@ -238,16 +244,16 @@ test_multi_asic_t1_lag() { tgname=multi_asic_t1_lag tests="\ bgp/test_bgp_fact.py \ + snmp/test_snmp_default_route.py \ + snmp/test_snmp_loopback.py \ snmp/test_snmp_pfc_counters.py \ snmp/test_snmp_queue.py \ - snmp/test_snmp_loopback.py \ - snmp/test_snmp_default_route.py \ - tacacs/test_rw_user.py \ - tacacs/test_ro_user.py \ - tacacs/test_ro_disk.py \ - tacacs/test_jit_user.py \ + tacacs/test_accounting.py \ tacacs/test_authorization.py \ - tacacs/test_accounting.py" + tacacs/test_jit_user.py \ + tacacs/test_ro_disk.py \ + tacacs/test_ro_user.py \ + tacacs/test_rw_user.py" pushd $SONIC_MGMT_DIR/tests # TODO: Remove disable of loganaler and sanity check once multi-asic testbed is stable. 
@@ -280,6 +286,8 @@ export ANSIBLE_LIBRARY=$SONIC_MGMT_DIR/ansible/library/ # workaround for issue https://github.com/Azure/sonic-mgmt/issues/1659 export ANSIBLE_KEEP_REMOTE_FILES=1 +export GIT_USER_NAME=$GIT_USER_NAME +export GIT_API_TOKEN=$GIT_API_TOKEN # clear logs from previous test runs rm -rf $SONIC_MGMT_DIR/tests/logs diff --git a/tests/lldp/test_lldp.py b/tests/lldp/test_lldp.py index 3a5f5a6bab7..18eac1419ec 100644 --- a/tests/lldp/test_lldp.py +++ b/tests/lldp/test_lldp.py @@ -4,7 +4,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0', 't1', 't2'), + pytest.mark.topology('t0', 't1', 't2', 'm0'), pytest.mark.device_type('vs') ] @@ -74,6 +74,8 @@ def test_lldp_neighbor(duthosts, enum_rand_one_per_hwsku_frontend_hostname, loca nei_lldp_facts = localhost.lldp_facts(host=hostip, version='v2c', community=eos['snmp_rocommunity'])['ansible_facts'] neighbor_interface = v['port']['ifname'] + logger.info("lldp facts for interface {}:{}".format(neighbor_interface, + nei_lldp_facts['ansible_lldp_facts'][neighbor_interface])) # Verify the published DUT system name field is correct assert nei_lldp_facts['ansible_lldp_facts'][neighbor_interface]['neighbor_sys_name'] == duthost.hostname # Verify the published DUT chassis id field is not empty diff --git a/tests/memory_checker/test_memory_checker.py b/tests/memory_checker/test_memory_checker.py index 573437910c5..934f336469f 100644 --- a/tests/memory_checker/test_memory_checker.py +++ b/tests/memory_checker/test_memory_checker.py @@ -1,5 +1,12 @@ """ -Test the feature of memory checker. +The 'stress' utility is leveraged to increase the memory usage of a container continuously, then +1) Test whether that container can be restarted by the script ran by Monit. 
+2) Test whether that container can be restarted by the script ran by Monit; If that container + was restarted, then test the script ran by Monit was unable to restart the container anymore + due to Monit failed to reset its internal counter. +3) Test whether that container can be restarted by the script ran by Monit; If that container + was restarted, then test the script ran by Monit was able to restart the container with the + help of new Monit syntax although Monit failed to reset its internal counter. """ import logging from multiprocessing.pool import ThreadPool @@ -12,6 +19,7 @@ from tests.common.helpers.assertions import pytest_require from tests.common.helpers.dut_utils import check_container_state from tests.common.helpers.dut_utils import decode_dut_and_container_name +from tests.common.helpers.dut_utils import is_container_running from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer logger = logging.getLogger(__name__) @@ -24,59 +32,148 @@ CONTAINER_STOP_THRESHOLD_SECS = 200 CONTAINER_RESTART_THRESHOLD_SECS = 180 CONTAINER_CHECK_INTERVAL_SECS = 1 +MONIT_RESTART_THRESHOLD_SECS = 320 +MONIT_CHECK_INTERVAL_SECS = 5 +WAITING_SYSLOG_MSG_SECS = 130 -@pytest.fixture(autouse=True) -def modify_monit_config_and_restart(duthosts, enum_dut_feature_container, enum_rand_one_per_hwsku_frontend_hostname): +def remove_container(duthost, container_name): + """Removes the specified container on DuT. - """Backup Monit configuration files, then customize and restart it before testing. - Restore original Monit configuration files and restart Monit service after testing. + Args: + duthost: The AnsibleHost object of DuT. + container_name: A string represents name of the container. + + Returns: + None. 
+ """ + if not is_container_running(duthost, container_name): + pytest.fail("'{}' container is not running on DuT '{}'!".format(container_name, duthost.hostname)) + + logger.info("Stopping '{}' container ...".format(container_name)) + duthost.shell("systemctl stop {}.service".format(container_name)) + logger.info("'{}' container is stopped.".format(container_name)) + + logger.info("Removing '{}' container ...".format(container_name)) + duthost.shell("docker rm {}".format(container_name)) + logger.info("'{}' container is removed.".format(container_name)) + + +def restart_container(duthost, container_name): + """Restarts the specified container on DuT. Args: - duthost: Hostname of DuT. + duthost: The AnsibleHost object of DuT. + container_name: A string represents name of the container. Returns: None. """ - dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) - pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname and container_name == "telemetry", - "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." 
- .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) - duthost = duthosts[dut_name] + logger.info("Restarting '{}' container ...".format(container_name)) + duthost.shell("systemctl restart {}.service".format(container_name)) - logger.info("Back up Monit configuration files on DuT '{}' ...".format(duthost.hostname)) - duthost.shell("sudo cp -f /etc/monit/monitrc /tmp/") - duthost.shell("sudo cp -f /etc/monit/conf.d/monit_telemetry /tmp/") + logger.info("Waiting for '{}' container to be restarted ...".format(container_name)) + restarted = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, + CONTAINER_CHECK_INTERVAL_SECS, + 0, + check_container_state, duthost, container_name, True) + pytest_assert(restarted, "Failed to restart '{}' container!".format(container_name)) + logger.info("'{}' container is restarted.".format(container_name)) + + +def backup_monit_config_files(duthost): + """Backs up Monit configuration files on DuT. - temp_config_line = ' if status == 3 for 5 times within 10 cycles then exec "/usr/bin/restart_service telemetry"' + Args: + duthost: The AnsibleHost object of DuT. + + Returns: + None. + """ + logger.info("Backing up Monit configuration files on DuT '{}' ...".format(duthost.hostname)) + duthost.shell("cp -f /etc/monit/monitrc /tmp/") + duthost.shell("mv -f /etc/monit/conf.d/monit_* /tmp/") + duthost.shell("cp -f /tmp/monit_telemetry /etc/monit/conf.d/") + logger.info("Monit configuration files on DuT '{}' is backed up.".format(duthost.hostname)) + + +def customize_monit_config_files(duthost, temp_config_line): + """Customizes the Monit configuration file on DuT. + + Args: + duthost: The AnsibleHost object of DuT. + temp_config_line: A stirng to replace the initial Monit configuration. + + Returns: + None. 
+ """ logger.info("Modifying Monit config to eliminate start delay and decrease interval ...") - duthost.shell("sudo sed -i '$s/^./#/' /etc/monit/conf.d/monit_telemetry") - duthost.shell("echo '{}' | sudo tee -a /etc/monit/conf.d/monit_telemetry".format(temp_config_line)) - duthost.shell("sudo sed -i 's/set daemon 60/set daemon 10/' /etc/monit/monitrc") - duthost.shell("sudo sed -i '/with start delay 300/s/^./#/' /etc/monit/monitrc") + duthost.shell("sed -i '$s/^./#/' /etc/monit/conf.d/monit_telemetry") + duthost.shell("echo '{}' | tee -a /etc/monit/conf.d/monit_telemetry".format(temp_config_line)) + duthost.shell("sed -i '/with start delay 300/s/^./#/' /etc/monit/monitrc") + logger.info("Modifying Monit config to eliminate start delay and decrease interval are done.") - logger.info("Restart Monit service ...") - duthost.shell("sudo systemctl restart monit") - yield +def restore_monit_config_files(duthost): + """Restores the initial Monit configuration file on DuT. + + Args: + duthost: The AnsibleHost object of DuT. - logger.info("Restore original Monit configuration files on DuT '{}' ...".format(duthost.hostname)) - duthost.shell("sudo mv -f /tmp/monitrc /etc/monit/") - duthost.shell("sudo mv -f /tmp/monit_telemetry /etc/monit/conf.d/") + Returns: + None. + """ + logger.info("Restoring original Monit configuration files on DuT '{}' ...".format(duthost.hostname)) + duthost.shell("mv -f /tmp/monitrc /etc/monit/") + duthost.shell("mv -f /tmp/monit_* /etc/monit/conf.d/") + logger.info("Original Monit configuration files on DuT '{}' are restored.".format(duthost.hostname)) - logger.info("Restart Monit service ...") - duthost.shell("sudo systemctl restart monit") - logger.info("Restore bgp neighbours ...") - duthost.shell("config bgp startup all") +def check_monit_running(duthost): + """Checks whether Monit is running or not. + + Args: + duthost: The AnsibleHost object of DuT. + + Returns: + Returns True if Monit is running; Otherwist, returns False. 
+ """ + monit_services_status = duthost.get_monit_services_status() + if not monit_services_status: + return False + + return True + + +def restart_monit_service(duthost): + """Restarts Monit service and polls Monit running status. + + Args: + duthost: The AnsibleHost object of DuT. + + Returns: + None. + """ + logger.info("Restarting Monit service ...") + duthost.shell("systemctl restart monit") + logger.info("Monit service is restarted.") + + logger.info("Checks whether Monit is running or not after restarted ...") + is_monit_running = wait_until(MONIT_RESTART_THRESHOLD_SECS, + MONIT_CHECK_INTERVAL_SECS, + 0, + check_monit_running, + duthost) + pytest_assert(is_monit_running, "Monit is not running after restarted!") + logger.info("Monit is running!") def install_stress_utility(duthost, creds, container_name): - """Installs the 'stress' utility in container. + """Installs 'stress' utility in the container on DuT. Args: duthost: The AnsibleHost object of DuT. - container_name: Name of container. + container_name: A string represents name of the container. Returns: None. @@ -88,9 +185,12 @@ def install_stress_utility(duthost, creds, container_name): https_proxy = creds.get('proxy_env', {}).get('https_proxy', '') # Shutdown bgp for having ability to install stress tool + logger.info("Shutting down all BGP sessions ...") duthost.shell("config bgp shutdown all") + logger.info("All BGP sessions are shut down!...") install_cmd_result = duthost.shell("docker exec {} bash -c 'export http_proxy={} \ && export https_proxy={} \ + && apt-get update -y \ && apt-get install stress -y'".format(container_name, http_proxy, https_proxy)) exit_code = install_cmd_result["rc"] @@ -99,21 +199,100 @@ def install_stress_utility(duthost, creds, container_name): def remove_stress_utility(duthost, container_name): - """Removes the 'stress' utility from container. + """Removes the 'stress' utility from container and brings up BGP sessions + on DuT. 
Args: duthost: The AnsibleHost object of DuT. - container_name: Name of container. + container_name: A string represents the name of container. Returns: None. """ logger.info("Removing 'stress' utility from '{}' container ...".format(container_name)) - remove_cmd_result = duthost.shell("docker exec {} apt-get remove stress -y".format(container_name)) + remove_cmd_result = duthost.shell("docker exec {} apt-get purge stress -y".format(container_name)) exit_code = remove_cmd_result["rc"] pytest_assert(exit_code == 0, "Failed to remove 'stress' utility!") logger.info("'stress' utility was removed.") + logger.info("Bringing up all BGP sessions ...") + duthost.shell("config bgp startup all") + logger.info("BGP sessions are started up.") + + +@pytest.fixture +def test_setup_and_cleanup(duthosts, creds, enum_dut_feature_container, + enum_rand_one_per_hwsku_frontend_hostname, request): + """Backups Monit configuration files, customizes Monit configuration files and + restarts Monit service before testing. Restores original Monit configuration files + and restart Monit service after testing. + + Args: + duthost: Hostname of DuT. + + Returns: + None. + """ + dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, + "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." + .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." 
+ .format(container_name)) + + duthost = duthosts[dut_name] + + install_stress_utility(duthost, creds, container_name) + + backup_monit_config_files(duthost) + customize_monit_config_files(duthost, request.param) + restart_monit_service(duthost) + + yield + + try: + restore_monit_config_files(duthost) + finally: + restart_monit_service(duthost) + + restart_container(duthost, container_name) + remove_stress_utility(duthost, container_name) + postcheck_critical_processes(duthost, container_name) + + +@pytest.fixture +def remove_and_restart_container(duthosts, creds, enum_dut_feature_container, + enum_rand_one_per_hwsku_frontend_hostname): + """Removes and restarts 'telemetry' container from DuT. + + Args: + duthosts: The fixture returns list of DuTs. + enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up + a frontend DuT from testbed. + + + Returns: + None. + """ + dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, + "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." + .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." + .format(container_name)) + + duthost = duthosts[dut_name] + remove_container(duthost, container_name) + + yield + + restart_container(duthost, container_name) + postcheck_critical_processes(duthost, container_name) + def consume_memory(duthost, container_name, vm_workers): """Consumes memory more than the threshold value of specified container. 
@@ -131,26 +310,74 @@ def consume_memory(duthost, container_name, vm_workers): duthost.shell("docker exec {} stress -m {}".format(container_name, vm_workers), module_ignore_errors=True) -def consume_memory_and_restart_container(duthost, container_name, vm_workers, loganalyzer, marker): +def check_critical_processes(duthost, container_name): + """Checks whether the critical processes are running after container was restarted. + + Args: + duthost: The AnsibleHost object of DuT. + container_name: Name of container. + + Returns: + None. + """ + status_result = duthost.critical_process_status(container_name) + if status_result["status"] is False or len(status_result["exited_critical_process"]) > 0: + return False + + return True + + +def postcheck_critical_processes(duthost, container_name): + """Checks whether the critical processes are running after container was restarted. + + Args: + duthost: The AnsibleHost object of DuT. + container_name: Name of container. + + Returns: + None. + """ + logger.info("Checking the running status of critical processes in '{}' container ..." + .format(container_name)) + is_succeeded = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, CONTAINER_CHECK_INTERVAL_SECS, 0, + check_critical_processes, duthost, container_name) + if not is_succeeded: + pytest.fail("Not all critical processes in '{}' container are running!" + .format(container_name)) + logger.info("All critical processes in '{}' container are running.".format(container_name)) + + +def consumes_memory_and_checks_container_restart(duthost, container_name, vm_workers): """Invokes the 'stress' utility to consume memory more than the threshold asynchronously - and checks whether the container can be stopped and restarted. Loganalyzer was leveraged + and checks whether the container can be stopped and restarted. Loganalyzer is leveraged to check whether the log messages related to container stopped were generated. Args: duthost: The AnsibleHost object of DuT. 
- container_name: Name of container. + container_name: A string represents the name of container. vm_workers: Number of workers which does the spinning on malloc()/free() to consume memory. Returns: None. - """ + expected_alerting_messages = [] + expected_alerting_messages.append(".*restart_service.*Restarting service 'telemetry'.*") + expected_alerting_messages.append(".*Stopping Telemetry container.*") + expected_alerting_messages.append(".*Stopped Telemetry container.*") + expected_alerting_messages.append(".*Starting Telemetry container.*") + expected_alerting_messages.append(".*Started Telemetry container.*") + + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="container_restart_due_to_memory") + loganalyzer.expect_regex = [] + loganalyzer.expect_regex.extend(expected_alerting_messages) + marker = loganalyzer.init() + thread_pool = ThreadPool() thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers)) - logger.info("Sleep 100 seconds to wait for the alerting messages from syslog...") - time.sleep(100) + logger.info("Sleep '{}' seconds to wait for the alerting messages from syslog ...".format(WAITING_SYSLOG_MSG_SECS)) + time.sleep(WAITING_SYSLOG_MSG_SECS) logger.info("Checking the alerting messages related to container stopped ...") loganalyzer.analyze(marker) @@ -165,47 +392,109 @@ def consume_memory_and_restart_container(duthost, container_name, vm_workers, lo logger.info("'{}' container is restarted.".format(container_name)) -def check_critical_processes(duthost, container_name): - """Checks whether the critical processes are running after container was restarted. +def get_container_mem_usage(duthost, container_name): + """Gets the memory usage of a container. Args: duthost: The AnsibleHost object of DuT. - container_name: Name of container. + container_name: A string represents the name of container. Returns: - None. + mem_usage: A string represents memory usage. 
""" - status_result = duthost.critical_process_status(container_name) - if status_result["status"] is False or len(status_result["exited_critical_process"]) > 0: - return False + get_mem_usage_cmd = "docker stats --no-stream --format \{{\{{.MemUsage\}}\}} {}".format(container_name) + cmd_result = duthost.shell(get_mem_usage_cmd) - return True + exit_code = cmd_result["rc"] + pytest_assert(exit_code == 0, "Failed to get memory usage of '{}'!".format(container_name)) + mem_info = cmd_result["stdout_lines"] + mem_usage = mem_info[0].split()[0] -def postcheck_critical_processes(duthost, container_name): - """Checks whether the critical processes are running after container was restarted. + return mem_usage + + +def consumes_memory_and_checks_monit(duthost, container_name, vm_workers, new_syntax_enabled): + """Invokes the 'stress' utility to consume memory more than the threshold asynchronously + and checks whether the container can be stopped and restarted. After container was restarted, + 'stress' utility will be invoked again to consume memory and checks whether Monit was able to + restart this container with or without help of new syntax. + Loganalyzer is leveraged to check whether the log messages related to container stopped + and started were generated. Args: duthost: The AnsibleHost object of DuT. container_name: Name of container. + vm_workers: Number of workers which does the spinning on malloc()/free() + to consume memory. + new_syntax_enabled: Checks to make sure container will be restarted if it is set to be + `True`. Returns: None. """ - logger.info("Checking the running status of critical processes in '{}' container ..." - .format(container_name)) - is_succeeded = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, CONTAINER_CHECK_INTERVAL_SECS, 0, - check_critical_processes, duthost, container_name) - if not is_succeeded: - pytest.fail("Not all critical processes in '{}' container are running!" 
- .format(container_name)) - logger.info("All critical processes in '{}' container are running.".format(container_name)) + expected_alerting_messages = [] + expected_alerting_messages.append(".*restart_service.*Restarting service 'telemetry'.*") + expected_alerting_messages.append(".*Stopping Telemetry container.*") + expected_alerting_messages.append(".*Stopped Telemetry container.*") + expected_alerting_messages.append(".*Starting Telemetry container.*") + expected_alerting_messages.append(".*Started Telemetry container.*") + + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_memory_checker") + loganalyzer.expect_regex = [] + loganalyzer.expect_regex.extend(expected_alerting_messages) + marker = loganalyzer.init() + thread_pool = ThreadPool() + thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers)) + + logger.info("Sleep '{}' seconds to wait for the alerting messages from syslog ...".format(WAITING_SYSLOG_MSG_SECS)) + time.sleep(WAITING_SYSLOG_MSG_SECS) + + logger.info("Checking the alerting messages related to container restart ...") + loganalyzer.analyze(marker) + logger.info("Found all the expected alerting messages from syslog!") + + logger.info("Waiting for '{}' container to be restarted ...".format(container_name)) + restarted = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, + CONTAINER_CHECK_INTERVAL_SECS, + 0, + check_container_state, duthost, container_name, True) + pytest_assert(restarted, "Failed to restart '{}' container!".format(container_name)) + logger.info("'{}' container is restarted.".format(container_name)) + + logger.info("Running 'stress' utility again in '{}' ...".format(container_name)) + thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers)) -def test_memory_checker(duthosts, enum_dut_feature_container, creds, enum_rand_one_per_hwsku_frontend_hostname): - """Checks whether the telemetry container can be restarted or not if the memory - usage of it is beyond the threshold. 
The `stress` utility is leveraged as - the memory stressing tool. + check_counter = 0 + marker = loganalyzer.update_marker_prefix("test_monit_counter") + logger.info("Checking memory usage of '{}' every 30 seconds for 6 times ...".format(container_name)) + while check_counter < 6: + check_counter += 1 + mem_usage = get_container_mem_usage(duthost, container_name) + logger.info("Memory usage of '{}' is '{}'".format(container_name, mem_usage)) + time.sleep(30) + + logger.info("Analyzing syslog messages to verify whether '{}' is restarted ...".format(container_name)) + analyzing_result = loganalyzer.analyze(marker, fail=False) + if not new_syntax_enabled: + pytest_assert(analyzing_result["total"]["expected_match"] == 0, + "Monit can reset counter and restart '{}'!".format(container_name)) + logger.info("Monit was unable to reset its counter and '{}' can not be restarted!".format(container_name)) + else: + pytest_assert(analyzing_result["total"]["expected_match"] == len(expected_alerting_messages), + "Monit still can not restart '{}' with the help of new syntax!".format(container_name)) + logger.info("Monit was able to restart '{}' with the help of new syntax!".format(container_name)) + + +@pytest.mark.parametrize("test_setup_and_cleanup", + [' if status == 3 for 1 times within 2 cycles then exec "/usr/bin/restart_service telemetry"'], + indirect=["test_setup_and_cleanup"]) +def test_memory_checker(duthosts, enum_dut_feature_container, test_setup_and_cleanup, + enum_rand_one_per_hwsku_frontend_hostname): + """Checks whether the container can be restarted or not if the memory + usage of it is beyond its threshold for specfic times within a sliding window. + The `stress` utility is leveraged as the memory stressing tool. Args: duthosts: The fixture returns list of DuTs. @@ -216,34 +505,198 @@ def test_memory_checker(duthosts, enum_dut_feature_container, creds, enum_rand_o None. 
""" dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) - pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname and container_name == "telemetry", + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." + .format(container_name)) + duthost = duthosts[dut_name] # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB # and number of vm_workers is hard coded. We will extend this testing on all containers after # the feature 'memory_checker' is fully implemented. container_name = "telemetry" - vm_workers = 4 + vm_workers = 6 pytest_require("Celestica-E1031" not in duthost.facts["hwsku"] and (("20191130" in duthost.os_version and parse_version(duthost.os_version) > parse_version("20191130.72")) or parse_version(duthost.kernel_version) > parse_version("4.9.0")), "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!") + if not is_container_running(duthost, container_name): + pytest.fail("'{}' is nor running!".format(container_name)) + + consumes_memory_and_checks_container_restart(duthost, container_name, vm_workers) + + +@pytest.mark.parametrize("test_setup_and_cleanup", + [' if status == 3 for 1 times within 2 cycles then exec "/usr/bin/restart_service telemetry"'], + indirect=["test_setup_and_cleanup"]) +def test_monit_reset_counter_failure(duthosts, enum_dut_feature_container, test_setup_and_cleanup, + enum_rand_one_per_hwsku_frontend_hostname): + """Checks that Monit was unable to reset its counter. 
Specifically Monit will restart + the container if memory usage of it is larger than the threshold for specific times within + a sliding window. However, Monit was unable to restart the container anymore if memory usage is + still larger than the threshold continuously since Monit failed to reset its internal counter. + The `stress` utility is leveraged as the memory stressing tool. + + Args: + duthosts: The fixture returns list of DuTs. + test_setup_and_cleanup: Fixture to setup prerequisites before and after testing. + enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up + a frontend DuT from testbed. + + Returns: + None. + """ + dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, + "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." + .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." + .format(container_name)) + + duthost = duthosts[dut_name] + + # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB + # and number of vm_workers is hard coded. We will extend this testing on all containers after + # the feature 'memory_checker' is fully implemented. 
+ container_name = "telemetry" + vm_workers = 6 + + pytest_require("Celestica-E1031" not in duthost.facts["hwsku"] + and ("20201231" in duthost.os_version or parse_version(duthost.kernel_version) > parse_version("4.9.0")), + "Test is not supported for platform Celestica E1031, 20191130 and older image versions!") + + logger.info("Checks whether '{}' is running ...".format(container_name)) + is_running = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, + CONTAINER_CHECK_INTERVAL_SECS, + 0, + check_container_state, duthost, container_name, True) + pytest_assert(is_running, "'{}' is not running on DuT!".format(container_name)) + logger.info("'{}' is running on DuT!".format(container_name)) + + consumes_memory_and_checks_monit(duthost, container_name, vm_workers, False) + + +@pytest.mark.parametrize("test_setup_and_cleanup", + [' if status == 3 for 1 times within 2 cycles then exec "/usr/bin/restart_service telemetry" repeat every 2 cycles'], + indirect=["test_setup_and_cleanup"]) +def test_monit_new_syntax(duthosts, enum_dut_feature_container, test_setup_and_cleanup, + enum_rand_one_per_hwsku_frontend_hostname): + """Checks that new syntax of Monit can mitigate the issue which shows Monit was unable + to restart container due to failing reset its internal counter. With the help of this syntax, + the culprit container can be restarted by Monit if memory usage of it is larger than the threshold + for specific times continuously. + + Args: + duthosts: The fixture returns list of DuTs. + test_setup_and_cleanup: Fixture to setup prerequisites before and after testing. + enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up + a frontend DuT from testbed. + + Returns: + None. + """ + dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, + "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." 
+ .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." + .format(container_name)) + + duthost = duthosts[dut_name] + + # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB + # and number of vm_workers is hard coded. We will extend this testing on all containers after + # the feature 'memory_checker' is fully implemented. + container_name = "telemetry" + vm_workers = 6 + + pytest_require("Celestica-E1031" not in duthost.facts["hwsku"] + and (("20191130" in duthost.os_version and parse_version(duthost.os_version) > parse_version("20191130.72")) + or parse_version(duthost.kernel_version) > parse_version("4.9.0")), + "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!") + + logger.info("Checks whether '{}' is running ...".format(container_name)) + is_running = wait_until(CONTAINER_RESTART_THRESHOLD_SECS, + CONTAINER_CHECK_INTERVAL_SECS, + 0, + check_container_state, duthost, container_name, True) + pytest_assert(is_running, "'{}' is not running on DuT!".format(container_name)) + logger.info("'{}' is running on DuT!".format(container_name)) + + consumes_memory_and_checks_monit(duthost, container_name, vm_workers, True) + + +def check_log_message(duthost, container_name): + """Leverages LogAanlyzer to check whether `memory_checker` can log the specific message + into syslog or not. + + Args: + duthost: The AnsibleHost object of DuT. + container_name: A string represents the name of container. + + Returns: + None. 
+ """ expected_alerting_messages = [] - loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="container_restart_due_to_memory") - loganalyzer.expect_regex = [] - expected_alerting_messages.append(".*restart_service.*Restarting service 'telemetry'.*") - expected_alerting_messages.append(".*Stopping Telemetry container.*") - expected_alerting_messages.append(".*Stopped Telemetry container.*") + expected_alerting_messages.append(".*\[memory_checker\] Exits without checking memory usage.*'telemetry'.*") + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="memory_checker_skip_removed_container") + loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend(expected_alerting_messages) marker = loganalyzer.init() - install_stress_utility(duthost, creds, container_name) - consume_memory_and_restart_container(duthost, container_name, vm_workers, loganalyzer, marker) + logger.info("Sleep '{}' seconds to wait for the message from syslog ...".format(WAITING_SYSLOG_MSG_SECS)) + time.sleep(WAITING_SYSLOG_MSG_SECS) - remove_stress_utility(duthost, container_name) - postcheck_critical_processes(duthost, container_name) + logger.info("Checking the syslog message written by 'memory_checker' ...") + loganalyzer.analyze(marker) + logger.info("Found the expected message from syslog!") + + +def test_memory_checker_without_container_created(duthosts, enum_dut_feature_container, remove_and_restart_container, + enum_rand_one_per_hwsku_frontend_hostname): + """Checks whether 'memory_checker' script can log a message into syslog if + one container is not created during device is booted/rebooted. This test case will + remove a container explicitly to simulate the scenario in which the container was not created + successfully. + + Args: + duthosts: The fixture returns list of DuTs. + enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up + a frontend DuT from testbed. + + Returns: + None. 
+ """ + dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container) + pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname, + "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen." + .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname)) + + pytest_require(container_name == "telemetry", + "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'." + .format(container_name)) + + duthost = duthosts[dut_name] + + # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB + # and number of vm_workers is hard coded. We will extend this testing on all containers after + # the feature 'memory_checker' is fully implemented. + container_name = "telemetry" + + pytest_require("Celestica-E1031" not in duthost.facts["hwsku"] + and (("20191130" in duthost.os_version and parse_version(duthost.os_version) > parse_version("20191130.72")) + or parse_version(duthost.kernel_version) > parse_version("4.9.0")), + "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!") + + check_log_message(duthost, container_name) diff --git a/tests/ntp/test_ntp.py b/tests/ntp/test_ntp.py index 9cc229f1ca1..e16403a8298 100644 --- a/tests/ntp/test_ntp.py +++ b/tests/ntp/test_ntp.py @@ -55,6 +55,11 @@ def setup_ntp(ptfhost, duthosts, rand_one_dut_hostname): duthost.command("config ntp del %s" % ptfhost.mgmt_ip) for ntp_server in ntp_servers: duthost.command("config ntp add %s" % ntp_server) + # The time jump leads to exception in lldp_syncd. The exception has been handled by lldp_syncd, + # but it will leave error messages in syslog, which will cause subsequent test cases to fail. + # So we need to wait for a while to make sure the error messages are flushed. + # The default update interval of lldp_syncd is 10 seconds, so we wait for 20 seconds here. 
+ time.sleep(20) @pytest.fixture def setup_long_jump_config(duthosts, rand_one_dut_hostname): diff --git a/tests/pc/test_lag_2.py b/tests/pc/test_lag_2.py index 3e4b04953ed..493c6fb26a3 100644 --- a/tests/pc/test_lag_2.py +++ b/tests/pc/test_lag_2.py @@ -11,6 +11,9 @@ from tests.common.helpers.assertions import pytest_require from tests.common.helpers.dut_ports import decode_dut_port_name from tests.common.fixtures.duthost_utils import disable_route_checker_module +from tests.common.helpers.dut_ports import get_duthost_with_name +from tests.common.config_reload import config_reload +from tests.common.helpers.constants import DEFAULT_ASIC_ID logger = logging.getLogger(__name__) @@ -261,14 +264,14 @@ def has_lags(dut): @pytest.mark.parametrize("testcase", ["single_lag", "lacp_rate", "fallback"]) -def test_lag(common_setup_teardown, duthosts, tbinfo, nbrhosts, fanouthosts, conn_graph_facts, enum_dut_portchannel, testcase): +def test_lag(common_setup_teardown, duthosts, tbinfo, nbrhosts, fanouthosts, conn_graph_facts, enum_dut_portchannel_with_completeness_level, testcase): # We can't run single_lag test on vtestbed since there is no leaffanout if testcase == "single_lag" and is_vtestbed(duthosts[0]): pytest.skip("Skip single_lag test on vtestbed") ptfhost = common_setup_teardown - dut_name, dut_lag = decode_dut_port_name(enum_dut_portchannel) + dut_name, dut_lag = decode_dut_port_name(enum_dut_portchannel_with_completeness_level) some_test_ran = False for duthost in duthosts: @@ -311,3 +314,207 @@ def test_lag(common_setup_teardown, duthosts, tbinfo, nbrhosts, fanouthosts, con test_instance.run_lag_fallback_test(lag_name, lag_facts) pytest_assert(some_test_ran, "Didn't run any test.") + +@pytest.fixture(scope='function') +def ignore_expected_loganalyzer_exceptions(duthosts, rand_one_dut_hostname, loganalyzer): + """ + Ignore expected failures logs during test execution. + + LAG tests are triggering following orchagent complaints but they don't cause + harm to DUT. 
+ Args: + duthosts: list of DUTs. + rand_one_dut_hostname: Hostname of a random chosen dut + loganalyzer: Loganalyzer utility fixture + """ + # When loganalyzer is disabled, the object could be None + duthost = duthosts[rand_one_dut_hostname] + if loganalyzer: + ignoreRegex = [ + ".*ERR swss[0-9]*#orchagent: :- getPortOperSpeed.*", # Valid test_lag_db_status and test_lag_db_status_with_po_update + ] + loganalyzer[duthost.hostname].ignore_regex.extend(ignoreRegex) + +@pytest.fixture(scope='function') +def teardown(duthost): + """Recover testbed if case of test_lag_db_status_with_po_update failed""" + original_lag_facts = {} + + original_lag_facts[duthost.hostname] = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + yield + # After test, compare lag_facts to check if port status is unchanged, + # otherwise recover DUT by reloading minigraph + try: + original_data = original_lag_facts[duthost.hostname] + lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + for lag_name in original_data['lags'].keys(): + for po_intf, port_info in original_data['lags'][lag_name]['po_stats']['ports'].items(): + if port_info['link']['up'] == lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf]['link']['up']: + logger.info("{} of {} is up, ignore it.".format(po_intf, lag_name)) + continue + else: + logger.info("{}'s lag_facts is changed, original_data {}\n, lag_facts {}".format(duthost.hostname, original_data, lag_facts)) + raise Exception("Raise exception for config_reload in next step.") + except Exception as e: + # If port was removed from portchannel, it will throw KeyError exception, or catch exception in previous steps, + # reload DUT to recover it + logger.info("{}'s lag_facts is changed, comparison failed with exception: {}".format(duthost.hostname, repr(e))) + config_reload(duthost, config_source="minigraph") + return + + +def get_oper_status_from_db(asichost, port_name): + """Get netdev_oper_status from state_db for 
interface""" + cmd = asichost.get_docker_cmd("redis-cli -n 6 hget \"PORT_TABLE|{}\" netdev_oper_status".format(port_name), "database") + status = asichost.sonichost.shell(cmd, module_ignore_errors=False)['stdout'] + # If PORT_TABLE in STATE_DB doesn't have key netdev_oper_status, + # check oper_status in APPL_DB instead. This scenario happens on 202012. + if not status: + cmd = asichost.get_docker_cmd("redis-cli -n 0 hget \"PORT_TABLE:{}\" oper_status".format(port_name), "database") + status = asichost.sonichost.shell(cmd, module_ignore_errors=False)['stdout'] + return status + +def get_admin_status_from_db(asichost, port_name): + """Get netdev_oper_status from state_db for interface""" + cmd = asichost.get_docker_cmd("redis-cli -n 6 hget \"PORT_TABLE|{}\" admin_status".format(port_name), "database") + status = asichost.sonichost.shell(cmd, module_ignore_errors=False)['stdout'] + # If PORT_TABLE in STATE_DB doesn't have key admin_status, + # check admin_status in APPL_DB instead. This scenario happens on 202012. 
+ if not status: + cmd = asichost.get_docker_cmd("redis-cli -n 0 hget \"PORT_TABLE:{}\" admin_status".format(port_name), "database") + status = asichost.sonichost.shell(cmd, module_ignore_errors=False)['stdout'] + return status + +def check_status_is_syncd(asichost, po_intf, port_info, lag_name): + """Check if interface's status is synced with the netdev_oper_status in state_db""" + port_status = port_info['link']['up'] if port_info['link'] else False + status_from_db = True if str(get_oper_status_from_db(asichost, po_intf)) == 'up' else False + return status_from_db == port_status + +def check_link_is_up(duthost, asichost, po_intf, port_info, lag_name): + """Check if interface's status and the netdev_oper_status in state_db are both up""" + new_lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + port_info = new_lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf] + port_status = port_info['link']['up'] if port_info['link'] else False + oper_status_from_db = True if str(get_oper_status_from_db(asichost, po_intf)) == 'up' else False + admin_status_from_db = True if str(get_admin_status_from_db(asichost, po_intf)) == 'up' else False + return port_status and oper_status_from_db and admin_status_from_db + +def check_link_is_down(asichost, po_intf): + """Check if interface's status and the netdev_oper_status in state_db are both up""" + oper_status = get_oper_status_from_db(asichost, po_intf) + admin_status = get_admin_status_from_db(asichost, po_intf) + + return str(oper_status) == 'down' and str(admin_status) == 'down' + +def test_lag_db_status(duthosts, enum_dut_portchannel_with_completeness_level, ignore_expected_loganalyzer_exceptions): + # Test state_db status for lag interfaces + dut_name, dut_lag = decode_dut_port_name(enum_dut_portchannel_with_completeness_level) + logger.info("Start test_lag_db_status test on dut {} for lag {}".format(dut_name, dut_lag)) + duthost = get_duthost_with_name(duthosts, dut_name) + if duthost 
is None: + pytest.fail("Failed with duthost is not found for dut name {}.".format(dut_name)) + + test_lags = [] + try: + lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + namespace_id = lag_facts['lags'][dut_lag]['po_namespace_id'] + if namespace_id: + asic_index = int(lag_facts['lags'][dut_lag]['po_namespace_id']) + else: + asic_index = DEFAULT_ASIC_ID + asichost = duthost.asic_instance(asic_index) + # Test for each lag + if dut_lag == "unknown": + test_lags = lag_facts['names'] + else: + pytest_require(dut_lag in lag_facts['names'], "No lag {} configuration found in {}".format(dut_lag, duthost.hostname)) + test_lags = [ dut_lag ] + # 1. Check if status of interface is in sync with state_db after bootup. + for lag_name in test_lags: + for po_intf, port_info in lag_facts['lags'][lag_name]['po_stats']['ports'].items(): + if not check_status_is_syncd(asichost, po_intf, port_info, lag_name): + pytest.fail("{} member {}'s status is not synced with oper_status in state_db.".format(lag_name, po_intf)) + + # 2. Check if status of interface is in sync with state_db after shutdown/no shutdown. 
+ for lag_name in test_lags: + for po_intf, port_info in lag_facts['lags'][lag_name]['po_stats']['ports'].items(): + asichost.shutdown_interface(po_intf) + # Retrieve lag_facts after shutdown interface + new_lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + port_info = new_lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf] + pytest_assert(wait_until(15, 1, 0, check_link_is_down, asichost, po_intf), + "{} member {}'s admin_status or oper_status in state_db is not down.".format(lag_name, po_intf)) + + # Retrieve lag_facts after no shutdown interface + asichost.startup_interface(po_intf) + # Sometimes, it has to wait seconds for booting up interface + pytest_assert(wait_until(60, 1, 0, check_link_is_up, duthost, asichost, po_intf, port_info, lag_name), + "{} member {}'s status or netdev_oper_status in state_db is not up.".format(lag_name, po_intf)) + finally: + # Recover interfaces in case of failure + lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + namespace_id = lag_facts['lags'][dut_lag]['po_namespace_id'] + for lag_name in test_lags: + if namespace_id: + asic_index = int(lag_facts['lags'][dut_lag]['po_namespace_id']) + else: + asic_index = DEFAULT_ASIC_ID + asichost = duthost.asic_instance(asic_index) + for po_intf, port_info in lag_facts['lags'][lag_name]['po_stats']['ports'].items(): + if port_info['link']['up']: + logger.info("{} of {} is up, ignore it.".format(po_intf, lag_name)) + continue + else: + logger.info("Interface {} of {} is down, no shutdown to recover it.".format(po_intf, lag_name)) + asichost.startup_interface(po_intf) + +def test_lag_db_status_with_po_update(duthosts, teardown, enum_dut_portchannel_with_completeness_level, ignore_expected_loganalyzer_exceptions): + """ + test port channel add/deletion and check interface status in state_db + """ + dut_name, dut_lag = decode_dut_port_name(enum_dut_portchannel_with_completeness_level) + logger.info("Start 
test_lag_db_status test on dut {} for lag {}".format(dut_name, dut_lag)) + duthost = get_duthost_with_name(duthosts, dut_name) + if duthost is None: + pytest.fail("Failed with duthost is not found for dut name {}.".format(dut_name)) + + lag_facts = duthost.lag_facts(host=duthost.hostname)['ansible_facts']['lag_facts'] + namespace_id = lag_facts['lags'][dut_lag]['po_namespace_id'] + if namespace_id: + asic_index = int(lag_facts['lags'][dut_lag]['po_namespace_id']) + else: + asic_index = DEFAULT_ASIC_ID + asichost = duthost.asic_instance(asic_index) + # Test for each lag + if dut_lag == "unknown": + test_lags = lag_facts['names'] + else: + pytest_require(dut_lag in lag_facts['names'], "No lag {} configuration found in {}".format(dut_lag, duthost.hostname)) + test_lags = [ dut_lag ] + + # Check if status of interface is in sync with state_db after removing/adding member. + for lag_name in test_lags: + for po_intf, port_info in lag_facts['lags'][lag_name]['po_stats']['ports'].items(): + # 1 Remove port member from portchannel + asichost.config_portchannel_member(lag_name, po_intf, "del") + + # 2 Shutdown this port to check if status is down + asichost.shutdown_interface(po_intf) + pytest_assert(wait_until(15, 1, 0, check_link_is_down, asichost, po_intf), + "{} member {}'s admin_status or oper_status in state_db is not down.".format(lag_name, po_intf)) + + # 3 Add this port back into portchannel and check if status is synced + asichost.config_portchannel_member(lag_name, po_intf, "add") + + # 4 Retrieve lag_facts after shutdown interface and check if status is synced + new_lag_facts = duthost.lag_facts(host = duthost.hostname)['ansible_facts']['lag_facts'] + port_info = new_lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf] + pytest_assert(wait_until(15, 1, 0, check_status_is_syncd, asichost, po_intf, port_info, lag_name), + "{} member {}'s status is not synced with oper_status in state_db.".format(lag_name, po_intf)) + + # 5 No shutdown this port to check if 
status is up + asichost.startup_interface(po_intf) + # Sometimes, it has to wait seconds for booting up interface + pytest_assert(wait_until(60, 1, 0, check_link_is_up, duthost, asichost, po_intf, port_info, lag_name), + "{} member {}'s admin_status or oper_status in state_db is not up.".format(lag_name, po_intf)) diff --git a/tests/pc/test_po_cleanup.py b/tests/pc/test_po_cleanup.py index 797ca1c8517..40e7deb466e 100644 --- a/tests/pc/test_po_cleanup.py +++ b/tests/pc/test_po_cleanup.py @@ -47,24 +47,24 @@ def check_topo_and_restore(duthosts, enum_rand_one_per_hwsku_frontend_hostname, if len(mg_facts['minigraph_portchannels'].keys()) == 0 and not duthost.is_multi_asic: pytest.skip("Skip test due to there is no portchannel exists in current topology.") - -def test_po_cleanup(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_asic_index, tbinfo): + +def test_po_cleanup(duthosts, enum_rand_one_per_hwsku_frontend_hostname, tbinfo): """ test port channel are cleaned up correctly and teammgrd and teamsyncd process handle SIGTERM gracefully """ duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] - logging.info("Disable swss/teamd Feature") - duthost.asic_instance(enum_asic_index).stop_service("swss") + logging.info("Disable swss/teamd Feature in all asics") + # Following will call "sudo systemctl stop swss@0", same for swss@1 .. + duthost.stop_service("swss") # Check if Linux Kernel Portchannel Interface teamdev are clean up - if not wait_until(10, 1, 0, check_kernel_po_interface_cleaned, duthost, enum_asic_index): - fail_msg = "PortChannel interface still exists in kernel" - pytest.fail(fail_msg) - # Restore swss service. 
- duthost.asic_instance(enum_asic_index).start_service("swss") - assert wait_until(300, 20, 0, duthost.critical_services_fully_started),\ - "Not all critical services are fully started" - + for asic_id in duthost.get_asic_ids(): + if not wait_until(10, 1, 0, check_kernel_po_interface_cleaned, duthost, asic_id): + fail_msg = "PortChannel interface still exists in kernel" + pytest.fail(fail_msg) + # Restore config services + config_reload(duthost) + def test_po_cleanup_after_reload(duthosts, enum_rand_one_per_hwsku_frontend_hostname, tbinfo): """ test port channel are cleaned up correctly after config reload, with system under stress. diff --git a/tests/pc/test_po_update.py b/tests/pc/test_po_update.py index 273255c2f25..012c1dba856 100644 --- a/tests/pc/test_po_update.py +++ b/tests/pc/test_po_update.py @@ -11,9 +11,9 @@ from tests.common import config_reload import ipaddress -from tests.common.platform.processes_utils import wait_critical_processes from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert +from tests.voq.voq_helpers import verify_no_routes_from_nexthop pytestmark = [ pytest.mark.topology('any'), @@ -46,15 +46,16 @@ def ignore_expected_loganalyzer_exceptions(enum_rand_one_per_hwsku_frontend_host @pytest.fixture(scope="function") -def reload_testbed(duthosts, enum_rand_one_per_hwsku_frontend_hostname): +def reload_testbed_on_failed(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, loganalyzer): """ Reload dut after test function finished """ duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] yield None - logging.info("Reloading config and restarting swss...") - config_reload(duthost) - wait_critical_processes(duthost) + if request.node.rep_call.failed: + # if test case failed, means bgp session down or port channel status not recovered, execute config reload + logging.info("Reloading config and restarting swss...") + config_reload(duthost, safe_reload=True, 
ignore_loganalyzer=loganalyzer) def test_po_update(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, tbinfo): @@ -142,41 +143,54 @@ def test_po_update(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_fro pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 0)) -def test_po_update_io_no_loss(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, tbinfo, - reload_testbed, ptfadapter): +def test_po_update_io_no_loss(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, tbinfo, ptfadapter, reload_testbed_on_failed): # GIVEN a lag topology, keep sending packets between 2 port channels # WHEN delete/add different members of a port channel # THEN no packets shall loss duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] asichost = duthost.asic_instance(enum_frontend_asic_index) - mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + mg_facts = asichost.get_extended_minigraph_facts(tbinfo) - if len(mg_facts["minigraph_portchannel_interfaces"]) < 2: + dut_mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + + if len(dut_mg_facts["minigraph_portchannel_interfaces"]) < 2: pytest.skip("Skip test due to there isn't enough port channel exists in current topology.") # generate ip-pc pairs, be like:[("10.0.0.56", "10.0.0.57", "PortChannel0001")] peer_ip_pc_pair = [(pc["addr"], pc["peer_addr"], pc["attachto"]) for pc in - mg_facts["minigraph_portchannel_interfaces"] + dut_mg_facts["minigraph_portchannel_interfaces"] if ipaddress.ip_address(pc['peer_addr']).version == 4] # generate pc tuples, fill in members, # be like:[("10.0.0.56", "10.0.0.57", "PortChannel0001", ["Ethernet48", "Ethernet52"])] - pcs = [(pair[0], pair[1], pair[2], mg_facts["minigraph_portchannels"][pair[2]]["members"]) for pair in - peer_ip_pc_pair - if len(mg_facts["minigraph_portchannels"][pair[2]]["members"]) >= 2] + pcs = [(pair[0], pair[1], pair[2], 
dut_mg_facts["minigraph_portchannels"][pair[2]]["members"]) for pair in + peer_ip_pc_pair] if len(pcs) < 2: pytest.skip( "Skip test due to there is no enough port channel with at least 2 members exists in current topology.") + # generate out_pc tuples similar to pc tuples, but that are on the same asic as asichost + out_pcs = [(pair[0], pair[1], pair[2], mg_facts["minigraph_portchannels"][pair[2]]["members"]) for pair in + peer_ip_pc_pair + if pair[2] in mg_facts['minigraph_portchannels'] and len(mg_facts["minigraph_portchannels"][pair[2]]["members"]) >= 2] + + if len(out_pcs) < 1: + pytest.skip( + "Skip test as there are no port channels on asic {} on dut {}".format(enum_frontend_asic_index, duthost)) + # Select out pc from the port channels that are on the same asic as asichost + out_pc = random.sample(out_pcs, k=1)[0] selected_pcs = random.sample(pcs, k=2) in_pc = selected_pcs[0] - out_pc = selected_pcs[1] + # Make sure the picked in_pc is not the same as the selected out_pc + if in_pc[2] == out_pc[2]: + in_pc = selected_pcs[1] + # use first port of in_pc as input port # all ports in out_pc will be output/forward ports pc, pc_members = out_pc[2], out_pc[3] - in_ptf_index = mg_facts["minigraph_ptf_indices"][in_pc[3][0]] + in_ptf_index = dut_mg_facts["minigraph_ptf_indices"][in_pc[3][0]] out_ptf_indices = map(lambda port: mg_facts["minigraph_ptf_indices"][port], out_pc[3]) logging.info( "selected_pcs is: %s, in_ptf_index is %s, out_ptf_indices is %s" % ( @@ -186,86 +200,114 @@ def test_po_update_io_no_loss(duthosts, enum_rand_one_per_hwsku_frontend_hostnam pc_ip = out_pc[0] in_peer_ip = in_pc[1] out_peer_ip = out_pc[1] + try: + # Step 1: Remove port channel members from port channel + for member in pc_members: + asichost.config_portchannel_member(pc, member, "del") + remove_pc_members = True + + # Step 2: Remove port channel ip from port channel + asichost.config_ip_intf(pc, pc_ip + "/31", "remove") + remove_pc_ip = True + verify_no_routes_from_nexthop(duthosts, 
out_peer_ip) + time.sleep(15) + int_facts = asichost.interface_facts()['ansible_facts'] + pytest_assert(not int_facts['ansible_interface_facts'][pc]['link']) + pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 1)) - # Step 1: Remove port channel members from port channel - for member in pc_members: - asichost.config_portchannel_member(pc, member, "del") - - # Step 2: Remove port channel ip from port channel - asichost.config_ip_intf(pc, pc_ip + "/31", "remove") + # Step 3: Create tmp port channel with default min-links(1) + asichost.config_portchannel(tmp_pc, "add") + create_tmp_pc = True - time.sleep(30) - int_facts = asichost.interface_facts()['ansible_facts'] - pytest_assert(not int_facts['ansible_interface_facts'][pc]['link']) - pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 1)) + # Step 4: Add port channel members to tmp port channel + for member in pc_members: + asichost.config_portchannel_member(tmp_pc, member, "add") + add_tmp_pc_members = True - # Step 3: Create tmp port channel with default min-links(1) - asichost.config_portchannel(tmp_pc, "add") + # Step 5: Add port channel ip to tmp port channel + asichost.config_ip_intf(tmp_pc, pc_ip + "/31", "add") + add_tmp_pc_ip = True - # Step 4: Add port channel members to tmp port channel - for member in pc_members: - asichost.config_portchannel_member(tmp_pc, member, "add") + int_facts = asichost.interface_facts()['ansible_facts'] + pytest_assert(int_facts['ansible_interface_facts'][tmp_pc]['ipv4']['address'] == pc_ip) - # Step 5: Add port channel ip to tmp port channel - asichost.config_ip_intf(tmp_pc, pc_ip + "/31", "add") - int_facts = asichost.interface_facts()['ansible_facts'] - pytest_assert(int_facts['ansible_interface_facts'][tmp_pc]['ipv4']['address'] == pc_ip) + time.sleep(15) + int_facts = asichost.interface_facts()['ansible_facts'] + pytest_assert(int_facts['ansible_interface_facts'][tmp_pc]['link']) + pytest_assert(wait_until(120, 10, 
0, asichost.check_bgp_statistic, 'ipv4_idle', 0)) - time.sleep(30) - int_facts = asichost.interface_facts()['ansible_facts'] - pytest_assert(int_facts['ansible_interface_facts'][tmp_pc]['link']) - pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 0)) - - # Keep sending packets, and add/del different members during that time, observe whether packets lose - pkt = testutils.simple_ip_packet( - eth_dst=duthost.facts["router_mac"], - eth_src=ptfadapter.dataplane.get_mac(0, in_ptf_index), - ip_src=in_peer_ip, - ip_dst=out_peer_ip) - - exp_pkt = pkt.copy() - exp_pkt = mask.Mask(exp_pkt) - - exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') - exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') - exp_pkt.set_do_not_care_scapy(packet.IP, 'chksum') - exp_pkt.set_do_not_care_scapy(packet.IP, 'ttl') - - ptfadapter.dataplane.flush() - member_update_finished_flag = Queue(1) - packet_sending_flag = Queue(1) - - def del_add_members(): - # wait for packets sending started, then starts to update pc members - while packet_sending_flag.empty() or (not packet_sending_flag.get()): - time.sleep(0.5) - asichost.config_portchannel_member(tmp_pc, pc_members[0], "del") - time.sleep(2) - asichost.config_portchannel_member(tmp_pc, pc_members[0], "add") + # Keep sending packets, and add/del different members during that time, observe whether packets lose + pkt = testutils.simple_ip_packet( + eth_dst=duthost.facts["router_mac"], + eth_src=ptfadapter.dataplane.get_mac(0, in_ptf_index), + ip_src=in_peer_ip, + ip_dst=out_peer_ip) + + exp_pkt = pkt.copy() + exp_pkt = mask.Mask(exp_pkt) + + exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') + exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') + exp_pkt.set_do_not_care_scapy(packet.IP, 'chksum') + exp_pkt.set_do_not_care_scapy(packet.IP, 'ttl') + + ptfadapter.dataplane.flush() + member_update_finished_flag = Queue(1) + packet_sending_flag = Queue(1) + + def del_add_members(): + # wait for packets sending started, then 
starts to update pc members + while packet_sending_flag.empty() or (not packet_sending_flag.get()): + time.sleep(0.2) + asichost.config_portchannel_member(tmp_pc, pc_members[0], "del") + time.sleep(2) + asichost.config_portchannel_member(tmp_pc, pc_members[0], "add") + time.sleep(4) + asichost.config_portchannel_member(tmp_pc, pc_members[1], "del") + time.sleep(2) + asichost.config_portchannel_member(tmp_pc, pc_members[1], "add") + time.sleep(2) + member_update_finished_flag.put(True) + + t = threading.Thread(target=del_add_members, name="del_add_members_thread") + t.start() + t_max = time.time() + 60 + send_count = 0 + stop_sending = False + ptfadapter.dataplane.flush() + time.sleep(1) + while not stop_sending: + # After 100 packets send, awake del_add_members thread, it happens only once. + if send_count == 100: + packet_sending_flag.put(True) + + testutils.send(ptfadapter, in_ptf_index, pkt) + send_count += 1 + member_update_thread_finished = (not member_update_finished_flag.empty()) and member_update_finished_flag.get() + reach_max_time = time.time() > t_max + stop_sending = reach_max_time or member_update_thread_finished + t.join(20) time.sleep(2) - asichost.config_portchannel_member(tmp_pc, pc_members[1], "del") - time.sleep(2) - asichost.config_portchannel_member(tmp_pc, pc_members[1], "add") - time.sleep(5) - member_update_finished_flag.put(True) - - t = threading.Thread(target=del_add_members, name="del_add_members_thread") - t.start() - t_max = time.time() + 60 - send_count = 0 - stop_sending = False - while not stop_sending: - # After 100 packets send, awake del_add_members thread, it happens only once. 
- if send_count == 100: - packet_sending_flag.put(True) - - testutils.send(ptfadapter, in_ptf_index, pkt) - send_count += 1 - member_update_thread_finished = (not member_update_finished_flag.empty()) and member_update_finished_flag.get() - reach_max_time = time.time() > t_max - stop_sending = reach_max_time or member_update_thread_finished - t.join(20) - match_cnt = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices) - - pytest_assert(match_cnt > 0, "Packets not send") - pytest_assert(match_cnt == send_count, "Packets lost during pc members add/removal") + match_count = testutils.count_matched_packets_all_ports(ptfadapter, exp_pkt, ports=out_ptf_indices, timeout=10) + logging.info("match_count: {}, send_count: {}".format(match_count, send_count)) + max_loss_rate = 0.01 + pytest_assert(match_count > send_count * (1 - max_loss_rate), + "Packets lost rate > {} during pc members add/removal, send_count: {}, match_count: {}".format( + max_loss_rate, send_count, match_count)) + finally: + if add_tmp_pc_ip: + asichost.config_ip_intf(tmp_pc, pc_ip + "/31", "remove") + time.sleep(2) + if add_tmp_pc_members: + for member in pc_members: + asichost.config_portchannel_member(tmp_pc, member, "del") + time.sleep(2) + if create_tmp_pc: + asichost.config_portchannel(tmp_pc, "del") + pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 1)) + if remove_pc_ip: + asichost.config_ip_intf(pc, pc_ip + "/31", "add") + if remove_pc_members: + for member in pc_members: + asichost.config_portchannel_member(pc, member, "add") + pytest_assert(wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 0)) diff --git a/tests/pfc/test_unknown_mac.py b/tests/pfc/test_unknown_mac.py index 1d184bb3d15..36b684f6394 100644 --- a/tests/pfc/test_unknown_mac.py +++ b/tests/pfc/test_unknown_mac.py @@ -207,10 +207,9 @@ def _checkFdbEntryMiss(self): """ Check if the FDB entry is missing for the port """ - result = self.duthost.command("show 
mac -p {}".format(self.dst_port), - module_ignore_errors=True) + result = self.duthost.command("show mac") out = result['stdout'] - pytest_assert("not in list" in out, "{} present in FDB".format(self.arp_entry[self.dst_ip])) + pytest_assert(self.arp_entry[self.dst_ip].lower() not in out.lower(), "{} present in FDB".format(self.arp_entry[self.dst_ip])) logger.info("'{}' not present in fdb as expected".format(self.arp_entry[self.dst_ip])) def verifyArpFdb(self): @@ -285,7 +284,7 @@ def _constructPacket(self): self.exp_pkts.append(tmp_pkt) # if inft is a sub interface, tuple be like ("Eth0.10", "Eth0") # if inft is a general interface, tuple be like ("Eth0", "Eth0") - self.pkt_map[pkt] = (intf, get_intf_by_sub_intf(intf, vlan_id)) + self.pkt_map[str(pkt)] = (intf, get_intf_by_sub_intf(intf, vlan_id), pkt) def _parseCntrs(self): """ @@ -333,7 +332,7 @@ def runTest(self): self._verifyIntfCounters(pretest=True) for pkt, exp_pkt in zip(self.pkts, self.exp_pkts): self.ptfadapter.dataplane.flush() - out_intf = self.pkt_map[pkt][0] + out_intf = self.pkt_map[str(pkt)][0] src_port = self.ptf_ports[out_intf][0] logger.info("Sending traffic on intf {}".format(out_intf)) testutils.send(self.ptfadapter, src_port, pkt, count=TEST_PKT_CNT) diff --git a/tests/pfcwd/conftest.py b/tests/pfcwd/conftest.py index 3126d00cc8a..ad692ef7d95 100644 --- a/tests/pfcwd/conftest.py +++ b/tests/pfcwd/conftest.py @@ -32,14 +32,6 @@ def pytest_addoption(parser): parser.addoption('--two-queues', action='store_true', default=True, help='Run test with sending traffic to both queues [3, 4]') -@pytest.fixture(scope="module", autouse=True) -def skip_pfcwd_test_dualtor(tbinfo): - if 'dualtor' in tbinfo['topo']['name']: - pytest.skip("Pfcwd tests skipped on dual tor testbed") - - yield - - @pytest.fixture(scope="module") def two_queues(request): """ diff --git a/tests/pfcwd/test_pfcwd_function.py b/tests/pfcwd/test_pfcwd_function.py index 9d11100fe7a..9ef614ea3b5 100644 --- 
a/tests/pfcwd/test_pfcwd_function.py +++ b/tests/pfcwd/test_pfcwd_function.py @@ -5,13 +5,14 @@ import time from tests.common.fixtures.conn_graph_facts import fanout_graph_facts -from tests.common.helpers.assertions import pytest_assert +from tests.common.helpers.assertions import pytest_assert, pytest_require from tests.common.helpers.pfc_storm import PFCStorm from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer from .files.pfcwd_helper import start_wd_on_ports from tests.ptf_runner import ptf_runner from tests.common import port_toggle from tests.common import constants +from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled PTF_PORT_MAPPING_MODE = 'use_orig_interface' @@ -19,9 +20,9 @@ TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates") EXPECT_PFC_WD_DETECT_RE = ".* detected PFC storm .*" EXPECT_PFC_WD_RESTORE_RE = ".*storm restored.*" -WD_ACTION_MSG_PFX = { "dontcare": "Verify PFCWD detection when queue buffer is not empty and proper function of drop action", - "drop": "Verify proper function of drop action", - "forward": "Verify proper function of forward action" +WD_ACTION_MSG_PFX = { "dontcare": "Verify PFCWD detection when queue buffer is not empty and proper function of pfcwd drop action", + "drop": "Verify proper function of pfcwd drop action", + "forward": "Verify proper function of pfcwd forward action" } MMU_ACTIONS = ['change', 'noop', 'restore', 'noop'] DB_SEPARATORS = {'0': ':', '4': '|'} @@ -153,18 +154,23 @@ def get_mmu_params(dut, port): """ logger.info("Retreiving pg profile and dynamic threshold for port: {}".format(port)) + if is_tunnel_qos_remap_enabled(dut): + queue_range = '2-4' + else: + queue_range = '3-4' + asic = dut.get_port_asic_instance(port) if PfcCmd.isBufferInApplDb(asic): db = "0" - pg_pattern = "BUFFER_PG_TABLE:{}:3-4" + pg_pattern = "BUFFER_PG_TABLE:{}:{}" else: db = "4" - pg_pattern = "BUFFER_PG|{}|3-4" + pg_pattern = "BUFFER_PG|{}|{}" pg_profile = 
asic.run_redis_cmd( argv = [ "redis-cli", "-n", db, "HGET", - pg_pattern.format(port), "profile" + pg_pattern.format(port, queue_range), "profile" ] )[0].encode("utf-8") @@ -183,32 +189,35 @@ def get_mmu_params(dut, port): class PfcPktCntrs(object): """ PFCwd counter retrieval and verifications """ - def __init__(self, dut, action): + def __init__(self, dut, rx_action, tx_action): """ Args: dut(AnsibleHost) : dut instance action(string): PFCwd action for traffic test """ self.dut = dut - self.action = action if action != "dontcare" else "drop" - if self.action != "forward": + self.rx_action = rx_action + self.tx_action = tx_action + if self.tx_action != "forward": self.pkt_cntrs_tx = ['PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS', 'PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST'] - self.pkt_cntrs_rx = ['PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS', 'PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST'] self.err_msg_tx = [("Tx drop cnt check failed: Tx drop before: {} Tx drop after: {} " "Expected (diff): {} Obtained: {}"), "Tx drop last cnt check failed: Expected: {} Obtained: {}" ] - self.err_msg_rx = [("Rx drop cnt check failed: Rx drop before: {} Rx drop after: {} " - "Expected (diff): {} Obtained: {}"), - "Rx drop last cnt check failed: Expected: {} Obtained: {}" - ] else: self.pkt_cntrs_tx = ['PFC_WD_QUEUE_STATS_TX_PACKETS', 'PFC_WD_QUEUE_STATS_TX_PACKETS_LAST'] - self.pkt_cntrs_rx = ['PFC_WD_QUEUE_STATS_RX_PACKETS', 'PFC_WD_QUEUE_STATS_RX_PACKETS_LAST'] self.err_msg_tx = [("Tx forward cnt check failed: Tx forward before: {} Tx forward after: {} " "Expected (diff): {} Obtained: {}"), "Tx forward last cnt check failed: Expected: {} Obtained: {}" ] + if self.rx_action != "forward": + self.pkt_cntrs_rx = ['PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS', 'PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST'] + self.err_msg_rx = [("Rx drop cnt check failed: Rx drop before: {} Rx drop after: {} " + "Expected (diff): {} Obtained: {}"), + "Rx drop last cnt check failed: Expected: {} Obtained: {}" + ] + else: 
+ self.pkt_cntrs_rx = ['PFC_WD_QUEUE_STATS_RX_PACKETS', 'PFC_WD_QUEUE_STATS_RX_PACKETS_LAST'] self.err_msg_rx = [("Rx forward cnt check failed: Rx forward before: {} Rx forward after: {} " "Expected (diff): {} Obtained: {}"), "Rx forward last cnt check failed: Expected: {} Obtained: {}" @@ -241,7 +250,7 @@ def verify_pkt_cnts(self, port_type, pkt_cnt): port_type(string) : the type of port (eg. portchannel, vlan, interface) pkt_cnt(int) : Number of test packets sent from the PTF """ - logger.info("--- Checking Tx {} cntrs ---".format(self.action)) + logger.info("--- Checking Tx {} cntrs ---".format(self.tx_action)) tx_diff = self.cntr_val["tx_end"] - self.cntr_val["tx_begin"] if (port_type in ['vlan', 'interface'] and tx_diff != pkt_cnt) or tx_diff <= 0: err_msg = self.err_msg_tx[0].format(self.cntr_val["tx_begin"], self.cntr_val["tx_end"], pkt_cnt, tx_diff) @@ -251,7 +260,7 @@ def verify_pkt_cnts(self, port_type, pkt_cnt): err_msg = self.err_msg_tx[1].format(pkt_cnt, self.cntr_val["tx_last"]) pytest_assert(err_msg) - logger.info("--- Checking Rx {} cntrs ---".format(self.action)) + logger.info("--- Checking Rx {} cntrs ---".format(self.rx_action)) rx_diff = self.cntr_val["rx_end"] - self.cntr_val["rx_begin"] if (port_type in ['vlan', 'interface'] and rx_diff != pkt_cnt) or rx_diff <= 0: err_msg = self.err_msg_rx[0].format(self.cntr_val["rx_begin"], self.cntr_val["rx_end"], pkt_cnt, rx_diff) @@ -263,7 +272,7 @@ def verify_pkt_cnts(self, port_type, pkt_cnt): class SetupPfcwdFunc(object): """ Test setup per port """ - def setup_test_params(self, port, vlan, init=False, mmu_params=False): + def setup_test_params(self, port, vlan, init=False, mmu_params=False, detect=True, toggle=False): """ Sets up test parameters associated with a DUT port @@ -273,14 +282,16 @@ def setup_test_params(self, port, vlan, init=False, mmu_params=False): init(bool) : If the fanout needs to be initialized or not """ logger.info("--- Setting up test params for port {} ---".format(port)) - 
self.setup_port_params(port, init=init) + self.setup_port_params(port, init=init, detect=detect) + if toggle: + self.update_queue(port) if mmu_params: self.setup_mmu_params(port) self.resolve_arp(vlan) if not self.pfc_wd['fake_storm']: - self.storm_setup(init=init) + self.storm_setup(init=init, detect=detect) - def setup_port_params(self, port, init=False): + def setup_port_params(self, port, init=False, detect=True): """ Gather all the parameters needed for storm generation and ptf test based off the DUT port @@ -308,6 +319,8 @@ def setup_port_params(self, port, init=False): self.pfc_wd['test_port_vlan_id'] = self.ports[port].get('test_port_vlan_id') self.pfc_wd['rx_port_vlan_id'] = self.ports[port].get('rx_port_vlan_id') self.queue_oid = self.dut.get_queue_oid(port, self.pfc_wd['queue_index']) + if init and detect: + self.log_handle = dict() def update_queue(self, port): """ @@ -370,7 +383,7 @@ def resolve_arp(self, vlan): self.ptf.command("ping {} -c 10".format(vlan['addr'])) self.dut.command("docker exec -i swss arping {} -c 5".format(self.pfc_wd['test_neighbor_addr'])) - def storm_setup(self, init=False): + def storm_setup(self, init=False, detect=True): """ Prepare fanout for the storm generation @@ -386,7 +399,7 @@ def storm_setup(self, init=False): self.peer_dev_list[self.peer_device] = peer_info['hwsku'] # get pfc storm handle - if init: + if init and detect: self.storm_hndle = PFCStorm(self.dut, self.fanout_info, self.fanout, pfc_queue_idx=self.pfc_wd['queue_index'], pfc_frames_number=self.pfc_wd['frames_number'], @@ -574,16 +587,16 @@ def fill_buffer(self): ptf_runner(self.ptf, "ptftests", "pfc_wd.PfcWdTest", "ptftests", params=ptf_params, log_file=log_file) - def verify_wd_func(self, action): + def verify_wd_func(self, action, rx_action, tx_action): """ PTF traffic send and verify Args: action(string) : PTF traffic test action """ - logger.info("--- Verify PFCwd function for action {} ---".format(action)) - self.verify_tx_egress(action) - 
self.verify_rx_ingress(action) + logger.info("--- Verify PFCwd function for pfcwd action {}, Tx traffic action {}, Rx traffic action {} ---".format(action, tx_action, rx_action)) + self.verify_tx_egress(tx_action) + self.verify_rx_ingress(rx_action) self.verify_other_pfc_queue() self.verify_other_pfc_pg() @@ -636,7 +649,7 @@ def storm_detect_path(self, dut, port, action): loganalyzer.analyze(marker) self.stats.get_pkt_cnts(self.queue_oid, begin=True) # test pfcwd functionality on a storm - self.traffic_inst.verify_wd_func(action if action != "dontcare" else "drop") + self.traffic_inst.verify_wd_func(action, self.rx_action, self.tx_action) return loganalyzer def storm_restore_path(self, dut, loganalyzer, port, action): @@ -666,7 +679,7 @@ def storm_restore_path(self, dut, loganalyzer, port, action): loganalyzer.analyze(marker) self.stats.get_pkt_cnts(self.queue_oid, begin=False) - def run_test(self, dut, port, action, mmu_action=None): + def run_test(self, dut, port, action, mmu_action=None, detect=True, restore=True): """ Test method that invokes the storm detection and restoration path which includes the traffic test and associated counter verifications @@ -676,16 +689,28 @@ def run_test(self, dut, port, action, mmu_action=None): port(string) : DUT port action(string) : PTF test action """ - logger.info("--- Storm detection path for port {} ---".format(port)) - loganalyzer = self.storm_detect_path(dut, port, action) + if detect: + logger.info("--- Storm detection path for port {} ---".format(port)) + loganalyzer = self.storm_detect_path(dut, port, action) + self.log_handle[port] = loganalyzer if mmu_action is not None: self.update_mmu_params(mmu_action, port) - logger.info("--- Storm restoration path for port {} ---".format(port)) - self.storm_restore_path(dut, loganalyzer, port, action) - logger.info("--- Verify PFCwd counters for port {} ---".format(port)) - self.stats.verify_pkt_cnts(self.pfc_wd['port_type'], self.pfc_wd['test_pkt_count']) + if restore: + 
loganalyzer = self.log_handle[port] + logger.info("--- Storm restoration path for port {} ---".format(port)) + self.storm_restore_path(dut, loganalyzer, port, action) + logger.info("--- Verify PFCwd counters for port {} ---".format(port)) + self.stats.verify_pkt_cnts(self.pfc_wd['port_type'], self.pfc_wd['test_pkt_count']) + + def set_traffic_action(self, duthost, action): + action = action if action != "dontcare" else "drop" + if duthost.facts["asic_type"] in ["mellanox", "cisco-8000"] or is_tunnel_qos_remap_enabled(duthost): + self.rx_action = "forward" + else: + self.rx_action = action + self.tx_action = action def test_pfcwd_actions(self, request, fake_storm, setup_pfc_test, fanout_graph_facts, ptfhost, duthosts, rand_one_dut_hostname, fanouthosts): """ @@ -713,6 +738,8 @@ def test_pfcwd_actions(self, request, fake_storm, setup_pfc_test, fanout_graph_f self.peer_dev_list = dict() self.fake_storm = fake_storm self.storm_hndle = None + self.rx_action = None + self.tx_action = None for idx, port in enumerate(self.ports): logger.info("") @@ -722,10 +749,14 @@ def test_pfcwd_actions(self, request, fake_storm, setup_pfc_test, fanout_graph_f pfc_wd_restore_time_large = request.config.getoption("--restore-time") # wait time before we check the logs for the 'restore' signature. 'pfc_wd_restore_time_large' is in ms. 
self.timers['pfc_wd_wait_for_restore_time'] = int(pfc_wd_restore_time_large / 1000 * 2) - for action in ['dontcare', 'drop', 'forward']: + actions = ['dontcare', 'drop', 'forward'] + if duthost.sonichost._facts['asic_type']=="cisco-8000": + actions = ['dontcare', 'drop'] + for action in actions: try: - self.stats = PfcPktCntrs(self.dut, action) - logger.info("{} on port {}".format(WD_ACTION_MSG_PFX[action], port)) + self.set_traffic_action(duthost, action) + self.stats = PfcPktCntrs(self.dut, self.rx_action, self.tx_action) + logger.info("{} on port {}: Tx traffic action {}, Rx traffic action {} ".format(WD_ACTION_MSG_PFX[action], port, self.tx_action, self.rx_action)) self.run_test(self.dut, port, action) except Exception as e: pytest.fail(str(e)) @@ -740,6 +771,76 @@ def test_pfcwd_actions(self, request, fake_storm, setup_pfc_test, fanout_graph_f logger.info("--- Stop PFC WD ---") self.dut.command("pfcwd stop") + def test_pfcwd_multi_port(self, request, fake_storm, setup_pfc_test, fanout_graph_facts, ptfhost, duthosts, rand_one_dut_hostname, fanouthosts): + """ + Tests pfcwd behavior when 2 ports are under pfc storm one after the other + + Test runs 2 iterations - 2 ports same queue, 2 ports different queue + 1. Select first port + 2. Start pfcwd on the selected test port + 3. Start pfc storm on selected test port/lossless queue + 4. Verify if the storm detected msg is seen in the logs + 5. Send traffic with test port/lossless queue as ingress/egress port and ensure that packets are dropped + 6. Send traffic with test port/other lossless queue as ingress/egress port and + ensure that packets are forwarded + 7. Select second port and repeat steps 2-6 + 8. 
Stop pfc storm on both the ports/lossless queues and verify if the storm restored msg is seen in the logs + + Args: + request(object) : pytest request object + fake_storm(fixture) : Module scoped fixture for enable/disable fake storm + setup_pfc_test(fixture) : Module scoped autouse fixture for PFCwd + fanout_graph_facts(fixture) : fanout graph info + ptfhost(AnsibleHost) : ptf host instance + duthost(AnsibleHost) : DUT instance + rand_one_dut_hostname(string) : randomly pick a dut in multi DUT setup + fanouthosts(AnsibleHost): fanout instance + """ + duthost = duthosts[rand_one_dut_hostname] + setup_info = setup_pfc_test + self.fanout_info = fanout_graph_facts + self.ptf = ptfhost + self.dut = duthost + self.fanout = fanouthosts + self.timers = setup_info['pfc_timers'] + pfc_wd_restore_time_large = request.config.getoption("--restore-time") + # wait time before we check the logs for the 'restore' signature. 'pfc_wd_restore_time_large' is in ms. + self.timers['pfc_wd_wait_for_restore_time'] = int(pfc_wd_restore_time_large / 1000 * 2) + self.ports = setup_info['selected_test_ports'] + selected_ports = list(self.ports.keys())[:2] + pytest_require(len(selected_ports) == 2, 'Pfcwd multi port test needs at least 2 ports') + self.neighbors = setup_info['neighbors'] + self.peer_dev_list = dict() + dut_facts = self.dut.facts + self.fake_storm = fake_storm + self.storm_hndle = None + self.rx_action = None + self.tx_action = None + self.set_traffic_action(duthost, "drop") + self.stats = PfcPktCntrs(self.dut, self.rx_action, self.tx_action) + + for count in range(2): + try: + for idx, port in enumerate(selected_ports): + logger.info("") + logger.info("--- Testing on {} ---".format(port)) + self.setup_test_params(port, setup_info['vlan'], init=not idx, toggle=idx and count) + self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd) + self.run_test(self.dut, port, "drop", restore=False) + for idx, port in enumerate(selected_ports): + logger.info("") 
+ logger.info("--- Testing on {} ---".format(port)) + self.setup_test_params(port, setup_info['vlan'], init=not idx, detect=False, toggle=idx and count) + self.run_test(self.dut, port, "drop", detect=False) + + except Exception as e: + pytest.fail(str(e)) + + finally: + logger.info("--- Stop PFC WD ---") + self.dut.command("pfcwd stop") + + def test_pfcwd_mmu_change(self, request, fake_storm, setup_pfc_test, fanout_graph_facts, ptfhost, duthosts, rand_one_dut_hostname, fanouthosts): """ Tests if mmu changes impact Pfcwd functionality @@ -781,7 +882,10 @@ def test_pfcwd_mmu_change(self, request, fake_storm, setup_pfc_test, fanout_grap self.storm_hndle = None logger.info("---- Testing on port {} ----".format(port)) self.setup_test_params(port, setup_info['vlan'], init=True, mmu_params=True) - self.stats = PfcPktCntrs(self.dut, "drop") + self.rx_action = None + self.tx_action = None + self.set_traffic_action(duthost, "drop") + self.stats = PfcPktCntrs(self.dut, self.rx_action, self.tx_action) try: for idx, mmu_action in enumerate(MMU_ACTIONS): @@ -845,6 +949,8 @@ def test_pfcwd_port_toggle(self, request, fake_storm, setup_pfc_test, fanout_gra self.peer_dev_list = dict() self.fake_storm = fake_storm self.storm_hndle = None + self.rx_action = None + self.tx_action = None action = "dontcare" for idx, port in enumerate(self.ports): @@ -857,9 +963,10 @@ def test_pfcwd_port_toggle(self, request, fake_storm, setup_pfc_test, fanout_gra self.timers['pfc_wd_wait_for_restore_time'] = int(pfc_wd_restore_time_large / 1000 * 2) try: + self.set_traffic_action(duthost, action) # Verify that PFC storm is detected and restored - self.stats = PfcPktCntrs(self.dut, action) - logger.info("{} on port {}".format(WD_ACTION_MSG_PFX[action], port)) + self.stats = PfcPktCntrs(self.dut, self.rx_action, self.tx_action) + logger.info("{} on port {}. 
Tx traffic action {}, Rx traffic action {}".format(WD_ACTION_MSG_PFX[action], port, self.tx_action, self.rx_action)) self.run_test(self.dut, port, action) # Toggle test port and verify that PFC storm is not detected diff --git a/tests/pfcwd/test_pfcwd_timer_accuracy.py b/tests/pfcwd/test_pfcwd_timer_accuracy.py index 747c06ce986..e431fac6019 100644 --- a/tests/pfcwd/test_pfcwd_timer_accuracy.py +++ b/tests/pfcwd/test_pfcwd_timer_accuracy.py @@ -129,7 +129,7 @@ def set_storm_params(dut, fanout_info, fanout, peer_params): """ logger.info("Setting up storm params") pfc_queue_index = 4 - pfc_frames_count = 300000 + pfc_frames_count = 1000000 storm_handle = PFCStorm(dut, fanout_info, fanout, pfc_queue_idx=pfc_queue_index, pfc_frames_number=pfc_frames_count, peer_info=peer_params) storm_handle.deploy_pfc_gen() @@ -153,7 +153,7 @@ def run_test(self): storm_start_ms = self.retrieve_timestamp("[P]FC_STORM_START") storm_detect_ms = self.retrieve_timestamp("[d]etected PFC storm") logger.info("Wait for PFC storm end marker to appear in logs") - time.sleep(1) + time.sleep(8) storm_end_ms = self.retrieve_timestamp("[P]FC_STORM_END") storm_restore_ms = self.retrieve_timestamp("[s]torm restored") real_detect_time = storm_detect_ms - storm_start_ms diff --git a/tests/pfcwd/test_pfcwd_warm_reboot.py b/tests/pfcwd/test_pfcwd_warm_reboot.py index d6ba73a7cd8..6bf107deb36 100644 --- a/tests/pfcwd/test_pfcwd_warm_reboot.py +++ b/tests/pfcwd/test_pfcwd_warm_reboot.py @@ -276,7 +276,7 @@ def verify_rx_ingress(self, wd_action): ptf_runner(self.ptf, "ptftests", "pfc_wd.PfcWdTest", "ptftests", params=ptf_params, log_file=log_file) - def verify_wd_func(self, detect=True): + def verify_wd_func(self, dut, detect=True): """ PTF traffic send and verify @@ -284,12 +284,20 @@ def verify_wd_func(self, detect=True): detect(bool) : if the current iteration is a storm detect or not (default: True) """ if detect: + rx_action = "drop" + tx_action = "drop" wd_action="drop" else: + rx_action = "forward" + 
tx_action = "forward" wd_action = "forward" - logger.info("--- Verify PFCwd function for action {} ---".format(wd_action)) - self.verify_tx_egress(wd_action) - self.verify_rx_ingress(wd_action) + + if dut.facts['asic_type'] in ['mellanox', 'cisco-8000']: + rx_action = "forward" + + logger.info("--- Verify PFCwd function for pfcwd action {}, Tx traffic {}, Rx traffic {} ---".format(wd_action, tx_action, rx_action)) + self.verify_tx_egress(tx_action) + self.verify_rx_ingress(rx_action) class TestPfcwdWb(SetupPfcwdFunc): @@ -411,7 +419,7 @@ def run_test(self, port, queue, detect=True, storm_start=True, first_detect_afte logger.info("--- Storm restoration path for port {} queue {} ---".format(port, queue)) self.storm_restore_path(port, queue) # test pfcwd functionality on a storm/restore - self.traffic_inst.verify_wd_func(detect=detect) + self.traffic_inst.verify_wd_func(self.dut, detect=detect) @pytest.fixture(autouse=True) def pfcwd_wb_test_cleanup(self): diff --git a/tests/platform_tests/api/conftest.py b/tests/platform_tests/api/conftest.py index ef59bed69f5..b6da8f80676 100644 --- a/tests/platform_tests/api/conftest.py +++ b/tests/platform_tests/api/conftest.py @@ -1,5 +1,4 @@ import os -import time import pytest import httplib @@ -11,6 +10,7 @@ IPTABLES_PREPEND_RULE_CMD = 'iptables -I INPUT 1 -p tcp -m tcp --dport {} -j ACCEPT'.format(SERVER_PORT) IPTABLES_DELETE_RULE_CMD = 'iptables -D INPUT -p tcp -m tcp --dport {} -j ACCEPT'.format(SERVER_PORT) + @pytest.fixture(scope='function') def start_platform_api_service(duthosts, enum_rand_one_per_hwsku_hostname, localhost, request): duthost = duthosts[enum_rand_one_per_hwsku_hostname] @@ -20,22 +20,17 @@ def start_platform_api_service(duthosts, enum_rand_one_per_hwsku_hostname, local port=SERVER_PORT, state='started', delay=1, - timeout=5, + timeout=10, module_ignore_errors=True) - if 'exception' in res: - # TODO: Remove this check once we no longer need to support Python 2 - if request.cls.__name__ == "TestSfpApi" and 
duthost.facts.get("asic_type") == "mellanox" \ - and duthost.sonic_release in ['202012', '202106']: - # On Mellanox platform, the SFP APIs are not migrated to python3 yet, - # thus we have to make it as an exception here. - py3_platform_api_available = False - else: - res = duthost.command('docker exec -i pmon python3 -c "import sonic_platform"', module_ignore_errors=True) - py3_platform_api_available = not res['failed'] + if res['failed'] is True: + + res = duthost.command('docker exec -i pmon python3 -c "import sonic_platform"', module_ignore_errors=True) + py3_platform_api_available = not res['failed'] supervisor_conf = [ '[program:platform_api_server]', - 'command=/usr/bin/python{} /opt/platform_api_server.py --port {}'.format('3' if py3_platform_api_available else '2', SERVER_PORT), + 'command=/usr/bin/python{} /opt/platform_api_server.py --port {}'.format('3' if py3_platform_api_available + else '2', SERVER_PORT), 'autostart=True', 'autorestart=True', 'stdout_logfile=syslog', @@ -59,8 +54,8 @@ def start_platform_api_service(duthosts, enum_rand_one_per_hwsku_hostname, local duthost.command('docker exec -i pmon supervisorctl reread') duthost.command('docker exec -i pmon supervisorctl update') - res = localhost.wait_for(host=dut_ip, port=SERVER_PORT, state='started', delay=1, timeout=5) - assert 'exception' not in res + res = localhost.wait_for(host=dut_ip, port=SERVER_PORT, state='started', delay=1, timeout=10) + assert res['failed'] is False @pytest.fixture(scope='module', autouse=True) @@ -100,12 +95,12 @@ def platform_api_conn(duthosts, enum_rand_one_per_hwsku_hostname, start_platform finally: conn.close() + @pytest.fixture(autouse=True) def check_not_implemented_warnings(duthosts, enum_rand_one_per_hwsku_hostname): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - loganalyzer = LogAnalyzer(ansible_host=duthost, - marker_prefix="platformapi_test") + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="platformapi_test") marker = 
loganalyzer.init() yield loganalyzer.match_regex.extend(['WARNING pmon#platform_api_server.py: API.+not implemented']) diff --git a/tests/platform_tests/api/test_component.py b/tests/platform_tests/api/test_component.py index cb7aaa83b96..3f67498d8ba 100644 --- a/tests/platform_tests/api/test_component.py +++ b/tests/platform_tests/api/test_component.py @@ -172,7 +172,8 @@ def test_get_firmware_update_notification(self, duthosts, enum_rand_one_per_hwsk for image in image_list: notif = component.get_firmware_update_notification(platform_api_conn, i, image) # Can return "None" if no update required. - pytest_assert(isinstance(notif, STRING_TYPE), "Component {}: Firmware update notification appears to be incorrect from image {}".format(i, image)) + if notif is not None: + pytest_assert(isinstance(notif, STRING_TYPE), "Component {}: Firmware update notification appears to be incorrect from image {}".format(i, image)) def test_install_firmware(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, platform_api_conn): duthost = duthosts[enum_rand_one_per_hwsku_hostname] diff --git a/tests/platform_tests/api/test_fan_drawer_fans.py b/tests/platform_tests/api/test_fan_drawer_fans.py index 5715a3206b3..539b42b355a 100644 --- a/tests/platform_tests/api/test_fan_drawer_fans.py +++ b/tests/platform_tests/api/test_fan_drawer_fans.py @@ -8,7 +8,7 @@ from tests.common.helpers.assertions import pytest_assert from tests.common.helpers.platform_api import chassis, fan_drawer, fan_drawer_fan - +from tests.platform_tests.thermal_control_test_helper import start_thermal_control_daemon, stop_thermal_control_daemon from platform_api_test_base import PlatformApiTestBase ################################################### @@ -48,16 +48,21 @@ class TestFanDrawerFans(PlatformApiTestBase): # level, so we must do the same here to prevent a scope mismatch. 
@pytest.fixture(scope="function", autouse=True) - def setup(self, platform_api_conn): + def setup(self, platform_api_conn, duthost): if self.num_fan_drawers is None: try: self.num_fan_drawers = chassis.get_num_fan_drawers(platform_api_conn) except: - pytest.fail("num_fans is not an integer") + if "201811" in duthost.os_version or "201911" in duthost.os_version: + pytest.skip("Image version {} does not support API: num_fan_drawers, test will be skipped".format(duthost.os_version)) + else: + pytest.fail("num_fans is not an integer") else: if self.num_fan_drawers == 0: pytest.skip("No fan drawers found on device") - + stop_thermal_control_daemon(duthost) + yield + start_thermal_control_daemon(duthost) # # Helper functions # diff --git a/tests/platform_tests/api/test_psu_fans.py b/tests/platform_tests/api/test_psu_fans.py index fd38ff608c5..7819af6c65b 100644 --- a/tests/platform_tests/api/test_psu_fans.py +++ b/tests/platform_tests/api/test_psu_fans.py @@ -107,7 +107,27 @@ def test_get_name(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, p if self.expect(name is not None, "Unable to retrieve psu {} fan {} name".format(j, i)): self.expect(isinstance(name, STRING_TYPE), "psu {} fan {} name appears incorrect".format(j, i)) - self.compare_value_with_platform_facts(duthost, 'name', name, j, i) + self.expect(duthost._facts.get("platform") is not None, "Unable to retrieve platform name") + # + # Check whether platform.json file exists for this specific platform. If yes compare names. + # If not, skip comparison. + # + platform_file_path = os.path.join("/usr/share/sonic/device", duthost._facts.get("platform"), "platform.json") + platform_file_check = {} + try: + # + # Check if the JSON file exists in the specific path. Return 0 if it DOES exist. + # The command function throws exception if rc is non-zero, so handle it. + # + platform_file_check = duthost.command("[ -f {} ]".format(platform_file_path)) + except: + # The JSON file does not exist, so set rc to 1. 
+ platform_file_check['rc'] = 1 + if platform_file_check.get('rc') == 0: + logging.info("{} has a platform.json file. Running comparison with platform facts.".format(duthost._facts.get("platform"))) + self.compare_value_with_platform_facts(duthost, 'name', name, j, i) + else: + logging.info("{} does not have a platform.json file. Skipping comparison with platform facts.".format(duthost._facts.get("platform"))) self.assert_expectations() diff --git a/tests/platform_tests/api/test_sfp.py b/tests/platform_tests/api/test_sfp.py index fbae28efa36..f3657bd9cc8 100644 --- a/tests/platform_tests/api/test_sfp.py +++ b/tests/platform_tests/api/test_sfp.py @@ -151,7 +151,8 @@ def is_xcvr_optical(self, xcvr_info_dict): #For QSFP-DD specification compliance will return type as passive or active if xcvr_info_dict["type_abbrv_name"] == "QSFP-DD" or xcvr_info_dict["type_abbrv_name"] == "OSFP-8X" \ or xcvr_info_dict["type_abbrv_name"] == "QSFP+C": - if xcvr_info_dict["specification_compliance"] == "Passive Copper Cable": + if xcvr_info_dict["specification_compliance"] == "Passive Copper Cable" or \ + xcvr_info_dict["specification_compliance"] == "passive_copper_media_interface": return False else: spec_compliance_dict = ast.literal_eval(xcvr_info_dict["specification_compliance"]) @@ -253,7 +254,9 @@ def test_get_transceiver_info(self, duthosts, enum_rand_one_per_hwsku_hostname, actual_keys = info_dict.keys() duthost = duthosts[enum_rand_one_per_hwsku_hostname] - if duthost.sonic_release == "202012" or duthost.sonic_release == "202111": + # NOTE: No more releases to be added here. Platform should use SFP-refactor. 
+ # 'hardware_rev' is ONLY applicable to QSFP-DD/OSFP modules + if duthost.sonic_release in ["201811", "201911", "202012", "202106", "202111"]: EXPECTED_XCVR_INFO_KEYS = [key if key != 'vendor_rev' else 'hardware_rev' for key in self.EXPECTED_XCVR_INFO_KEYS] self.EXPECTED_XCVR_INFO_KEYS = EXPECTED_XCVR_INFO_KEYS @@ -333,6 +336,12 @@ def test_get_rx_los(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"]) for i in self.sfp_setup["sfp_test_port_indices"]: + info_dict = sfp.get_transceiver_info(platform_api_conn, i) + + if not self.is_xcvr_optical(info_dict): + logger.info("test_get_rx_los: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) + continue + rx_los = sfp.get_rx_los(platform_api_conn, i) if self.expect(rx_los is not None, "Unable to retrieve transceiver {} RX loss-of-signal data".format(i)): self.expect(isinstance(rx_los, list) and (all(isinstance(item, bool) for item in rx_los)), @@ -345,6 +354,12 @@ def test_get_tx_fault(self, duthosts, enum_rand_one_per_hwsku_hostname, localhos skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"]) for i in self.sfp_setup["sfp_test_port_indices"]: + info_dict = sfp.get_transceiver_info(platform_api_conn, i) + + if not self.is_xcvr_optical(info_dict): + logger.info("test_get_tx_fault: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) + continue + tx_fault = sfp.get_tx_fault(platform_api_conn, i) if self.expect(tx_fault is not None, "Unable to retrieve transceiver {} TX fault data".format(i)): self.expect(isinstance(tx_fault, list) and (all(isinstance(item, bool) for item in tx_fault)), @@ -357,6 +372,12 @@ def test_get_temperature(self, duthosts, enum_rand_one_per_hwsku_hostname, local skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"]) for i in self.sfp_setup["sfp_test_port_indices"]: + info_dict = sfp.get_transceiver_info(platform_api_conn, i) + + if not 
self.is_xcvr_optical(info_dict): + logger.info("test_get_temperature: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) + continue + temp = sfp.get_temperature(platform_api_conn, i) if self.expect(temp is not None, "Unable to retrieve transceiver {} temperatue".format(i)): self.expect(isinstance(temp, float), "Transceiver {} temperature appears incorrect".format(i)) @@ -368,6 +389,12 @@ def test_get_voltage(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"]) for i in self.sfp_setup["sfp_test_port_indices"]: + info_dict = sfp.get_transceiver_info(platform_api_conn, i) + + if not self.is_xcvr_optical(info_dict): + logger.info("test_get_voltage: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) + continue + voltage = sfp.get_voltage(platform_api_conn, i) if self.expect(voltage is not None, "Unable to retrieve transceiver {} voltage".format(i)): self.expect(isinstance(voltage, float), "Transceiver {} voltage appears incorrect".format(i)) @@ -419,13 +446,18 @@ def test_get_tx_power(self, duthosts, enum_rand_one_per_hwsku_hostname, localhos skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"]) for i in self.sfp_setup["sfp_test_port_indices"]: + info_dict = sfp.get_transceiver_info(platform_api_conn, i) + + if not self.is_xcvr_optical(info_dict): + logger.info("test_get_tx_power: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) + continue + tx_power = sfp.get_tx_power(platform_api_conn, i) if self.expect(tx_power is not None, "Unable to retrieve transceiver {} TX power data".format(i)): continue # Determine whether the transceiver type supports RX power # If the transceiver is non-optical, e.g., DAC, we should receive a list of "N/A" strings - info_dict = sfp.get_transceiver_info(platform_api_conn, i) if not self.expect(info_dict is not None, "Unable to retrieve transceiver {} info".format(i)): 
continue @@ -493,11 +525,19 @@ def test_tx_disable_channel(self, duthosts, enum_rand_one_per_hwsku_hostname, lo logger.warning("test_tx_disable_channel: Skipping transceiver {} (not applicable for this transceiver type)".format(i)) continue - # Test all TX disable combinations for a four-channel transceiver (i.e., 0x0 through 0xF) + if info_dict["type_abbrv_name"] == "QSFP-DD" or info_dict["type_abbrv_name"] == "OSFP-8X": + # Test all channels for a eight-channel transceiver + all_channel_mask = 0xFF + expected_mask = 0x80 + else: + # Test all channels for a four-channel transceiver + all_channel_mask = 0XF + expected_mask = 0x8 + # We iterate in reverse here so that we end with 0x0 (no channels disabled) - for expected_mask in range(0xF, 0x0, -1): + while expected_mask >= 0: # Enable TX on all channels - ret = sfp.tx_disable_channel(platform_api_conn, i, 0xF, False) + ret = sfp.tx_disable_channel(platform_api_conn, i, all_channel_mask, False) self.expect(ret is True, "Failed to enable TX on all channels for transceiver {}".format(i)) ret = sfp.tx_disable_channel(platform_api_conn, i, expected_mask, True) @@ -506,6 +546,11 @@ def test_tx_disable_channel(self, duthosts, enum_rand_one_per_hwsku_hostname, lo tx_disable_chan_mask = sfp.get_tx_disable_channel(platform_api_conn, i) if self.expect(tx_disable_chan_mask is not None, "Unable to retrieve transceiver {} TX disabled channel data".format(i)): self.expect(tx_disable_chan_mask == expected_mask, "Transceiver {} TX disabled channel data is incorrect".format(i)) + + if expected_mask == 0: + break + else: + expected_mask = expected_mask >> 1 self.assert_expectations() def _check_lpmode_status(self, sfp,platform_api_conn, i, state): @@ -514,8 +559,8 @@ def _check_lpmode_status(self, sfp,platform_api_conn, i, state): def test_lpmode(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, platform_api_conn): """This function tests both the get_lpmode() and set_lpmode() APIs""" for i in 
self.sfp_setup["sfp_test_port_indices"]: - # First ensure that the transceiver type supports low-power mode info_dict = sfp.get_transceiver_info(platform_api_conn, i) + # Ensure that the transceiver type supports low-power mode if not self.expect(info_dict is not None, "Unable to retrieve transceiver {} info".format(i)): continue diff --git a/tests/platform_tests/api/watchdog.yml b/tests/platform_tests/api/watchdog.yml index d7bae737c1b..55f8e86f69d 100644 --- a/tests/platform_tests/api/watchdog.yml +++ b/tests/platform_tests/api/watchdog.yml @@ -55,7 +55,7 @@ x86_64-mlnx_msn2700-r0: x86_64-dell.*: default: greater_timeout: 180 - too_big_timeout: 660 + too_big_timeout: 1200 # Arista watchdog x86_64-arista.*: diff --git a/tests/platform_tests/cli/test_show_platform.py b/tests/platform_tests/cli/test_show_platform.py index 28e3ed1314b..4968a1dece8 100644 --- a/tests/platform_tests/cli/test_show_platform.py +++ b/tests/platform_tests/cli/test_show_platform.py @@ -95,7 +95,7 @@ def test_show_platform_syseeprom(duthosts, enum_rand_one_per_hwsku_hostname, dut @summary: Verify output of `show platform syseeprom` """ duthost = duthosts[enum_rand_one_per_hwsku_hostname] - skip_release_for_platform(duthost, ["202012", "201911", "201811"], ["arista_7050","arista_7260"]) + skip_release_for_platform(duthost, ["202012", "201911", "201811"], ["arista_7050", "arista_7260", "arista_7060"]) cmd = " ".join([CMD_SHOW_PLATFORM, "syseeprom"]) logging.info("Verifying output of '{}' on '{}' ...".format(cmd, duthost.hostname)) @@ -106,7 +106,7 @@ def test_show_platform_syseeprom(duthosts, enum_rand_one_per_hwsku_hostname, dut """ Gather expected data from a inventory file instead if 'syseeprom_info' is defined in the inventory # Sample inventory with syseeprom: - + str-msn2700-01: ansible_host: 10.251.0.188 model: MSN2700-CS2FO @@ -221,7 +221,7 @@ def test_show_platform_psustatus_json(duthosts, enum_supervisor_dut_hostname): psu_info_list = json.loads(psu_status_output) # TODO: Compare 
against expected platform-specific output - if duthost.facts["platform"] == "x86_64-dellemc_z9332f_d1508-r0": + if duthost.facts["platform"] == "x86_64-dellemc_z9332f_d1508-r0" or duthost.facts['asic_type'] == "cisco-8000": led_status_list = ["N/A"] else: led_status_list = ["green", "amber", "red", "off"] diff --git a/tests/platform_tests/conftest.py b/tests/platform_tests/conftest.py index 4f36c53e2da..d6e15c5ec4b 100644 --- a/tests/platform_tests/conftest.py +++ b/tests/platform_tests/conftest.py @@ -7,7 +7,8 @@ from collections import OrderedDict from datetime import datetime -from tests.platform_tests.reboot_timing_constants import SERVICE_PATTERNS, OTHER_PATTERNS, SAIREDIS_PATTERNS, OFFSET_ITEMS, TIME_SPAN_ITEMS +from tests.platform_tests.reboot_timing_constants import SERVICE_PATTERNS, OTHER_PATTERNS,\ + SAIREDIS_PATTERNS, OFFSET_ITEMS, TIME_SPAN_ITEMS, REQUIRED_PATTERNS from tests.common.fixtures.advanced_reboot import get_advanced_reboot from tests.common.mellanox_data import is_mellanox_device from tests.common.broadcom_data import is_broadcom_device @@ -20,7 +21,8 @@ FMT_SHORT = "%b %d %H:%M:%S" SMALL_DISK_SKUS = [ "Arista-7060CX-32S-C32", - "Arista-7060CX-32S-Q32" + "Arista-7060CX-32S-Q32", + "Arista-7060CX-32S-D48C8" ] @@ -91,10 +93,11 @@ def get_state_times(timestamp, state, state_times, first_after_offset=None): state_dict = state_times.get(state_name, {"timestamp": {}}) timestamps = state_dict.get("timestamp") if state_status in timestamps: - state_dict[state_status+" count"] = state_dict.get(state_status+" count", 1) + 1 + state_dict[state_status+" count"] = state_dict.get(state_status+" count") + 1 # capture last occcurence - useful in calculating events end time state_dict["last_occurence"] = time elif first_after_offset: + state_dict[state_status+" count"] = 1 # capture the first occurence as the one after offset timestamp and ignore the ones before # this is useful to find time after a specific instance, for eg. - kexec time or FDB disable time. 
if _parse_timestamp(first_after_offset) < _parse_timestamp(time): @@ -102,11 +105,12 @@ def get_state_times(timestamp, state, state_times, first_after_offset=None): else: # only capture timestamp of first occurence of the entity. Otherwise, just increment the count above. # this is useful in capturing start point. Eg., first neighbor entry, LAG ready, etc. + state_dict[state_status+" count"] = 1 timestamps[state_status] = time return {state_name: state_dict} -def get_report_summary(analyze_result, reboot_type): +def get_report_summary(duthost, analyze_result, reboot_type, reboot_oper, base_os_version): time_spans = analyze_result.get("time_span", {}) time_spans_summary = OrderedDict() kexec_offsets = analyze_result.get("offset_from_kexec", {}) @@ -136,17 +140,21 @@ def get_report_summary(analyze_result, reboot_type): _parse_timestamp(marker_first_time)).total_seconds() time_spans_summary.update({entity.lower(): str(time_taken)}) - lacp_sessions_waittime = analyze_result.get(\ - "controlplane", {"lacp_sessions": []}).pop("lacp_sessions") + lacp_sessions_dict = analyze_result.get("controlplane") + lacp_sessions_waittime = lacp_sessions_dict.pop("lacp_sessions")\ + if lacp_sessions_dict and "lacp_sessions" in lacp_sessions_dict else None controlplane_summary = {"downtime": "", "arp_ping": "", "lacp_session_max_wait": ""} - if len(lacp_sessions_waittime) > 0: + if lacp_sessions_waittime and len(lacp_sessions_waittime) > 0: max_lacp_session_wait = max(list(lacp_sessions_waittime.values())) analyze_result.get(\ "controlplane", controlplane_summary).update( {"lacp_session_max_wait": max_lacp_session_wait}) result_summary = { - "reboot_type": reboot_type, + "reboot_type": "{}-{}".format(reboot_type, reboot_oper) if reboot_oper else reboot_type, + "hwsku": duthost.facts["hwsku"], + "base_ver": base_os_version[0] if base_os_version and len(base_os_version) else "", + "target_ver": get_current_sonic_version(duthost), "dataplane": analyze_result.get("dataplane", {"downtime": 
"", "lost_packets": ""}), "controlplane": analyze_result.get("controlplane", controlplane_summary), "time_span": time_spans_summary, @@ -179,7 +187,7 @@ def analyze_log_file(duthost, messages, result, offset_from_kexec): elif is_mellanox_device(duthost): derived_patterns.update(OTHER_PATTERNS.get("MLNX")) # get image specific regexes - if "20191130" in duthost.os_version: + if "20191130" in get_current_sonic_version(duthost): derived_patterns.update(OTHER_PATTERNS.get("201911")) service_patterns.update(SERVICE_PATTERNS.get("201911")) else: @@ -201,7 +209,9 @@ def service_time_check(message, status): service_dict = service_restart_times.get(service_name, {"timestamp": {}}) timestamps = service_dict.get("timestamp") if status in timestamps: - service_dict[status+" count"] = service_dict.get(status+" count", 1) + 1 + service_dict[status+" count"] = service_dict.get(status+" count") + 1 + else: + service_dict[status+" count"] = 1 timestamps[status] = time service_restart_times.update({service_name: service_dict}) @@ -269,7 +279,15 @@ def analyze_sairedis_rec(messages, result, offset_from_kexec): state_name = state.split("|")[0].strip() reboot_time = result.get("reboot_time", {}).get("timestamp", {}).get("Start") if state_name + "|End" not in SAIREDIS_PATTERNS.keys(): - state_times = get_state_times(timestamp, state, offset_from_kexec, first_after_offset=reboot_time) + if "FDB_EVENT_OTHER_MAC_EXPIRY" in state_name or "FDB_EVENT_SCAPY_MAC_EXPIRY" in state_name: + fdb_aging_disable_start = result.get("time_span", {}).get("FDB_AGING_DISABLE", {})\ + .get("timestamp", {}).get("Start") + if not fdb_aging_disable_start: + break + first_after_offset = fdb_aging_disable_start + else: + first_after_offset = result.get("reboot_time", {}).get("timestamp", {}).get("Start") + state_times = get_state_times(timestamp, state, offset_from_kexec, first_after_offset=first_after_offset) offset_from_kexec.update(state_times) else: state_times = get_state_times(timestamp, state, 
sai_redis_state_times, first_after_offset=reboot_time) @@ -301,7 +319,7 @@ def get_data_plane_report(analyze_result, reboot_type, log_dir, reboot_oper): analyze_result.update(report) -def verify_mac_jumping(test_name, timing_data): +def verify_mac_jumping(test_name, timing_data, verification_errors): mac_jumping_other_addr = timing_data.get("offset_from_kexec", {})\ .get("FDB_EVENT_OTHER_MAC_EXPIRY",{}).get("Start count", 0) mac_jumping_scapy_addr = timing_data.get("offset_from_kexec", {})\ @@ -318,11 +336,11 @@ def verify_mac_jumping(test_name, timing_data): logging.info("MAC jumping is allowed. Jump count for expected mac: {}, unexpected MAC: {}"\ .format(mac_jumping_scapy_addr, mac_jumping_other_addr)) if not mac_jumping_scapy_addr: - pytest.fail("MAC jumping not detected when expected for address: 00-06-07-08-09-0A") + verification_errors.append("MAC jumping not detected when expected for address: 00-06-07-08-09-0A") else: # MAC jumping not allowed - do not allow the SCAPY default MAC to jump if mac_jumping_scapy_addr: - pytest.fail("MAC jumping is not allowed. Jump count for scapy mac: {}, other MAC: {}"\ + verification_errors.append("MAC jumping is not allowed. 
Jump count for scapy mac: {}, other MAC: {}"\ .format(mac_jumping_scapy_addr, mac_jumping_other_addr)) if mac_jumping_other_addr: # In both mac jump allowed and denied cases unexpected MAC addresses should NOT jump between @@ -332,7 +350,21 @@ def verify_mac_jumping(test_name, timing_data): " and FDB learning enabled at {}".format(fdb_aging_disable_end)) if _parse_timestamp(mac_expiry_start) > _parse_timestamp(fdb_aging_disable_start) and\ _parse_timestamp(mac_expiry_start) < _parse_timestamp(fdb_aging_disable_end): - pytest.fail("Mac expiry detected during the window when FDB ageing was disabled") + verification_errors.append("Mac expiry detected during the window when FDB ageing was disabled") + + +def verify_required_events(duthost, event_counters, timing_data, verification_errors): + for key in ["time_span", "offset_from_kexec"]: + for pattern in REQUIRED_PATTERNS.get(key): + observed_start_count = timing_data.get(key).get(pattern).get("Start count") + observed_end_count = timing_data.get(key).get(pattern).get("End count") + expected_count = event_counters.get(pattern) + if observed_start_count != expected_count: + verification_errors.append("FAIL: Event {} was found {} times, when expected exactly {} times".\ + format(pattern, observed_start_count, expected_count)) + if key == "time_span" and observed_start_count != observed_end_count: + verification_errors.append("FAIL: Event {} counters did not match. 
".format(pattern) +\ + "Started {} times, and ended {} times".format(observed_start_count, observed_end_count)) def overwrite_script_to_backup_logs(duthost, reboot_type, bgpd_log): @@ -343,17 +375,21 @@ def overwrite_script_to_backup_logs(duthost, reboot_type, bgpd_log): # find the anchor string inside fast/warm-reboot script rebooting_log_line = "debug.*Rebooting with.*to.*" # Create a backup log command to be inserted right after the anchor string defined above - backup_log_cmds ="cp /var/log/syslog /host/syslog.99;" +\ - "cp /var/log/swss/sairedis.rec /host/sairedis.rec.99;" +\ - "cp /var/log/swss/swss.rec /host/swss.rec.99;" +\ - "cp {} /host/bgpd.log.99".format(bgpd_log) + backup_log_cmds ="cat /var/log/syslog.1 /var/log/syslog > /host/syslog.99 || true;" +\ + "cat /var/log/swss/sairedis.rec.1 /var/log/swss/sairedis.rec > /host/sairedis.rec.99 || true;" +\ + "cat /var/log/swss/swss.rec.1 /var/log/swss/swss.rec > /host/swss.rec.99 || true;" +\ + "cat {}.1 {} > /host/bgpd.log.99 || true".format(bgpd_log, bgpd_log) # Do find-and-replace on fast/warm-reboot script to insert the backup_log_cmds string insert_backup_command = "sed -i '/{}/a {}' {}".format(rebooting_log_line, backup_log_cmds, reboot_script_path) duthost.shell(insert_backup_command) +def get_current_sonic_version(duthost): + return duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] + + @pytest.fixture() -def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request): +def advanceboot_loganalyzer(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request): """ Advance reboot log analysis. This fixture starts log analysis at the beginning of the test. 
At the end, @@ -361,9 +397,9 @@ def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request): Args: duthosts : List of DUT hosts - rand_one_dut_hostname: hostname of a randomly selected DUT + enum_rand_one_per_hwsku_frontend_hostname: hostname of a randomly selected DUT """ - duthost = duthosts[rand_one_dut_hostname] + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] test_name = request.node.name if "warm" in test_name: reboot_type = "warm" @@ -377,25 +413,42 @@ def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request): device_marks = [arg for mark in request.node.iter_markers(name='device_type') for arg in mark.args] if 'vs' not in device_marks: pytest.skip('Testcase not supported for kvm') - - current_os_version = duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] - if 'SONiC-OS-201811' in current_os_version: - bgpd_log = "/var/log/quagga/bgpd.log" - else: - bgpd_log = "/var/log/frr/bgpd.log" - hwsku = duthost.facts["hwsku"] - if hwsku in SMALL_DISK_SKUS: - # For small disk devices, /var/log in mounted in tmpfs. - # Hence, after reboot the preboot logs are lost. - # For log_analyzer to work, it needs logs from the shutdown path - # Below method inserts a step in reboot script to back up logs to /host/ - overwrite_script_to_backup_logs(duthost, reboot_type, bgpd_log) + logs_in_tmpfs = list() - loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_advanced_reboot_{}".format(test_name), - additional_files={'/var/log/swss/sairedis.rec': 'recording on: /var/log/swss/sairedis.rec', bgpd_log: ''}) + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_advanced_reboot_{}".format(test_name)) + base_os_version = list() + + def bgpd_log_handler(preboot=False): + # check current OS version post-reboot. 
This can be different than preboot OS version in case of upgrade + current_os_version = get_current_sonic_version(duthost) + if preboot: + if 'SONiC-OS-201811' in current_os_version: + bgpd_log = "/var/log/quagga/bgpd.log" + else: + bgpd_log = "/var/log/frr/bgpd.log" + additional_files={'/var/log/swss/sairedis.rec': '', bgpd_log: ''} + loganalyzer.additional_files = list(additional_files.keys()) + loganalyzer.additional_start_str = list(additional_files.values()) + return bgpd_log + else: + # log_analyzer may start with quagga and end with frr, and frr.log might still have old logs. + # To avoid missing preboot log, or analyzing old logs, combine quagga and frr log into new file + duthost.shell("cat {} {} | sort -n > {}".format( + "/var/log/quagga/bgpd.log", "/var/log/frr/bgpd.log", "/var/log/bgpd.log"), module_ignore_errors=True) + loganalyzer.additional_files = ['/var/log/swss/sairedis.rec', '/var/log/bgpd.log'] def pre_reboot_analysis(): + log_filesystem = duthost.shell("df --output=fstype -h /var/log")['stdout'] + logs_in_tmpfs.append(True if (log_filesystem and "tmpfs" in log_filesystem) else False) + base_os_version.append(get_current_sonic_version(duthost)) + bgpd_log = bgpd_log_handler(preboot=True) + if hwsku in SMALL_DISK_SKUS or (len(logs_in_tmpfs) > 0 and logs_in_tmpfs[0] == True): + # For small disk devices, /var/log in mounted in tmpfs. + # Hence, after reboot the preboot logs are lost. 
+ # For log_analyzer to work, it needs logs from the shutdown path + # Below method inserts a step in reboot script to back up logs to /host/ + overwrite_script_to_backup_logs(duthost, reboot_type, bgpd_log) marker = loganalyzer.init() loganalyzer.load_common_config() @@ -410,28 +463,18 @@ def pre_reboot_analysis(): loganalyzer.match_regex = [] return marker - def post_reboot_analysis(marker, reboot_oper=None, log_dir=None): - if hwsku in SMALL_DISK_SKUS: + def post_reboot_analysis(marker, event_counters=None, reboot_oper=None, log_dir=None): + bgpd_log_handler() + if hwsku in SMALL_DISK_SKUS or (len(logs_in_tmpfs) > 0 and logs_in_tmpfs[0] == True): restore_backup = "mv /host/syslog.99 /var/log/; " +\ "mv /host/sairedis.rec.99 /var/log/swss/; " +\ "mv /host/swss.rec.99 /var/log/swss/; " +\ - "mv /host/bgpd.log.99 /var/log/frr/" + "mv /host/bgpd.log.99 /var/log/" duthost.shell(restore_backup, module_ignore_errors=True) # find the fast/warm-reboot script path reboot_script_path = duthost.shell('which {}'.format("{}-reboot".format(reboot_type)))['stdout'] # restore original script. If the ".orig" file does not exist (upgrade path case), ignore the error. duthost.shell("mv {} {}".format(reboot_script_path + ".orig", reboot_script_path), module_ignore_errors=True) - - # check current OS version post-reboot. 
This can be different than preboot OS version in case of upgrade - current_os_version = duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] - if 'SONiC-OS-201811' in current_os_version: - bgpd_log = "/var/log/quagga/bgpd.log" - else: - bgpd_log = "/var/log/frr/bgpd.log" - additional_files={'/var/log/swss/sairedis.rec': 'recording on: /var/log/swss/sairedis.rec', bgpd_log: ''} - loganalyzer.additional_files = list(additional_files.keys()) - loganalyzer.additional_start_str = list(additional_files.values()) - result = loganalyzer.analyze(marker, fail=False) analyze_result = {"time_span": dict(), "offset_from_kexec": dict()} offset_from_kexec = dict() @@ -459,12 +502,12 @@ def post_reboot_analysis(marker, reboot_oper=None, log_dir=None): else: time_data["time_taken"] = "N/A" + if reboot_oper and not isinstance(reboot_oper, str): + reboot_oper = type(reboot_oper).__name__ get_data_plane_report(analyze_result, reboot_type, log_dir, reboot_oper) - result_summary = get_report_summary(analyze_result, reboot_type) + result_summary = get_report_summary(duthost, analyze_result, reboot_type, reboot_oper, base_os_version) logging.info(json.dumps(analyze_result, indent=4)) logging.info(json.dumps(result_summary, indent=4)) - if reboot_oper and not isinstance(reboot_oper, str): - reboot_oper = type(reboot_oper).__name__ if reboot_oper: report_file_name = request.node.name + "_" + reboot_oper + "_report.json" summary_file_name = request.node.name + "_" + reboot_oper + "_summary.json" @@ -485,13 +528,18 @@ def post_reboot_analysis(marker, reboot_oper=None, log_dir=None): json.dump(result_summary, fp, indent=4) # After generating timing data report, do some checks on the timing data - verify_mac_jumping(test_name, analyze_result) + verification_errors = list() + verify_mac_jumping(test_name, analyze_result, verification_errors) + if duthost.facts['platform'] != 'x86_64-kvm_x86_64-r0': + # TBD: expand this verification to KVM - extra port events in KVM 
which need to be filtered + verify_required_events(duthost, event_counters, analyze_result, verification_errors) + return verification_errors yield pre_reboot_analysis, post_reboot_analysis @pytest.fixture() -def advanceboot_neighbor_restore(duthosts, rand_one_dut_hostname, nbrhosts, tbinfo): +def advanceboot_neighbor_restore(duthosts, enum_rand_one_per_hwsku_frontend_hostname, nbrhosts, tbinfo): """ This fixture is invoked at the test teardown for advanced-reboot SAD cases. If a SAD case fails or crashes for some reason, the neighbor VMs can be left in @@ -499,13 +547,13 @@ def advanceboot_neighbor_restore(duthosts, rand_one_dut_hostname, nbrhosts, tbin and BGP sessions that were shutdown during the test. """ yield - duthost = duthosts[rand_one_dut_hostname] + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] neighbor_vm_restore(duthost, nbrhosts, tbinfo) @pytest.fixture() -def capture_interface_counters(duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] +def capture_interface_counters(duthosts, enum_rand_one_per_hwsku_frontend_hostname): + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] logging.info("Run commands to print logs") show_counter_cmds = [ @@ -538,7 +586,7 @@ def capture_interface_counters(duthosts, rand_one_dut_hostname): res.pop('stdout') res.pop('stderr') outputs.append(res) - logging.info("Counters before reboot test: dut={}, cmd_outputs={}".format(duthost.hostname,json.dumps(outputs, indent=4))) + logging.debug("Counters before reboot test: dut={}, cmd_outputs={}".format(duthost.hostname,json.dumps(outputs, indent=4))) yield @@ -548,7 +596,8 @@ def capture_interface_counters(duthosts, rand_one_dut_hostname): res.pop('stdout') res.pop('stderr') outputs.append(res) - logging.info("Counters after reboot test: dut={}, cmd_outputs={}".format(duthost.hostname,json.dumps(outputs, indent=4))) + logging.debug("Counters after reboot test: dut={}, cmd_outputs={}".format(duthost.hostname,json.dumps(outputs, 
indent=4))) + def pytest_generate_tests(metafunc): diff --git a/tests/platform_tests/daemon/test_pcied.py b/tests/platform_tests/daemon/test_pcied.py index c53a207e1a9..b423f02357e 100644 --- a/tests/platform_tests/daemon/test_pcied.py +++ b/tests/platform_tests/daemon/test_pcied.py @@ -71,10 +71,16 @@ def check_daemon_status(duthosts, rand_one_dut_hostname): duthost.start_pmon_daemon(daemon_name) time.sleep(10) +def check_pcie_devices_table_ready(duthost): + if duthost.shell("redis-cli -n 6 keys '*' | grep PCIE_DEVICES"): + return True + return False + @pytest.fixture(scope="module", autouse=True) def get_pcie_devices_tbl_key(duthosts,rand_one_dut_hostname): duthost = duthosts[rand_one_dut_hostname] skip_release(duthost, ["201811", "201911"]) + pytest_assert(wait_until(30, 10, 0, check_pcie_devices_table_ready, duthost), "PCIE_DEVICES table is empty") command_output = duthost.shell("redis-cli -n 6 keys '*' | grep PCIE_DEVICES") global pcie_devices_status_tbl_key @@ -92,12 +98,15 @@ def collect_data(duthost): dev_summary_status = duthost.get_pmon_daemon_db_value(pcie_devices_status_tbl_key, status_field) return {'status': dev_summary_status, 'devices': dev_data} -def wait_data(duthost): +def wait_data(duthost, expected_key_count): class shared_scope: data_after_restart = {} def _collect_data(): shared_scope.data_after_restart = collect_data(duthost) - return bool(shared_scope.data_after_restart['devices']) + device_keys_found = len(shared_scope.data_after_restart['devices']) + if device_keys_found != 0: + logger.info("Expected PCIE device keys :{}, Current device key count {}".format(expected_key_count, device_keys_found)) + return device_keys_found == expected_key_count pcied_pooling_interval = 60 wait_until(pcied_pooling_interval, 6, 0, _collect_data) return shared_scope.data_after_restart @@ -160,7 +169,7 @@ def test_pmon_pcied_stop_and_start_status(check_daemon_status, duthosts, rand_on pytest_assert(post_daemon_pid > pre_daemon_pid, "Restarted {} pid should be 
bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - data_after_restart = wait_data(duthost) + data_after_restart = wait_data(duthost, len(data_before_restart['devices'])) pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') @@ -190,7 +199,7 @@ def test_pmon_pcied_term_and_start_status(check_daemon_status, duthosts, rand_on "{} expected pid is -1 but is {}".format(daemon_name, post_daemon_pid)) pytest_assert(post_daemon_pid > pre_daemon_pid, "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - data_after_restart = wait_data(duthost) + data_after_restart = wait_data(duthost, len(data_before_restart['devices'])) pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') @@ -217,5 +226,5 @@ def test_pmon_pcied_kill_and_start_status(check_daemon_status, duthosts, rand_on "{} expected pid is -1 but is {}".format(daemon_name, post_daemon_pid)) pytest_assert(post_daemon_pid > pre_daemon_pid, "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - data_after_restart = wait_data(duthost) + data_after_restart = wait_data(duthost, len(data_before_restart['devices'])) pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') diff --git a/tests/platform_tests/daemon/test_psud.py b/tests/platform_tests/daemon/test_psud.py index 61a56e6bf64..7888d27b68b 100644 --- a/tests/platform_tests/daemon/test_psud.py +++ b/tests/platform_tests/daemon/test_psud.py @@ -169,7 +169,7 @@ def test_pmon_psud_term_and_start_status(check_daemon_status, duthosts, rand_one duthost.stop_pmon_daemon(daemon_name, SIG_TERM, pre_daemon_pid) - wait_until(50, 10, 0, check_expected_daemon_status, duthost, expected_running_status) + wait_until(50, 10, 5, check_expected_daemon_status, 
duthost, expected_running_status) post_daemon_status, post_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) pytest_assert(post_daemon_status == expected_running_status, diff --git a/tests/platform_tests/daemon/test_syseepromd.py b/tests/platform_tests/daemon/test_syseepromd.py index 2d74ad1f5a0..1152a2dd039 100644 --- a/tests/platform_tests/daemon/test_syseepromd.py +++ b/tests/platform_tests/daemon/test_syseepromd.py @@ -152,7 +152,10 @@ def test_pmon_syseepromd_stop_and_start_status(check_daemon_status, duthosts, ra "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') + pytest_assert( + data_after_restart['data'] == data_before_restart['data'], + 'DB data present before and after restart does not match, data_after_restart {}, data_before_restart {}' + .format(data_after_restart['data'], data_before_restart['data'])) def test_pmon_syseepromd_term_and_start_status(check_daemon_status, duthosts, rand_one_dut_hostname, data_before_restart): @@ -178,7 +181,10 @@ def test_pmon_syseepromd_term_and_start_status(check_daemon_status, duthosts, ra pytest_assert(post_daemon_pid > pre_daemon_pid, "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') + pytest_assert( + data_after_restart['data'] == data_before_restart['data'], + 'DB data present before and after restart does not match, data_after_restart {}, data_before_restart {}' + .format(data_after_restart['data'], data_before_restart['data'])) def test_pmon_syseepromd_kill_and_start_status(check_daemon_status, duthosts, rand_one_dut_hostname, data_before_restart): @@ -205,4 +211,7 @@ def 
test_pmon_syseepromd_kill_and_start_status(check_daemon_status, duthosts, ra pytest_assert(post_daemon_pid > pre_daemon_pid, "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') + pytest_assert( + data_after_restart['data'] == data_before_restart['data'], + 'DB data present before and after restart does not match, data_after_restart {}, data_before_restart {}' + .format(data_after_restart['data'], data_before_restart['data'])) diff --git a/tests/platform_tests/daemon/test_thermalctld.py b/tests/platform_tests/daemon/test_thermalctld.py deleted file mode 100644 index f7f5ea1fc21..00000000000 --- a/tests/platform_tests/daemon/test_thermalctld.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -Check daemon status inside PMON container. Each daemon status is checked under the conditions below in this script: -* Daemon Running Status -* Daemon Stop status -* Daemon Restart status - -This script is to cover the test case in the SONiC platform daemon and service test plan: -https://github.com/Azure/sonic-mgmt/blob/master/docs/testplan/PMON-Services-Daemons-test-plan.md -""" -import logging -import re -import time - -from datetime import datetime - -import pytest - -from tests.common.helpers.assertions import pytest_assert -from tests.common.platform.daemon_utils import check_pmon_daemon_enable_status -from tests.common.platform.processes_utils import wait_critical_processes, check_critical_processes -from tests.common.utilities import compose_dict_from_cli, skip_release, wait_until - -logger = logging.getLogger(__name__) - -pytestmark = [ - pytest.mark.topology('any'), - pytest.mark.sanity_check(skip_sanity=True), - pytest.mark.disable_loganalyzer -] - -expected_running_status = "RUNNING" -expected_stopped_status = "STOPPED" -expected_exited_status = 
"EXITED" - -daemon_name = "thermalctld" - -SIG_STOP_SERVICE = None -SIG_TERM = "-15" -SIG_KILL = "-9" - -STATE_DB = 6 -thermalctld_tbl_key = "" - -@pytest.fixture(scope="module", autouse=True) -def setup(duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] - daemon_en_status = check_pmon_daemon_enable_status(duthost, daemon_name) - if daemon_en_status is False: - pytest.skip("{} is not enabled in {}".format(daemon_name, duthost.facts['platform'], duthost.os_version)) - - -@pytest.fixture(scope="module", autouse=True) -def teardown_module(duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] - yield - - daemon_status, daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - if daemon_status is not "RUNNING": - duthost.start_pmon_daemon(daemon_name) - time.sleep(10) - logger.info("Tearing down: to make sure all the critical services, interfaces and transceivers are good") - check_critical_processes(duthost, watch_secs=10) - - -@pytest.fixture -def check_daemon_status(duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] - daemon_status, daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - if daemon_status is not "RUNNING": - duthost.start_pmon_daemon(daemon_name) - time.sleep(10) - -def check_expected_daemon_status(duthost, expected_daemon_status): - daemon_status, _ = duthost.get_pmon_daemon_status(daemon_name) - return daemon_status == expected_daemon_status - -def collect_data(duthost): - keys = duthost.shell('sonic-db-cli STATE_DB KEYS "PHYSICAL_ENTITY_INFO|*"')['stdout_lines'] - - dev_data = {} - for k in keys: - data = duthost.shell('sonic-db-cli STATE_DB HGETALL "{}"'.format(k))['stdout_lines'] - data = compose_dict_from_cli(data) - dev_data[k] = data - - return {'keys': keys, 'data': dev_data} - -def wait_data(duthost): - class shared_scope: - data_after_restart = {} - def _collect_data(): - shared_scope.data_after_restart = collect_data(duthost) - return 
bool(shared_scope.data_after_restart['data']) - thermalctld_pooling_interval = 60 - wait_until(thermalctld_pooling_interval, 6, 0, _collect_data) - return shared_scope.data_after_restart - -@pytest.fixture(scope='module') -def data_before_restart(duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] - - data = collect_data(duthost) - return data - - -def test_pmon_thermalctld_running_status(duthosts, rand_one_dut_hostname, data_before_restart): - """ - @summary: This test case is to check thermalctld status on dut - """ - duthost = duthosts[rand_one_dut_hostname] - daemon_status, daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - logger.info("{} daemon is {} with pid {}".format(daemon_name, daemon_status, daemon_pid)) - pytest_assert(daemon_status == expected_running_status, - "{} expected running status is {} but is {}".format(daemon_name, expected_running_status, daemon_status)) - pytest_assert(daemon_pid != -1, - "{} expected pid is a positive integer but is {}".format(daemon_name, daemon_pid)) - - pytest_assert(data_before_restart['keys'], "DB keys is not availale on daemon running") - pytest_assert(data_before_restart['data'], "DB data is not availale on daemon running") - - -def test_pmon_thermalctld_stop_and_start_status(check_daemon_status, duthosts, rand_one_dut_hostname, data_before_restart): - """ - @summary: This test case is to check the thermalctld stopped and restarted status - """ - duthost = duthosts[rand_one_dut_hostname] - pre_daemon_status, pre_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - logger.info("{} daemon is {} with pid {}".format(daemon_name, pre_daemon_status, pre_daemon_pid)) - - duthost.stop_pmon_daemon(daemon_name, SIG_STOP_SERVICE) - time.sleep(2) - - daemon_status, daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - pytest_assert(daemon_status == expected_stopped_status, - "{} expected stopped status is {} but is {}".format(daemon_name, expected_stopped_status, daemon_status)) - 
pytest_assert(daemon_pid == -1, - "{} expected pid is -1 but is {}".format(daemon_name, daemon_pid)) - - data = collect_data(duthost) - pytest_assert(not data['keys'], "DB data keys is not cleared on daemon stop") - pytest_assert(not data['data'], "DB data is not cleared on daemon stop") - - duthost.start_pmon_daemon(daemon_name) - time.sleep(10) - - post_daemon_status, post_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - pytest_assert(post_daemon_status == expected_running_status, - "{} expected restarted status is {} but is {}".format(daemon_name, expected_running_status, post_daemon_status)) - pytest_assert(post_daemon_pid != -1, - "{} expected pid is not -1 but is {}".format(daemon_name, post_daemon_pid)) - pytest_assert(post_daemon_pid > pre_daemon_pid, - "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - - data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') - - -def test_pmon_thermalctld_term_and_start_status(check_daemon_status, duthosts, rand_one_dut_hostname, data_before_restart): - """ - @summary: This test case is to check the thermalctld terminated and restarted status - """ - duthost = duthosts[rand_one_dut_hostname] - - skip_release(duthost, ["201811", "201911"]) - - pre_daemon_status, pre_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - logger.info("{} daemon is {} with pid {}".format(daemon_name, pre_daemon_status, pre_daemon_pid)) - - duthost.stop_pmon_daemon(daemon_name, SIG_TERM, pre_daemon_pid) - - wait_until(50, 10, 0, check_expected_daemon_status, duthost, expected_running_status) - - post_daemon_status, post_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - pytest_assert(post_daemon_status == expected_running_status, - "{} expected restarted status is {} but is {}".format(daemon_name, expected_running_status, post_daemon_status)) - 
pytest_assert(post_daemon_pid != -1, - "{} expected pid is not -1 but is {}".format(daemon_name, post_daemon_pid)) - pytest_assert(post_daemon_pid > pre_daemon_pid, - "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') - - -def test_pmon_thermalctld_kill_and_start_status(check_daemon_status, duthosts, rand_one_dut_hostname, data_before_restart): - """ - @summary: This test case is to check the thermalctld killed unexpectedly (automatically restarted) status - """ - duthost = duthosts[rand_one_dut_hostname] - pre_daemon_status, pre_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - logger.info("{} daemon is {} with pid {}".format(daemon_name, pre_daemon_status, pre_daemon_pid)) - - duthost.stop_pmon_daemon(daemon_name, SIG_KILL, pre_daemon_pid) - - daemon_status, daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - pytest_assert(daemon_status != expected_running_status, - "{} unexpected killed status is not {}".format(daemon_name, daemon_status)) - - time.sleep(10) - - post_daemon_status, post_daemon_pid = duthost.get_pmon_daemon_status(daemon_name) - pytest_assert(post_daemon_status == expected_running_status, - "{} expected restarted status is {} but is {}".format(daemon_name, expected_running_status, post_daemon_status)) - pytest_assert(post_daemon_pid != -1, - "{} expected pid is not -1 but is {}".format(daemon_name, post_daemon_pid)) - pytest_assert(post_daemon_pid > pre_daemon_pid, - "Restarted {} pid should be bigger than {} but it is {}".format(daemon_name, pre_daemon_pid, post_daemon_pid)) - data_after_restart = wait_data(duthost) - pytest_assert(data_after_restart == data_before_restart, 'DB data present before and after restart does not match') diff --git a/tests/platform_tests/files/valid_policy.json 
b/tests/platform_tests/files/valid_policy.json index 9343746d374..965f581b44a 100644 --- a/tests/platform_tests/files/valid_policy.json +++ b/tests/platform_tests/files/valid_policy.json @@ -1,6 +1,6 @@ { "thermal_control_algorithm": { - "run_at_boot_up": "false", + "run_at_boot_up": "true", "fan_speed_when_suspend": "60" }, "info_types": [ @@ -23,10 +23,6 @@ } ], "actions": [ - { - "type": "thermal_control.control", - "status": "false" - }, { "type": "fan.all.set_speed", "speed": "100" @@ -41,10 +37,6 @@ } ], "actions": [ - { - "type": "thermal_control.control", - "status": "false" - }, { "type": "fan.all.set_speed", "speed": "100" diff --git a/tests/platform_tests/fwutil/conftest.py b/tests/platform_tests/fwutil/conftest.py index 8e1f82ecc29..43486debf29 100644 --- a/tests/platform_tests/fwutil/conftest.py +++ b/tests/platform_tests/fwutil/conftest.py @@ -2,29 +2,32 @@ import json import pytest import logging - +import os from random import randrange - +import subprocess from fwutil_common import show_firmware logger = logging.getLogger(__name__) -DUT_HOME="/home/admin" -DEVICES_PATH="/usr/share/sonic/device" +DUT_HOME = "/home/admin" +DEVICES_PATH = "/usr/share/sonic/device" FS_PATH_TEMPLATE = "/host/image-{}/fs.squashfs" FS_RW_TEMPLATE = "/host/image-{}/rw" FS_WORK_TEMPLATE = "/host/image-{}/work" FS_MOUNTPOINT_TEMPLATE = "/tmp/image-{}-fs" OVERLAY_MOUNTPOINT_TEMPLATE = "/tmp/image-{}-overlay" -def check_path_exists(path): - return duthost.stat(path = path)["stat"]["exists"] + +def check_path_exists(duthost, path): + return duthost.stat(path=path)["stat"]["exists"] + def pytest_generate_tests(metafunc): val = metafunc.config.getoption('--fw-pkg') - if 'fw_pkg_name' in metafunc.fixturenames and val is not None: + if 'fw_pkg_name' in metafunc.fixturenames: metafunc.parametrize('fw_pkg_name', [val], scope="module") + @pytest.fixture(scope='module') def fw_pkg(fw_pkg_name): if fw_pkg_name is None: @@ -32,8 +35,8 @@ def fw_pkg(fw_pkg_name): logger.info("Unpacking 
firmware package to ./firmware") try: os.mkdir("firmware") - except Exception as e: - pass # Already exists, thats fine + except OSError: + pass # Already exists, thats fine with tarfile.open(fw_pkg_name, "r:gz") as f: f.extractall("./firmware/") with open('./firmware/firmware.json', 'r') as fw: @@ -41,6 +44,7 @@ def fw_pkg(fw_pkg_name): yield fw_data subprocess.call("rm -rf firmware", shell=True) + @pytest.fixture(scope='function') def random_component(duthost, fw_pkg): chass = show_firmware(duthost)["chassis"].keys()[0] @@ -48,19 +52,19 @@ def random_component(duthost, fw_pkg): if len(components) == 0: pytest.skip("No suitable components found in config file for platform {}.".format(duthost.facts['platform'])) + return components[randrange(len(components))] - return components[randrange(len(components))] @pytest.fixture(scope='function') def host_firmware(localhost, duthost): logger.info("Starting local python server to test URL firmware update....") - comm = "python3 -m http.server --directory {}".format(os.path.join(DEVICES_PATH, - duthost.facts['platform'])) + comm = "python3 -m http.server --directory {}".format(os.path.join(DEVICES_PATH, duthost.facts['platform'])) duthost.command(comm, module_ignore_errors=True, module_async=True) yield "http://localhost:8000/" logger.info("Stopping local python server.") duthost.command('pkill -f "{}"'.format(comm), module_ignore_errors=True) + @pytest.fixture(scope='function') def next_image(duthost, fw_pkg): @@ -90,6 +94,7 @@ def next_image(duthost, fw_pkg): overlay_mountpoint = OVERLAY_MOUNTPOINT_TEMPLATE.format(target) logger.info("Attempting to stage test firware onto newly-installed image.") + # noinspection PyBroadException try: wait_until(10, 1, 0, check_path_exists, fs_rw) @@ -105,12 +110,11 @@ def next_image(duthost, fw_pkg): overlay_mountpoint ) duthost.command(cmd) - except Exception as e: + except Exception: pytest.fail("Failed to setup next-image.") duthost.command("sonic_installer set-default 
{}".format(current)) yield overlay_mountpoint logger.info("Ensuring correct image is set to default boot.") - duthost.command("sonic_installer set-default {}".format(current)) - + duthost.command("sonic-installer remove {} -y".format("SONiC-OS-{}".format(target))) diff --git a/tests/platform_tests/fwutil/fwutil_common.py b/tests/platform_tests/fwutil/fwutil_common.py index cb6c668c5b0..f3497de284c 100644 --- a/tests/platform_tests/fwutil/fwutil_common.py +++ b/tests/platform_tests/fwutil/fwutil_common.py @@ -1,7 +1,10 @@ +import time import pytest import os import json import logging +import allure +import re from copy import deepcopy @@ -14,20 +17,28 @@ POWER_CYCLE = "power off" FAST_REBOOT = "fast" -DEVICES_PATH="usr/share/sonic/device" -TIMEOUT=3600 +DEVICES_PATH = "usr/share/sonic/device" +TIMEOUT = 3600 REBOOT_TYPES = { COLD_REBOOT: "reboot", WARM_REBOOT: "warm-reboot", FAST_REBOOT: "fast-reboot" } + def find_pattern(lines, pattern): for line in lines: if pattern.match(line): return True return False + +def get_hw_revision(duthost): + out = duthost.command("show platform summary") + rev_line = out["stdout"].splitlines()[6] + return rev_line.split(": ")[1] + + def power_cycle(duthost=None, pdu_ctrl=None, delay_time=60): if pdu_ctrl is None: pytest.skip("No PSU controller for %s, skipping" % duthost.hostname) @@ -42,17 +53,21 @@ def power_cycle(duthost=None, pdu_ctrl=None, delay_time=60): for outlet in all_outlets: pdu_ctrl.turn_on_outlet(outlet) + def reboot(duthost, pdu_ctrl, reboot_type, pdu_delay=60): - if reboot_type == POWER_CYCLE: + if reboot_type == POWER_CYCLE: power_cycle(duthost, pdu_ctrl, pdu_delay) return - if reboot_type not in REBOOT_TYPES: pytest.fail("Invalid reboot type {}".format(reboot_type)) + if reboot_type not in REBOOT_TYPES: + pytest.fail("Invalid reboot type {}".format(reboot_type)) logger.info("Rebooting using {}".format(reboot_type)) duthost.command(REBOOT_TYPES[reboot_type], module_ignore_errors=True, module_async=True) -def 
complete_install(duthost, localhost, boot_type, res, pdu_ctrl, auto_reboot=False, current=None, next_image=None, timeout=TIMEOUT, pdu_delay=60): + +def complete_install(duthost, localhost, boot_type, res, pdu_ctrl, auto_reboot=False, current=None, next_image=None, + timeout=TIMEOUT, pdu_delay=60): hn = duthost.mgmt_ip if boot_type != "none": @@ -62,7 +77,7 @@ def complete_install(duthost, localhost, boot_type, res, pdu_ctrl, auto_reboot=F logger.info("Rebooting switch using {} boot".format(boot_type)) duthost.command("sonic-installer set-default {}".format(current)) reboot(duthost, pdu_ctrl, boot_type, pdu_delay) - + logger.info("Waiting on switch to shutdown...") # Wait for ssh flap localhost.wait_for(host=hn, port=22, state='stopped', delay=10, timeout=timeout) @@ -87,12 +102,12 @@ def complete_install(duthost, localhost, boot_type, res, pdu_ctrl, auto_reboot=F wait_until(300, 30, 0, duthost.critical_services_fully_started) time.sleep(60) + def show_firmware(duthost): out = duthost.command("fwutil show status") - num_spaces = 2 curr_chassis = "" - output_data = {"chassis":{}} + output_data = {"chassis": {}} status_output = out['stdout'] separators = re.split(r'\s{2,}', status_output.splitlines()[1]) # get separators output_lines = status_output.splitlines()[2:] @@ -114,26 +129,48 @@ def show_firmware(duthost): return output_data -def get_install_paths(duthost, fw, versions, chassis): - component = fw["chassis"].get(chassis, {})["component"] + + +def get_install_paths(duthost, defined_fw, versions, chassis, target_component): + component = get_defined_components(duthost, defined_fw, chassis) ver = versions["chassis"].get(chassis, {})["component"] - + paths = {} for comp, revs in component.items(): if comp in ver: if revs[0].get("upgrade_only", False) and ver[comp] not in [r["version"] for r in revs]: - log.warning("Firmware is upgrade only and existing firmware {} is not present in version list. 
Skipping {}".format(ver[comp], comp)) + logger.warning("Firmware is upgrade only and existing firmware {} is not present in version list. " + "Skipping {}".format(ver[comp], comp)) continue for i, rev in enumerate(revs): + if "hw_revision" in rev and rev["hw_revision"] != get_hw_revision(duthost): + logger.warning("Firmware {} only supports HW Revision {} and this chassis is {}. Skipping". + format(rev["version"], rev["hw_revision"], get_hw_revision(duthost))) + continue if rev["version"] != ver[comp]: paths[comp] = rev break elif rev.get("upgrade_only", False): - log.warning("Firmware is upgrade only and newer version than {} is not available. Skipping {}".format(ver[comp], comp)) + logger.warning("Firmware is upgrade only and newer version than {} is not available. Skipping {}". + format(ver[comp], comp)) break return paths + +def get_defined_components(duthost, defined_fw, chassis): + """ + Update the component content, in case there is a pre-definition for a specific host. + Sometimes, if there is some DUTs has specific component(for example a respined board which requires + a different CPLD) - it can be defined in the firmware.json file + """ + component = defined_fw["chassis"].get(chassis, {})["component"] + if "host" in defined_fw and duthost.hostname in defined_fw["host"]: + for component_type in defined_fw["host"][duthost.hostname]["component"].keys(): + component[component_type] = defined_fw["host"][duthost.hostname]["component"][component_type] + return component + + def generate_config(duthost, cfg, versions): valid_keys = ["firmware", "version"] chassis = versions["chassis"].keys()[0] @@ -143,40 +180,36 @@ def generate_config(duthost, cfg, versions): for comp in versions["chassis"][chassis]["component"].keys(): paths[comp] = paths.get(comp, {}) if "firmware" in paths[comp]: - paths[comp]["firmware"] = os.path.join("/", DEVICES_PATH, - duthost.facts["platform"], - os.path.basename(paths[comp]["firmware"])) + paths[comp]["firmware"] = os.path.join("/", 
DEVICES_PATH, duthost.facts["platform"], + os.path.basename(paths[comp]["firmware"])) # Populate items we are installing with open("platform_components.json", "w") as f: - json.dump({"chassis":{chassis:{"component":{comp:{k: v - for k, v in dat.items() - if k in valid_keys} - for comp, dat in paths.items()}}}}, f, indent=4) + json.dump({"chassis": {chassis: {"component": {comp: {k: v for k, v in dat.items() if k in valid_keys} + for comp, dat in paths.items()}}}}, f, indent=4) + def upload_platform(duthost, paths, next_image=None): target = next_image if next_image else "/" # Backup the original platform_components.json file - duthost.fetch(dest=os.path.join("firmware", "platform_components_backup.json"), - src=os.path.join(target, DEVICES_PATH, duthost.facts["platform"], "platform_components.json"), - flat=True) + duthost.fetch(dest=os.path.join("firmware", "platform_components_backup.json"), + src=os.path.join(target, DEVICES_PATH, duthost.facts["platform"], "platform_components.json"), + flat=True) logger.info("Backing up platform_components.json") # Copy over the platform_components.json file - duthost.copy(src="platform_components.json", - dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) + duthost.copy(src="platform_components.json", dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) logger.info("Copying platform_components.json to {}".format( os.path.join(target, DEVICES_PATH, duthost.facts["platform"]))) for comp, dat in paths.items(): - duthost.copy(src=os.path.join("firmware", dat["firmware"]), - dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) - if "install" in dat: - duthost.copy(src=os.path.join("firmware", dat["install"]["firmware"]), - dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) - logger.info("Copying {} to {}".format(os.path.join("firmware", dat["install"]["firmware"]), - os.path.join(target, DEVICES_PATH, duthost.facts["platform"]))) + if 
dat["firmware"].startswith("http"): + duthost.get_url(url=dat["firmware"], dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) + else: + duthost.copy(src=os.path.join("firmware", dat["firmware"]), + dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"])) + def validate_versions(init, final, config, chassis, boot): final = final["chassis"][chassis]["component"] @@ -188,13 +221,21 @@ def validate_versions(init, final, config, chassis, boot): return False return True -def call_fwutil(duthost, localhost, pdu_ctrl, fw, component=None, next_image=None, boot=None, basepath=None): - logger.info("Calling fwutil with component: {} | next_image: {} | boot: {} | basepath: {}".format(component, next_image, boot, basepath)) + + +def call_fwutil(duthost, localhost, pdu_ctrl, fw_pkg, component=None, next_image=None, boot=None, basepath=None): + allure.step("Collect firmware versions") + logger.info("Calling fwutil with component: {} | next_image: {} | boot: {} | basepath: {}".format(component, + next_image, + boot, basepath)) init_versions = show_firmware(duthost) logger.info("Initial Versions: {}".format(init_versions)) - chassis = init_versions["chassis"].keys()[0] # Only one chassis - paths = get_install_paths(duthost, fw, init_versions, chassis) + # Only one chassis + chassis = init_versions["chassis"].keys()[0] + paths = get_install_paths(duthost, fw_pkg, init_versions, chassis, component) current = duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] + if component not in paths: + pytest.skip("No available firmware to install on {}. Skipping".format(component)) generate_config(duthost, paths, init_versions) upload_platform(duthost, paths, next_image) @@ -208,8 +249,6 @@ def call_fwutil(duthost, localhost, pdu_ctrl, fw, component=None, next_image=Non if component is None: command += " all" else: - if component not in paths: - pytest.skip("No available firmware to install on {}. 
Skipping".format(component)) command += " chassis component {} fw".format(component) if basepath is not None: @@ -237,24 +276,33 @@ def call_fwutil(duthost, localhost, pdu_ctrl, fw, component=None, next_image=Non if paths[comp].get("auto_reboot", False): auto_reboot = True timeout = max([v.get("timeout", TIMEOUT) for k, v in paths.items()]) - pdu_delay = fw["chassis"][chassis].get("power_cycle_delay", 60) + pdu_delay = fw_pkg["chassis"][chassis].get("power_cycle_delay", 60) complete_install(duthost, localhost, boot_type, res, pdu_ctrl, auto_reboot, current, next_image, timeout, pdu_delay) + allure.step("Collect Updated Firmware Versions") + time.sleep(2) # Give a little bit of time in case of no-op install for mounts to complete final_versions = show_firmware(duthost) - assert validate_versions(init_versions, final_versions, paths, chassis, boot_type) - - duthost.copy(src=os.path.join("firmware", "platform_components_backup.json"), - dest=os.path.join(target, DEVICES_PATH, duthost.facts["platform"], "platform_components.json")) - logger.info("Restoring backup platform_components.json to {}".format( - os.path.join(DEVICES_PATH, duthost.facts["platform"]))) - - update_needed = copy(fw) + test_result = validate_versions(init_versions, final_versions, paths, chassis, boot_type) + + allure.step("Begin Switch Restoration") + if next_image is None: + duthost.copy(src=os.path.join("firmware", "platform_components_backup.json"), + dest=os.path.join("/", DEVICES_PATH, duthost.facts["platform"], "platform_components.json")) + logger.info("Restoring backup platform_components.json to {}".format( + os.path.join(DEVICES_PATH, duthost.facts["platform"]))) + + update_needed = deepcopy(fw_pkg) + update_needed["chassis"][chassis]["component"] = {} + defined_components = get_defined_components(duthost, fw_pkg, chassis) + final_components = final_versions["chassis"][chassis]["component"] for comp in paths.keys(): - if fw["chassis"][chassis]["component"][comp][0]["version"] == 
final_versions[comp] or paths[comp]["upgrade_only"]: - del update_needed["chassis"][chassis]["component"][comp] + if defined_components[comp][0]["version"] != final_components[comp] and \ + boot in defined_components[comp][0]["reboot"] + [None] and \ + not paths[comp].get("upgrade_only", False): + update_needed["chassis"][chassis]["component"][comp] = defined_components[comp] if len(update_needed["chassis"][chassis]["component"].keys()) > 0: logger.info("Latest firmware not installed after test. Installing....") - call_fwutil(duthost, localhost, pdu_ctrl, update_needed, component, None, boot, basepath) - - return True + call_fwutil(duthost, localhost, pdu_ctrl, update_needed, component, None, boot, + os.path.join("/", DEVICES_PATH, duthost.facts['platform']) if basepath is not None else None) + return test_result diff --git a/tests/platform_tests/fwutil/test_fwutil.py b/tests/platform_tests/fwutil/test_fwutil.py index 4a728bae307..cc0f6746092 100644 --- a/tests/platform_tests/fwutil/test_fwutil.py +++ b/tests/platform_tests/fwutil/test_fwutil.py @@ -2,17 +2,20 @@ import pytest import os import json - from fwutil_common import call_fwutil, show_firmware, upload_platform, find_pattern -DEVICES_PATH="/usr/share/sonic/device" +pytestmark = [ + pytest.mark.topology("any") +] + +DEVICES_PATH = "/usr/share/sonic/device" + def test_fwutil_show(duthost): """Tests that fwutil show has all components defined for platform""" - platform_comp = {} duthost.fetch(dest=os.path.join("firmware", "platform_components_backup.json"), - src=os.path.join(DEVICES_PATH, duthost.facts["platform"], "platform_components.json"), - flat=True) + src=os.path.join(DEVICES_PATH, duthost.facts["platform"], "platform_components.json"), + flat=True) with open(os.path.join("firmware", "platform_components_backup.json")) as f: platform_comp = json.load(f) @@ -24,85 +27,96 @@ def test_fwutil_show(duthost): assert show_fw_comp_set == platform_comp_set + def test_fwutil_install_file(duthost, localhost, 
pdu_controller, fw_pkg, random_component): """Tests manually installing firmware to a component from a file.""" assert call_fwutil(duthost, - localhost, - pdu_controller, - fw_pkg, - component=random_component, - basepath=os.path.join(DEVICES_PATH, duthost.facts['platform'])) + localhost, + pdu_controller, + fw_pkg, + component=random_component, + basepath=os.path.join(DEVICES_PATH, duthost.facts['platform'])) + def test_fwutil_install_url(duthost, localhost, pdu_controller, fw_pkg, random_component, host_firmware): """Tests manually installing firmware to a component from a URL.""" assert call_fwutil(duthost, - localhost, - pdu_controller, - fw_pkg, - component=random_component, - basepath=host_firmware) + localhost, + pdu_controller, + fw_pkg, + component=random_component, + basepath=host_firmware) + -def test_fwutil_install_bad_name(duthost, fw_pkg): +def test_fwutil_install_bad_name(duthost): """Tests that fwutil install validates component names correctly.""" out = duthost.command("fwutil install chassis component BAD fw BAD.pkg", module_ignore_errors=True) pattern = re.compile(r'.*Invalid value for ""*.') - found = find_pattern(out['stderr_lines'], pattern) - assert found + assert find_pattern(out['stderr_lines'], pattern) -def test_fwutil_install_bad_path(duthost, fw_pkg, random_component): + +def test_fwutil_install_bad_path(duthost, random_component): """Tests that fwutil install validates firmware paths correctly.""" - out = duthost.command("fwutil install chassis component {} fw BAD.pkg".format(random_component), module_ignore_errors=True) + out = duthost.command("fwutil install chassis component {} fw BAD.pkg".format(random_component), + module_ignore_errors=True) pattern = re.compile(r'.*Error: Invalid value for ""*.') - found = find_pattern(out['stderr_lines'], pattern) - assert found + assert find_pattern(out['stderr_lines'], pattern) + def test_fwutil_update_current(duthost, localhost, pdu_controller, fw_pkg, random_component): """Tests updating 
firmware from current image using fwutil update""" assert call_fwutil(duthost, - localhost, - pdu_controller, - fw_pkg, - component=random_component) + localhost, + pdu_controller, + fw_pkg, + component=random_component) + def test_fwutil_update_next(duthost, localhost, pdu_controller, fw_pkg, random_component, next_image): """Tests updating firmware from the "next" image using fwutil update""" assert call_fwutil(duthost, - localhost, - pdu_controller, - fw_pkg, - component=random_component, - next_image=next_image) + localhost, + pdu_controller, + fw_pkg, + component=random_component, + next_image=next_image) -def test_fwutil_update_bad_config(duthost, fw_pkg, random_component): + +def test_fwutil_update_bad_config(duthost, random_component): """Tests that fwutil update validates the platform_components.json schema correctly.""" versions = show_firmware(duthost) - chassis = versions["chassis"].keys()[0] # Only one chassis + chassis = versions["chassis"].keys()[0] # Only one chassis # Test fwutil update with config file without chassis section with open("platform_components.json", "w") as f: json.dump({}, f, indent=4) upload_platform(duthost, {}) - out_empty_json = duthost.command("fwutil update chassis component {} fw -y".format(random_component), module_ignore_errors=True) + out_empty_json = duthost.command("fwutil update chassis component {} fw -y".format(random_component), + module_ignore_errors=True) pattern_bad_platform = re.compile(r'.*Error: Failed to parse "platform_components.json": invalid platform schema*.') found_bad_platform = find_pattern(out_empty_json['stdout_lines'], pattern_bad_platform) assert found_bad_platform # Test fwutil update with config file without component section with open("platform_components.json", "w") as f: - json.dump({"chassis":{chassis:{}}}, f, indent=4) + json.dump({"chassis": {chassis: {}}}, f, indent=4) upload_platform(duthost, {}) - out_empty_chassis = duthost.command("fwutil update chassis component {} fw 
-y".format(random_component), module_ignore_errors=True) + out_empty_chassis = duthost.command("fwutil update chassis component {} fw -y". + format(random_component), module_ignore_errors=True) pattern_bad_chassis = re.compile(r'.*Error: Failed to parse "platform_components.json": invalid chassis schema*.') found_bad_chassis = find_pattern(out_empty_chassis['stdout_lines'], pattern_bad_chassis) assert found_bad_chassis # Test fwutil update with config file with version of type dict with open("platform_components.json", "w") as f: - json.dump({"chassis":{chassis:{"component":{random_component:{"version":{"version":"ver"}}}}}} - , f, indent=4) + json.dump({"chassis": {chassis: {"component": {random_component: {"version": {"version": "ver"}}}}}}, + f, + indent=4) upload_platform(duthost, {}) - out_bad_version = duthost.command("fwutil update chassis component {} fw -y".format(random_component), module_ignore_errors=True) - pattern_bad_component = re.compile(r'.*Error: Failed to parse "platform_components.json": invalid component schema*.') + out_bad_version = duthost.command("fwutil update chassis component {} fw -y".format(random_component), + module_ignore_errors=True) + pattern_bad_component = re.compile(r'.*Error: Failed to parse "platform_components.json": ' + r'invalid component schema*.') found_bad_component = find_pattern(out_bad_version['stdout_lines'], pattern_bad_component) assert found_bad_component @@ -111,8 +125,7 @@ def test_fwutil_update_bad_config(duthost, fw_pkg, random_component): def test_fwutil_auto(duthost, localhost, pdu_controller, fw_pkg, reboot_type): """Tests fwutil update all command ability to properly select firmware for install based on boot type.""" assert call_fwutil(duthost, - localhost, - pdu_controller, - fw_pkg, - reboot=reboot_type) - + localhost, + pdu_controller, + fw_pkg, + boot=reboot_type) diff --git a/tests/platform_tests/link_flap/link_flap_utils.py b/tests/platform_tests/link_flap/link_flap_utils.py index 
867d6720410..5523e3aef21 100644 --- a/tests/platform_tests/link_flap/link_flap_utils.py +++ b/tests/platform_tests/link_flap/link_flap_utils.py @@ -218,7 +218,7 @@ def check_bgp_routes(dut, start_time_ipv4_route_counts, start_time_ipv6_route_co """ MAX_DIFF = 5 - sumv4, sumv6 = dut.get_ip_route_summary() + sumv4, sumv6 = dut.get_ip_route_summary(skip_kernel_tunnel=True) totalsv4 = sumv4.get('Totals', {}) totalsv6 = sumv6.get('Totals', {}) routesv4 = totalsv4.get('routes', 0) diff --git a/tests/platform_tests/link_flap/test_cont_link_flap.py b/tests/platform_tests/link_flap/test_cont_link_flap.py index 212cc468a91..ad6a97764a0 100644 --- a/tests/platform_tests/link_flap/test_cont_link_flap.py +++ b/tests/platform_tests/link_flap/test_cont_link_flap.py @@ -55,7 +55,10 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws logging.info("Redis Memory: %s M", start_time_redis_memory) # Record ipv4 route counts at start - sumv4, sumv6 = duthost.get_ip_route_summary() + sumv4, sumv6 = duthost.get_ip_route_summary(skip_kernel_tunnel=True) + logging.debug("sumv4 {} ".format(sumv4)) + logging.debug("sumv6 {} ".format(sumv6)) + totalsv4 = sumv4.get('Totals', {}) totalsv6 = sumv6.get('Totals', {}) start_time_ipv4_route_counts = totalsv4.get('routes', 0) @@ -86,7 +89,7 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws # Make Sure all ipv4/ipv6 routes are relearned with jitter of ~5 if not wait_until(120, 2, 0, check_bgp_routes, duthost, start_time_ipv4_route_counts, start_time_ipv6_route_counts): - endv4, endv6 = duthost.get_ip_route_summary() + endv4, endv6 = duthost.get_ip_route_summary(skip_kernel_tunnel=True) failmsg = [] failmsg.append( "IP routes are not equal after link flap: before ipv4 {} ipv6 {}, after ipv4 {} ipv6 {}".format(sumv4, diff --git a/tests/platform_tests/mellanox/check_sysfs.py b/tests/platform_tests/mellanox/check_sysfs.py index 9b0c09e4a69..6bdb5bc9c5e 100644 --- 
a/tests/platform_tests/mellanox/check_sysfs.py +++ b/tests/platform_tests/mellanox/check_sysfs.py @@ -129,7 +129,7 @@ def check_sysfs(dut): assert "Invalid PSU fan speed value {} for PSU {}, exception: {}".format(psu_info["fan_speed"], psu_id, e) - if "201911" not in dut.os_version and "202012" not in dut.os_version: + if "201911" not in dut.sonic_release and "202012" not in dut.sonic_release: # Check consistency between voltage capability and sysfs all_capabilities = platform_data["psus"].get("capabilities") if all_capabilities: diff --git a/tests/platform_tests/mellanox/test_check_sfp_eeprom.py b/tests/platform_tests/mellanox/test_check_sfp_eeprom.py new file mode 100644 index 00000000000..7d9c789578e --- /dev/null +++ b/tests/platform_tests/mellanox/test_check_sfp_eeprom.py @@ -0,0 +1,65 @@ +import pytest +import allure + +from tests.common.fixtures.conn_graph_facts import conn_graph_facts # noqa F401 +from util import parse_sfp_eeprom_infos, check_sfp_eeprom_info, is_support_dom, get_pci_cr0_path + +pytestmark = [ + pytest.mark.asic('mellanox'), + pytest.mark.topology('any') +] + +SHOW_EEPOMR_CMDS = ["show interface transceiver eeprom -d", "sudo sfputil show eeprom -d"] + + +@pytest.fixture(scope="module", autouse=True) +def sfp_test_intfs_to_dom_map(duthosts, rand_one_dut_hostname, conn_graph_facts, xcvr_skip_list): # noqa F811 + ''' + This fixture is to get map sfp test intfs to dom + ''' + duthost = duthosts[rand_one_dut_hostname] + + ports_map = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']["PORT"] + port_name_to_index_map = dict([(port, value["index"]) for port, value in ports_map.items()]) + + sfp_test_intf_list = conn_graph_facts["device_conn"][duthost.hostname].keys() + + intf_with_dom_dict = {} + sfp_test_intfs_to_dom_map_dict = {} + pic_cr0_path = get_pci_cr0_path(duthost) + for intf in sfp_test_intf_list: + if intf not in xcvr_skip_list[duthost.hostname]: + port_index = port_name_to_index_map[intf] + if port_index in 
intf_with_dom_dict: + inft_support_dom = intf_with_dom_dict[port_index] + else: + inft_support_dom = is_support_dom(duthost, port_index, pic_cr0_path) + intf_with_dom_dict[port_index] = inft_support_dom + sfp_test_intfs_to_dom_map_dict[intf] = inft_support_dom + + return sfp_test_intfs_to_dom_map_dict + + +@pytest.mark.parametrize("show_eeprom_cmd", SHOW_EEPOMR_CMDS) +def test_check_sfp_eeprom_with_option_dom(duthosts, rand_one_dut_hostname, show_eeprom_cmd, sfp_test_intfs_to_dom_map): + """This test case is to check result of transceiver eeprom with option -d is correct or not for every interface . + It will do below checks for every available interface + 1. Check if all expected keys exist in the the result + 2. When cable support dom, check the corresponding keys related to monitor exist, + and the the corresponding value has correct format + """ + duthost = duthosts[rand_one_dut_hostname] + + with allure.step("Run: {} to get transceiver eeprom info".format(show_eeprom_cmd)): + check_eeprom_dom_output = duthost.command(show_eeprom_cmd) + assert check_eeprom_dom_output["rc"] == 0, "Failed to read eeprom info for all interfaces" + sfp_info_dict = parse_sfp_eeprom_infos(check_eeprom_dom_output["stdout"]) + + with allure.step("Check results for {}".format(show_eeprom_cmd)): + for intf, inft_support_dom in sfp_test_intfs_to_dom_map.items(): + if intf in sfp_info_dict: + with allure.step("Check {}".format(intf)): + if sfp_info_dict[intf] == "SFP EEPROM Not detected": + allure.step("{}: SFP EEPROM Not detected".format(intf)) + continue + check_sfp_eeprom_info(duthost, sfp_info_dict[intf], inft_support_dom, show_eeprom_cmd) diff --git a/tests/platform_tests/mellanox/util.py b/tests/platform_tests/mellanox/util.py new file mode 100644 index 00000000000..6efc12f2ac4 --- /dev/null +++ b/tests/platform_tests/mellanox/util.py @@ -0,0 +1,372 @@ +import re +import logging +import ast + +pattern_top_layer_key_value = r"^(?PEthernet\d+):(?P.*)" +pattern_second_layer_key_value = 
r"(^\s{8}|\t{1})(?P[a-zA-Z0-9][a-zA-Z0-9\s\/\(\)-]+):(?P.*)" +pattern_third_layer_key_value = r"(^\s{16}|\t{2})(?P[a-zA-Z0-9][a-zA-Z0-9\s\/]+):(?P.*)" + +pattern_digit_unit = r"^(?P-[0-9\.]+|[0-9.]+)(?PdBm|mA|C|c|Volts)" + + +def parse_one_sfp_eeprom_info(sfp_eeprom_info): + """ + Parse the one sfp eeprom info, return top_key, sfp_eeprom_info_dict + e.g + sfp_info: + Ethernet0: SFP EEPROM detected + Application Advertisement: N/A + Connector: No separable connector + Encoding: 64B/66B + Extended Identifier: Power Class 3 Module (2.5W max.), + No CLEI code present in Page 02h, CDR present in TX, CDR present in RX + Extended RateSelect Compliance: Unknown + Identifier: QSFP28 or later + Length Cable Assembly(m): 3.0 + Nominal Bit Rate(100Mbs): 255 + Specification compliance: + 10/40G Ethernet Compliance Code: Extended + Extended Specification Compliance: 100G AOC (Active Optical Cable) or 25GAUI C2M AOC + Fibre Channel Link Length: Unknown + Fibre Channel Speed: Unknown + Fibre Channel Transmission Media: Unknown + Fibre Channel Transmitter Technology: Unknown + Gigabit Ethernet Compliant Codes: Unknown + SAS/SATA Compliance Codes: Unknown + SONET Compliance Codes: Unknown + Vendor Date Code(YYYY-MM-DD Lot): 2019-01-17 + Vendor Name: Mellanox + Vendor OUI: 00-02-c9 + Vendor PN: MFA1A00-C003 + Vendor Rev: B2 + Vendor SN: MT1903FT05965 + ChannelMonitorValues: + RX1Power: 0.927dBm + RX2Power: 0.938dBm + RX3Power: 0.912dBm + RX4Power: 0.95dBm + TX1Bias: 6.75mA + TX1Power: 1.071dBm + TX2Bias: 6.75mA + TX2Power: 1.04dBm + TX3Bias: 6.75mA + TX3Power: 1.039dBm + TX4Bias: 6.75mA + TX4Power: 1.031dBm + ChannelThresholdValues: + RxPowerHighAlarm : 5.4dBm + RxPowerHighWarning: 2.4dBm + RxPowerLowAlarm : -13.307dBm + RxPowerLowWarning : -10.301dBm + TxBiasHighAlarm : 8.5mA + TxBiasHighWarning : 8.0mA + TxBiasLowAlarm : 5.492mA + TxBiasLowWarning : 6.0mA + ModuleMonitorValues: + Temperature: 43.105C + Vcc: 3.235Volts + ModuleThresholdValues: + TempHighAlarm : 80.0C + 
TempHighWarning: 70.0C + TempLowAlarm : -10.0C + TempLowWarning : 0.0C + VccHighAlarm : 3.5Volts + VccHighWarning : 3.465Volts + VccLowAlarm : 3.1Volts + VccLowWarning : 3.135Volts + top_key, sfp_eeprom_info_dict: + Ethernet0, + { + 'Ethernet0': 'SFP EEPROM detected', + 'Application Advertisement': 'N/A', + 'Connector': 'No separable connector', + 'Encoding': '64B/66B', + 'Extended Identifier': 'Power Class 3 Module (2.5W max.), + No CLEI code present in Page 02h, CDR present in TX, CDR present in RX', + 'Extended RateSelect Compliance': 'Unknown', + 'Identifier': 'QSFP28 or later', + 'Length Cable Assembly(m)': '3.0', + 'Nominal Bit Rate(100Mbs)': '255', + 'Specification compliance': { + '10/40G Ethernet Compliance Code': 'Extended', + 'Extended Specification Compliance': '100G AOC (Active Optical Cable) or 25GAUI C2M AOC', + 'Fibre Channel Link Length': 'Unknown', + 'Fibre Channel Speed': 'Unknown', + 'Fibre Channel Transmission Media': 'Unknown', + 'Fibre Channel Transmitter Technology': 'Unknown', + 'Gigabit Ethernet Compliant Codes': 'Unknown', + 'SAS/SATA Compliance Codes': 'Unknown', + 'SONET Compliance Codes': 'Unknown' + }, + 'Vendor Date Code(YYYY-MM-DD Lot)': '2019-01-17', + 'Vendor Name': 'Mellanox', + 'Vendor OUI': '00-02-c9', + 'Vendor PN': 'MFA1A00-C003', + 'Vendor Rev': 'B2', + 'Vendor SN': 'MT1903FT05965', + 'ChannelMonitorValues': { + 'RX1Power': '0.927dBm', + 'RX2Power': '0.938dBm', + 'RX3Power': '0.912dBm', + 'RX4Power': '0.95dBm', + 'TX1Bias': '6.75mA', + 'TX1Power': '1.071dBm', + 'TX2Bias': '6.75mA', + 'TX2Power': '1.04dBm', + 'TX3Bias': '6.75mA', + 'TX3Power': '1.039dBm', + 'TX4Bias': '6.75mA', + 'TX4Power': '1.031dBm' + }, + 'ChannelThresholdValues': { + 'RxPowerHighAlarm': '5.4dBm', + 'RxPowerHighWarning': '2.4dBm', + 'RxPowerLowAlarm': '-13.307dBm', + 'RxPowerLowWarning': '-10.301dBm', + 'TxBiasHighAlarm': '8.5mA', + 'TxBiasHighWarning': '8.0mA', + 'TxBiasLowAlarm': '5.492mA', + 'TxBiasLowWarning': '6.0mA' + }, + 'ModuleMonitorValues': { + 
'Temperature': '43.105C', + 'Vcc': '3.235Volts' + }, + 'ModuleThresholdValues': { + 'TempHighAlarm': '80.0C', + 'TempHighWarning': '70.0C', + 'TempLowAlarm': '-10.0C', + 'TempLowWarning': '0.0C', + 'VccHighAlarm': '3.5Volts', + 'VccHighWarning': '3.465Volts', + 'VccLowAlarm': '3.1Volts', + 'VccLowWarning': '3.135Volts' + } + } + """ + one_sfp_eeprom_info_dict = {} + second_layer_dict = {} + previous_key = "" + top_key = "" + for line in sfp_eeprom_info.split("\n"): + res1 = re.match(pattern_top_layer_key_value, line) + if res1: + top_key = res1.groupdict()["key"].strip() + one_sfp_eeprom_info_dict[top_key] = res1.groupdict()["value"].strip() + continue + res2 = re.match(pattern_second_layer_key_value, line) + if res2: + if second_layer_dict and previous_key: + one_sfp_eeprom_info_dict[previous_key] = second_layer_dict + second_layer_dict = {} + one_sfp_eeprom_info_dict[res2.groupdict()["key"].strip()] = res2.groupdict()["value"].strip() + previous_key = res2.groupdict()["key"].strip() + else: + res3 = re.match(pattern_third_layer_key_value, line) + if res3: + second_layer_dict[res3.groupdict()["key"].strip()] = res3.groupdict()["value"].strip() + if second_layer_dict and previous_key: + one_sfp_eeprom_info_dict[previous_key] = second_layer_dict + + return top_key, one_sfp_eeprom_info_dict + + +def parse_sfp_eeprom_infos(eeprom_infos): + """ + This method is to pares sfp eeprom infos, and return sfp_eeprom_info_dict + """ + sfp_eeprom_info_dict = {} + for sfp_info in eeprom_infos.split("\n\n"): + intf, eeprom_info = parse_one_sfp_eeprom_info(sfp_info) + sfp_eeprom_info_dict[intf] = eeprom_info + return sfp_eeprom_info_dict + + +def check_sfp_eeprom_info(duthost, sfp_eeprom_info, is_support_dom, show_eeprom_cmd): + """ + This method is check sfp info is correct or not. + 1. Check if all expected keys exist in the sfp_eeprom_info + 2. Check if Check Vendor name is Mellnaox and Vendor OUI is 00-02-c9 + 3. 
When cable support dom, check the corresponding keys related to monitor exist, + and the the corresponding value has correct format + """ + logging.info("Check all expected keys exist in sfp info") + expected_keys = set(["Application Advertisement", "Connector", "Encoding", "Extended Identifier", + "Extended RateSelect Compliance", "Identifier", "Nominal Bit Rate(100Mbs)", + "Specification compliance", "Vendor Date Code(YYYY-MM-DD Lot)", + "Vendor Name", "Vendor OUI", "Vendor PN", "Vendor Rev", "Vendor SN", "ChannelMonitorValues", + "ChannelThresholdValues", "ModuleMonitorValues", "ModuleThresholdValues"]) + excluded_keys = set() + if "202012" in duthost.os_version and show_eeprom_cmd == "sudo sfputil show eeprom -d": + if "TypeOfTransceiver" in sfp_eeprom_info and sfp_eeprom_info["TypeOfTransceiver"] == "SFP/SFP+/SFP28": + # There is a bug:https://github.com/sonic-net/sonic-buildimage/issues/12357 + # So for SFP/SFP+/SFP28, we need do special handle + expected_keys = set(["Connector", "EncodingCodes", "ExtIdentOfTypeOfTransceiver", + "NominalSignallingRate(UnitsOf100Mbd)", "RateIdentifier", + "ReceivedPowerMeasurementType", + "VendorDataCode(YYYY-MM-DD Lot)", "TypeOfTransceiver", "TransceiverCodes", + "VendorName", "VendorOUI", "VendorPN", "VendorRev", "VendorSN"]) + is_support_dom = False + else: + excluded_keys = set(["Application Advertisement", "ChannelThresholdValues", "ModuleThresholdValues"]) + expected_keys = expected_keys - excluded_keys + + if "Identifier" in sfp_eeprom_info and sfp_eeprom_info["Identifier"] == "SFP/SFP+/SFP28": + excluded_keys = excluded_keys | {"ChannelMonitorValues", "ChannelThresholdValues", "ModuleMonitorValues", + "ModuleThresholdValues"} + expected_keys = (expected_keys | {"MonitorData", "ThresholdData"}) - excluded_keys + + for key in expected_keys: + assert key in sfp_eeprom_info, "key {} doesn't exist in {}".format(key, sfp_eeprom_info) + + # For the parameter of Length, there are many different value for 
different cable, + # So here we just check if there is a key including the prefix of Length + is_length_key_exist = False + for key in sfp_eeprom_info.keys(): + if key.startswith("Length"): + is_length_key_exist = True + break + assert is_length_key_exist, "Key related to Length doesn't exist in {} ".format(sfp_eeprom_info) + + if is_support_dom: + pattern_power = r"^(?P-[0-9\.]+|[0-9.]+|-inf)(?PdBm$)" + pattern_bias = r"^(?P-[0-9\.]+|[0-9.]+)(?PmA$)" + pattern_temp = r"^(?P-[0-9\.]+|[0-9.]+)(?P[Cc]$)" + pattern_vcc = r"^(?P-[0-9\.]+|[0-9.]+)(?PVolts$)" + + expected_channel_threshold_values_keys_and_value_pattern = {"RxPowerHighAlarm": pattern_power, + "RxPowerHighWarning": pattern_power, + "RxPowerLowAlarm": pattern_power, + "RxPowerLowWarning": pattern_power, + "TxBiasHighAlarm": pattern_bias, + "TxBiasHighWarning": pattern_bias, + "TxBiasLowAlarm": pattern_bias, + "TxBiasLowWarning": pattern_bias} + expected_module_threshold_values_keys_and_value_pattern = {"TempHighAlarm": pattern_temp, + "TempHighWarning": pattern_temp, + "TempLowAlarm": pattern_temp, + "TempLowWarning": pattern_temp, + "VccHighAlarm": pattern_vcc, + "VccHighWarning": pattern_vcc, + "VccLowAlarm": pattern_vcc, + "VccLowWarning": pattern_vcc} + expected_module_monitor_values_keys_and_pattern = {"Temperature": pattern_temp, "Vcc": pattern_vcc} + + if sfp_eeprom_info["Identifier"] == "SFP/SFP+/SFP28": + expected_monitor_data_keys_and_pattern = {"RXPower": pattern_power, "TXBias": pattern_bias, + "TXPower": pattern_power, "Temperature": pattern_temp, + "Vcc": pattern_vcc} + expected_threshold_data_keys_and_pattern = {"TxPowerHighAlarm": pattern_power, + "TxPowerHighWarning": pattern_power, + "TxPowerLowAlarm": pattern_power, + "TxPowerLowWarning": pattern_power} + expected_threshold_data_keys_and_pattern.update(expected_channel_threshold_values_keys_and_value_pattern) + expected_threshold_data_keys_and_pattern.update(expected_module_threshold_values_keys_and_value_pattern) + + logging.info("Check 
if MonitorData's keys exist and the corresponding value format is correct") + check_dom_monitor_key_and_data_format(expected_monitor_data_keys_and_pattern, + sfp_eeprom_info["MonitorData"]) + + logging.info("check if ThresholdData's keys exist and the corresponding value format is correct") + check_dom_monitor_key_and_data_format(expected_threshold_data_keys_and_pattern, + sfp_eeprom_info["ThresholdData"]) + + else: + if "ChannelThresholdValues" not in excluded_keys: + logging.info( + "Check if ChannelThresholdValues' keys exist and the corresponding value format is correct") + check_dom_monitor_key_and_data_format(expected_channel_threshold_values_keys_and_value_pattern, + sfp_eeprom_info["ChannelThresholdValues"]) + + if "ModuleThresholdValues" not in excluded_keys: + logging.info("Check if ModuleThresholdValues' keys exist and the corresponding format is correct") + check_dom_monitor_key_and_data_format(expected_module_threshold_values_keys_and_value_pattern, + sfp_eeprom_info["ModuleThresholdValues"]) + + logging.info("Check if ChannelMonitorValues's value format is correct") + for k, v in sfp_eeprom_info["ChannelMonitorValues"].items(): + pattern = pattern_power if "Power" in k else pattern_bias + assert re.match(pattern, v), "Value of {}:{} format is not correct. pattern is {}".format(k, v, pattern) + + logging.info("Check ModuleMonitorValues keys exist and the corresponding value format is correct") + check_dom_monitor_key_and_data_format(expected_module_monitor_values_keys_and_pattern, + sfp_eeprom_info["ModuleMonitorValues"]) + + +def check_dom_monitor_key_and_data_format(expected_keys_and_pattern_dict, dom_monitor_data): + for key, pattern in expected_keys_and_pattern_dict.items(): + assert key in dom_monitor_data, "Key {} doesn't exist in {}".format(key, dom_monitor_data) + assert re.match(pattern, dom_monitor_data[key]), \ + "Value of {}:{} format is not correct. 
pattern is {}".format( + key, dom_monitor_data[key], pattern) + + +def is_support_dom(duthost, port_index, pic_cr0_path): + """ + This method is to check if cable support dom + 1. For 202012 branch(It not support mlxlink tool, so use get_transceiver_bulk_status to judge if it support dom) + 1) Get get transceiver bulk status + 2) Return True, When any one value for all parameters including power, bias,temperature and voltage is not in + ['N/A', '0.0', 0.0, '0.0000mA', '-inf'], else False. + 2. For other branches apart from 202012 + 1) Get the pci_cro info by mlxlink tool + 2) Return false, when all values of 5 fields + (Temperature|Voltage|Bias Current|Rx Power Current|Tx Power Current) are N/A, else True + """ + if duthost.sonic_release in ["202012"]: + bulk_status_str = get_transceiver_bulk_status(duthost, port_index) + bulk_status_str = bulk_status_str.replace('-inf', '\'-inf\'') + bulk_status_dict = ast.literal_eval(bulk_status_str) + for k, v in bulk_status_dict.items(): + if "power" in k or "bias" in k or "temperature" in k or "voltage" in k: + if v not in ['N/A', '0.0', 0.0, '0.0000mA', '-inf']: + logging.info("Port {} support dom".format(port_index)) + return True + logging.info("Port {} doesn't support dom".format(port_index)) + return False + else: + pattern_for_dom_check = r'^(Temperature|Voltage|Bias Current|Rx Power Current|Tx Power Current).*: N\/A.*' + pci_cr0 = get_mlxlink_pci_cr0(duthost, pic_cr0_path, port_index) + + check_support_dom_filed_number = 5 + not_support_dom_field_counter = 0 + for line in pci_cr0.split("\n"): + res = re.match(pattern_for_dom_check, line) + if res: + not_support_dom_field_counter += 1 + logging.info("Find {} Value is N/A: {}".format(not_support_dom_field_counter, line)) + if not_support_dom_field_counter >= check_support_dom_filed_number: + logging.info("Port {} doesn't support dom".format(port_index)) + return False + logging.info("Port {} support dom".format(port_index)) + return True + + +def 
get_transceiver_bulk_status(duthost, port_index): + """ + This method is to get transceiver bulk status + """ + cmd = """ +cat << EOF > get_transceiver_bulk_status.py +import sonic_platform.platform as P +info = P.Platform().get_chassis().get_sfp({}).get_transceiver_bulk_status() +print(info) +EOF +""".format(port_index) + duthost.shell(cmd) + return duthost.command("python3 get_transceiver_bulk_status.py")["stdout"] + + +def get_mlxlink_pci_cr0(duthost, pci_cr0_path, port_index): + """ + This method is to get the info of /dev/mst/*_pci_cr0 + """ + cmd = "sudo mlxlink -d {} -p {} -m".format(pci_cr0_path, port_index) + return duthost.command(cmd)["stdout"] + + +def get_pci_cr0_path(duthost): + """ + This method is to get path for /dev/mst/*_pci_cr0 + """ + return duthost.shell('ls /dev/mst/*_pci_cr0')['stdout'].strip() diff --git a/tests/platform_tests/reboot_timing_constants.py b/tests/platform_tests/reboot_timing_constants.py index 36cd9b0a5ab..a7309c3e475 100644 --- a/tests/platform_tests/reboot_timing_constants.py +++ b/tests/platform_tests/reboot_timing_constants.py @@ -1,3 +1,18 @@ +import re + + +REQUIRED_PATTERNS = { + "time_span": [ + "SAI_CREATE_SWITCH", + "INIT_VIEW", + "APPLY_VIEW" + ], + "offset_from_kexec": [ + "LAG_READY", + "PORT_READY" + ] +} + SERVICE_PATTERNS = { "LATEST": { "Stopping": re.compile(r'.*Stopping.*(service|container).*'), @@ -22,9 +37,9 @@ "FPMSYNCD_RECONCILIATION|Start": re.compile(r'.*NOTICE bgp#fpmsyncd: :- main: Warm-Restart timer started.*'), "FPMSYNCD_RECONCILIATION|End": re.compile(r'.*NOTICE bgp#fpmsyncd: :- main: Warm-Restart reconciliation processed.*'), "ROUTE_DEFERRAL_TIMER|Start": re.compile(r'.*ADJCHANGE: neighbor .* in vrf default Up.*'), - "ROUTE_DEFERRAL_TIMER|End": re.compile(r'.*rcvd End-of-RIB for IPv4 Unicast from.*'), + "ROUTE_DEFERRAL_TIMER|End": re.compile(r'.*rcvd End-of-RIB for .* Unicast from.*'), "FDB_AGING_DISABLE|Start": re.compile(r'.*NOTICE swss#orchagent.*setAgingFDB: Set switch.*fdb_aging_time 0 
sec'), - "FDB_AGING_DISABLE|End": re.compile(r'.*NOTICE swss#orchagent.*doAppSwitchTableTask: Set switch attribute fdb_aging_time to 600') + "FDB_AGING_DISABLE|End": re.compile(r'.*NOTICE swss#orchagent.*do.*Task: Set switch attribute fdb_aging_time to 600') }, "LATEST": { "INIT_VIEW|Start": re.compile(r'.*swss#orchagent.*notifySyncd.*sending syncd.*INIT_VIEW.*'), @@ -57,7 +72,9 @@ "SAI_CREATE_SWITCH|End": re.compile(r'.*\|g\|SAI_OBJECT_TYPE_SWITCH.*SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID.*'), "NEIGHBOR_ENTRY|Start": re.compile(r'.*\|c\|SAI_OBJECT_TYPE_NEIGHBOR_ENTRY.*'), "DEFAULT_ROUTE_SET|Start": re.compile(r'.*\|(S|s)\|SAI_OBJECT_TYPE_ROUTE_ENTRY.*0\.0\.0\.0/0.*SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION=SAI_PACKET_ACTION_FORWARD.*'), - "FDB_RESTORE|Start": re.compile(r'.*\|c\|SAI_OBJECT_TYPE_FDB_ENTRY.*') + "FDB_RESTORE|Start": re.compile(r'.*\|c\|SAI_OBJECT_TYPE_FDB_ENTRY.*'), + "FDB_EVENT_OTHER_MAC_EXPIRY|Start": re.compile(r".*\|n\|fdb_event.*fdb_entry.*mac.*(?!00:06:07:08:09:0A).*fdb_event.*SAI_FDB_EVENT_LEARNED.*SAI_FDB_ENTRY_ATTR_TYPE.*SAI_FDB_ENTRY_TYPE_DYNAMIC.*SAI_FDB_ENTRY_ATTR_PACKET_ACTION.*SAI_PACKET_ACTION_FORWARD.*"), + "FDB_EVENT_SCAPY_MAC_EXPIRY|Start": re.compile(r".*\|n\|fdb_event.*fdb_entry.*mac.*00:06:07:08:09:0A.*fdb_event.*SAI_FDB_EVENT_LEARNED.*SAI_FDB_ENTRY_ATTR_TYPE.*SAI_FDB_ENTRY_TYPE_DYNAMIC.*SAI_FDB_ENTRY_ATTR_PACKET_ACTION.*SAI_PACKET_ACTION_FORWARD.*"), } OFFSET_ITEMS = ['DATABASE', 'FINALIZER', 'INIT_VIEW', 'SYNCD_CREATE_SWITCH', diff --git a/tests/platform_tests/templates/expect_boot_messages b/tests/platform_tests/templates/expect_boot_messages index d8c194b960e..63dcc893fd5 100644 --- a/tests/platform_tests/templates/expect_boot_messages +++ b/tests/platform_tests/templates/expect_boot_messages @@ -21,7 +21,7 @@ r, ".* NOTICE bgp#fpmsyncd.*main: Warm-Restart timer started.*.*" r, ".* NOTICE bgp#fpmsyncd.*main: Warm-Restart reconciliation processed..*" r, ".* INFO syncd#syncd.*SAI_API_FDB:_brcm_sai_fdb_event_cb.*fdbEvent: 0 for mac.*" 
r, ".* NOTICE swss#orchagent.*setAgingFDB: Set switch.*fdb_aging_time 0 sec" -r, ".* NOTICE swss#orchagent.*doAppSwitchTableTask: Set switch attribute fdb_aging_time to 600" +r, ".* NOTICE swss#orchagent.*do.*Task: Set switch attribute fdb_aging_time to 600" # sairedis.rec messages r, ".*\|c\|SAI_OBJECT_TYPE_SWITCH.*" @@ -32,12 +32,15 @@ r, ".*\|c\|SAI_OBJECT_TYPE_FDB_ENTRY.*" # bgpd.log messages r, ".*ADJCHANGE: neighbor .* in vrf default Up.*" -r, ".*rcvd End-of-RIB for IPv4 Unicast from.*" +r, ".*rcvd End-of-RIB for .* Unicast from.*" -# mlnx specific regexes +# mlnx specific syslog regexes r, ".* syncd.*mlnx_sai_switch.*mlnx_create_switch: Create switch.*INIT_SWITCH=true.*" r, ".* syncd#SDK.*mlnx_sai_switch.*mlnx_create_switch.*Created switch Switch ID.*" +# mlnx specific sairedis regexes +r, ".*\|n\|fdb_event.*fdb_entry.*mac.*fdb_event.*SAI_FDB_EVENT_LEARNED.*SAI_FDB_ENTRY_ATTR_TYPE.*SAI_FDB_ENTRY_TYPE_DYNAMIC.*SAI_FDB_ENTRY_ATTR_PACKET_ACTION.*SAI_PACKET_ACTION_FORWARD.*" + # 201911 specific regexes r, ".* NOTICE admin: Stopping radv ....*" r, ".* INFO systemd.*Stopped Router advertiser container.*" diff --git a/tests/platform_tests/test_advanced_reboot.py b/tests/platform_tests/test_advanced_reboot.py index 46c85ea08f4..4dfdb06fee0 100644 --- a/tests/platform_tests/test_advanced_reboot.py +++ b/tests/platform_tests/test_advanced_reboot.py @@ -8,9 +8,13 @@ from tests.platform_tests.verify_dut_health import add_fail_step_to_reboot # lgtm[py/unused-import] from tests.platform_tests.warmboot_sad_cases import get_sad_case_list, SAD_CASE_LIST +from tests.common.fixtures.ptfhost_utils import run_icmp_responder +from tests.common.fixtures.ptfhost_utils import run_garp_service + pytestmark = [ pytest.mark.disable_loganalyzer, - pytest.mark.topology('t0') + pytest.mark.topology('t0'), + pytest.mark.skip_check_dut_health ] diff --git a/tests/platform_tests/test_cont_warm_reboot.py b/tests/platform_tests/test_cont_warm_reboot.py index f89506e67fa..c18c40f5d32 100644 --- 
a/tests/platform_tests/test_cont_warm_reboot.py +++ b/tests/platform_tests/test_cont_warm_reboot.py @@ -273,7 +273,7 @@ def create_test_report(self): format(self.test_failures, self.reboot_count)) - def start_continuous_reboot(self, request, duthost, ptfhost, localhost, tbinfo, creds): + def start_continuous_reboot(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds): self.test_set_up() # Start continuous warm/fast reboot on the DUT for count in range(self.continuous_reboot_count): @@ -284,7 +284,7 @@ def start_continuous_reboot(self, request, duthost, ptfhost, localhost, tbinfo, .format(self.reboot_count, self.continuous_reboot_count, self.reboot_type)) reboot_type = self.reboot_type + "-reboot" try: - self.advancedReboot = AdvancedReboot(request, duthost, ptfhost, localhost, tbinfo, creds,\ + self.advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, tbinfo, creds,\ rebootType=reboot_type, moduleIgnoreErrors=True) except Exception: self.sub_test_result = False @@ -326,7 +326,7 @@ def test_teardown(self): @pytest.mark.device_type('vs') -def test_continuous_reboot(request, duthosts, rand_one_dut_hostname, ptfhost, localhost, conn_graph_facts, tbinfo, creds): +def test_continuous_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, localhost, conn_graph_facts, tbinfo, creds): """ @summary: This test performs continuous reboot cycles on images that are provided as an input. 
Supported parameters for this test can be modified at runtime: @@ -347,7 +347,7 @@ def test_continuous_reboot(request, duthosts, rand_one_dut_hostname, ptfhost, lo Status of transceivers - ports in lab_connection_graph should be present Status of BGP neighbors - should be established """ - duthost = duthosts[rand_one_dut_hostname] + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] continuous_reboot = ContinuousReboot(request, duthost, ptfhost, localhost, conn_graph_facts) - continuous_reboot.start_continuous_reboot(request, duthost, ptfhost, localhost, tbinfo, creds) + continuous_reboot.start_continuous_reboot(request, duthosts, duthost, ptfhost, localhost, tbinfo, creds) continuous_reboot.test_teardown() diff --git a/tests/platform_tests/test_cpu_memory_usage.py b/tests/platform_tests/test_cpu_memory_usage.py index a2e7e5e139e..d90368a0d44 100644 --- a/tests/platform_tests/test_cpu_memory_usage.py +++ b/tests/platform_tests/test_cpu_memory_usage.py @@ -10,16 +10,34 @@ ] +def is_asan_image(duthosts, enum_rand_one_per_hwsku_hostname): + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + asan_val_from_sonic_ver_cmd = "sonic-cfggen -y /etc/sonic/sonic_version.yml -v asan" + asan_val = duthost.command(asan_val_from_sonic_ver_cmd)['stdout'] + is_asan = False + if asan_val == "yes": + logging.info("The current sonic image is a ASAN image") + is_asan = True + return is_asan + + @pytest.fixture(scope='module') def setup_thresholds(duthosts, enum_rand_one_per_hwsku_hostname): duthost = duthosts[enum_rand_one_per_hwsku_hostname] cpu_threshold = 50 memory_threshold = 60 high_cpu_consume_procs = {} - if duthost.facts['platform'] in ('x86_64-arista_7050_qx32', 'x86_64-kvm_x86_64-r0'): - memory_threshold = 80 + is_asan = is_asan_image(duthosts, enum_rand_one_per_hwsku_hostname) + if duthost.facts['platform'] in ('x86_64-arista_7050_qx32', 'x86_64-kvm_x86_64-r0', 'x86_64-cel_e1031-r0') or is_asan: + memory_threshold = 90 if duthost.facts['platform'] in 
('x86_64-arista_7260cx3_64'): high_cpu_consume_procs['syncd'] = 80 + # The CPU usage of `sx_sdk` on mellanox is expected to be higher, and the actual CPU usage + # is correlated with the number of ports. So we ignore the check of CPU for sx_sdk + if duthost.facts["asic_type"] == 'mellanox': + high_cpu_consume_procs['sx_sdk'] = 90 + num_cpu = int(duthost.command('nproc --all')['stdout_lines'][0]) + cpu_threshold = cpu_threshold * num_cpu return memory_threshold, cpu_threshold, high_cpu_consume_procs def test_cpu_memory_usage(duthosts, enum_rand_one_per_hwsku_hostname, setup_thresholds): @@ -42,7 +60,7 @@ def test_cpu_memory_usage(duthosts, enum_rand_one_per_hwsku_hostname, setup_thre for proc in monit_result.processes: cpu_threshold = normal_cpu_threshold if high_cpu_consume_procs.has_key(proc['name']): - cpu_threshold = high_cpu_consume_procs[proc['name']] + cpu_threshold = high_cpu_consume_procs[proc['name']] if proc['cpu_percent'] >= cpu_threshold: logging.debug("process %s(%d) cpu usage exceeds %d%%.", proc['name'], proc['pid'], cpu_threshold) diff --git a/tests/platform_tests/test_idle_driver.py b/tests/platform_tests/test_idle_driver.py new file mode 100644 index 00000000000..600f2c95da3 --- /dev/null +++ b/tests/platform_tests/test_idle_driver.py @@ -0,0 +1,27 @@ +""" +Some devices have potential problems entering idle state. We +expect to disable both intel idle driver and acpi idle driver, +or have no available idle state higher than 1 for all cpu. 
+""" +import logging +import pytest + +from tests.common.helpers.assertions import pytest_assert + +logger = logging.getLogger(__name__) + +pytestmark = [ + pytest.mark.topology('m0', 'mx'), +] + + +def test_idle_driver(duthosts, enum_rand_one_per_hwsku_hostname): + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + idle_driver_result = duthost.shell('cat /sys/devices/system/cpu/cpuidle/current_driver', module_ignore_errors=True) + if idle_driver_result['rc'] == 0 and idle_driver_result['stdout'] != "none": + cstates = duthost.shell('sed -n "s/.*C\\([0-9]*\\).*/\\1/p" ' + '/sys/devices/system/cpu/cpu*/cpuidle/state*/name')['stdout'].split() + max_cstate = max([int(cstate) for cstate in cstates]) + pytest_assert(max_cstate <= 1, + "When idle driver is present, cstate>1 is not allowed: max_cstate {}" + .format(max_cstate)) diff --git a/tests/platform_tests/test_memory_exhaustion.py b/tests/platform_tests/test_memory_exhaustion.py new file mode 100644 index 00000000000..957737d868c --- /dev/null +++ b/tests/platform_tests/test_memory_exhaustion.py @@ -0,0 +1,100 @@ +import logging +import time +import pytest + +from tests.common.helpers.assertions import pytest_assert +from tests.common.platform.processes_utils import wait_critical_processes +from tests.common.reboot import SONIC_SSH_PORT, SONIC_SSH_REGEX + +pytestmark = [ + pytest.mark.disable_loganalyzer, + pytest.mark.topology('any') +] + +SSH_SHUTDOWN_TIMEOUT = 360 +SSH_STARTUP_TIMEOUT = 420 + +SSH_STATE_ABSENT = "absent" +SSH_STATE_STARTED = "started" + + +class TestMemoryExhaustion: + """ + This test case is used to verify that DUT will reboot when it runs out of memory. + """ + + @pytest.fixture(autouse=True) + def teardown(self, duthost, localhost, pdu_controller): + yield + # If the SSH connection is not established, or any critical process is exited, + # try to recover the DUT by PDU reboot. 
+ dut_ip = duthost.mgmt_ip + hostname = duthost.hostname + if not self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED): + if pdu_controller is None: + logging.error("No PDU controller for {}, failed to recover DUT!".format(hostname)) + return + self.pdu_reboot(pdu_controller) + # Waiting for SSH connection startup + pytest_assert(self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED, SSH_STARTUP_TIMEOUT), + 'Recover {} by PDU reboot failed'.format(hostname)) + # Wait until all critical processes are healthy. + wait_critical_processes(duthost) + + def test_memory_exhaustion(self, duthost, localhost): + dut_ip = duthost.mgmt_ip + hostname = duthost.hostname + dut_datetime = duthost.get_now_time() + + # Our shell command is designed as 'nohup bash -c "sleep 5 && tail /dev/zero" &' because of: + # * `tail /dev/zero` is used to run out of memory completely. + # * Since `tail /dev/zero` will cause the DUT reboot, we need to run it in the background + # (using &) to avoid pytest getting stuck. `nohup` is also necessary to protect the + # background process. + # * Some DUTs with few free memory may reboot before ansible receive the result of shell + # command, so we add `sleep 5` to ensure ansible receive the result first. + cmd = 'nohup bash -c "sleep 5 && tail /dev/zero" &' + res = duthost.shell(cmd) + if not res.is_successful: + pytest.fail('DUT {} run command {} failed'.format(hostname, cmd)) + + # Waiting for SSH connection shutdown + pytest_assert(self.check_ssh_state(localhost, dut_ip, SSH_STATE_ABSENT, SSH_SHUTDOWN_TIMEOUT), + 'DUT {} did not shutdown'.format(hostname)) + # Waiting for SSH connection startup + pytest_assert(self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED, SSH_STARTUP_TIMEOUT), + 'DUT {} did not startup'.format(hostname)) + # Wait until all critical processes are healthy. + wait_critical_processes(duthost) + # Verify DUT uptime is later than the time when the test case started running. 
+ dut_uptime = duthost.get_up_time() + pytest_assert(dut_uptime > dut_datetime, "Device {} did not reboot".format(hostname)) + + def check_ssh_state(self, localhost, dut_ip, expected_state, timeout=60): + """ + Check the SSH state of DUT. + + :param localhost: A `tests.common.devices.local.Localhost` Object. + :param dut_ip: A string, the IP address of DUT. + :param expected_state: A string, the expected SSH state. + :param timeout: An integer, the maximum number of seconds to wait for. + :return: A boolean, True if SSH state is the same as expected + , False otherwise. + """ + res = localhost.wait_for(host=dut_ip, + port=SONIC_SSH_PORT, + state=expected_state, + search_regex=SONIC_SSH_REGEX, + delay=10, + timeout=timeout, + module_ignore_errors=True) + return not res.is_failed and 'Timeout' not in res.get('msg', '') + + def pdu_reboot(self, pdu_controller): + hostname = pdu_controller.dut_hostname + if not pdu_controller.turn_off_outlet(): + logging.error("Turn off the PDU outlets of {} failed".format(hostname)) + return + time.sleep(10) # sleep 10 second to ensure there is gap between power off and on + if not pdu_controller.turn_on_outlet(): + logging.error("Turn on the PDU outlets of {} failed".format(hostname)) diff --git a/tests/platform_tests/test_platform_info.py b/tests/platform_tests/test_platform_info.py index ac270700062..d326fcbace2 100644 --- a/tests/platform_tests/test_platform_info.py +++ b/tests/platform_tests/test_platform_info.py @@ -207,7 +207,7 @@ def check_all_psu_on(dut, psu_test_results): @pytest.mark.disable_loganalyzer @pytest.mark.parametrize('ignore_particular_error_log', [SKIP_ERROR_LOG_PSU_ABSENCE], indirect=True) -def test_turn_on_off_psu_and_check_psustatus(duthosts, enum_rand_one_per_hwsku_hostname, pdu_controller, ignore_particular_error_log): +def test_turn_on_off_psu_and_check_psustatus(duthosts, enum_rand_one_per_hwsku_hostname, pdu_controller, ignore_particular_error_log, tbinfo): """ @summary: Turn off/on PSU and check PSU 
status using 'show platform psustatus' """ @@ -235,8 +235,13 @@ def test_turn_on_off_psu_and_check_psustatus(duthosts, enum_rand_one_per_hwsku_h logging.info("Start testing turn off/on PSUs") all_outlet_status = pdu_ctrl.get_outlet_status() pytest_require(all_outlet_status and len(all_outlet_status) >= 2, 'Skip the test, cannot get at least 2 outlet status: {}'.format(all_outlet_status)) + if tbinfo["topo"]["properties"]["configuration_properties"]["common"]["dut_type"] == "MgmtTsToR": + all_outlet_status = all_outlet_status[0:-2] + logging.info("DUT is MgmtTsToR, the last 2 outlets are reserved for Console Switch and are not visible from DUT.") for outlet in all_outlet_status: psu_under_test = None + if outlet['outlet_on'] is False: + continue logging.info("Turn off outlet {}".format(outlet)) pdu_ctrl.turn_off_outlet(outlet) diff --git a/tests/platform_tests/test_reboot.py b/tests/platform_tests/test_reboot.py index ac3dc6aa7bd..bc903c14cd2 100644 --- a/tests/platform_tests/test_reboot.py +++ b/tests/platform_tests/test_reboot.py @@ -55,12 +55,15 @@ def reboot_and_check(localhost, dut, interfaces, xcvr_skip_list, reboot_type=REB @param reboot_helper: The helper function used only by power off reboot @param reboot_kwargs: The argument used by reboot_helper """ - + logging.info("Sync reboot cause history queue with DUT reboot cause history queue") sync_reboot_history_queue_with_dut(dut) logging.info("Run %s reboot on DUT" % reboot_type) reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=reboot_helper, reboot_kwargs=reboot_kwargs) + + # Append the last reboot type to the queue + logging.info("Append the latest reboot type to the queue") REBOOT_TYPE_HISTOYR_QUEUE.append(reboot_type) check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type) @@ -84,7 +87,7 @@ def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type = if "201811" in dut.os_version or "201911" in dut.os_version: logging.info("Skip check reboot-cause 
history for version before 202012") else: - logger.info("Check reboot-cause history") + logging.info("Check reboot-cause history") assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, 0, check_reboot_cause_history, dut, REBOOT_TYPE_HISTOYR_QUEUE), "Check reboot-cause history failed after rebooted by %s" % reboot_type if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: @@ -97,8 +100,6 @@ def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type = logging.info("Wait {} seconds for all the transceivers to be detected".format(MAX_WAIT_TIME_FOR_INTERFACES)) result = wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, 0, check_all_interface_information, dut, interfaces, xcvr_skip_list) - if not dut.has_sku: - pytest.xfail("hwsku.json is needed for interface checking to pass, and it is not provided.") assert result, "Not all transceivers are detected or interfaces are up in {} seconds".format( MAX_WAIT_TIME_FOR_INTERFACES) @@ -267,8 +268,26 @@ def test_watchdog_reboot(duthosts, enum_rand_one_per_hwsku_hostname, localhost, watchdogutil_status_result = duthost.command("watchdogutil status", module_ignore_errors=True) if "" != watchdogutil_status_result["stderr"] or "" == watchdogutil_status_result["stdout"]: pytest.skip("Watchdog is not supported on this DUT, skip this test case") - - reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname], xcvr_skip_list, REBOOT_TYPE_WATCHDOG) + if "x86_64-8102_64h_o-r0" in duthost.facts['platform']: + output = duthost.shell("dmidecode -s bios-version")["stdout"] + bios = output.split('-') + bios_version = bios[1] + if bios_version < "218" and "t1" in tbinfo["topo"]["type"]: + pytest.skip("Skip test if BIOS ver <218 and topo is T1 and platform is M64") + try: + if "x86_64-cel_e1031-r0" in duthost.facts['platform']: + # On Celestica E1031 platform, the cpu_wdt service periodically sends keep alive + # message to watchdog via "watchdogutil arm -s " command. 
This may affect + # the test result. So, we need to stop the cpu_wdt service before doing watchdog + # reboot on the DUT. + duthost.shell("sudo systemctl stop cpu_wdt", module_ignore_errors=True) + + reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname], + xcvr_skip_list, REBOOT_TYPE_WATCHDOG) + finally: + if "x86_64-cel_e1031-r0" in duthost.facts['platform']: + # On Celestica E1031 platform, ensure the cpu_wdt service is started once test finished. + duthost.shell("sudo systemctl start cpu_wdt", module_ignore_errors=True) def test_continuous_reboot(duthosts, enum_rand_one_per_hwsku_hostname, localhost, conn_graph_facts, xcvr_skip_list): diff --git a/tests/platform_tests/thermal_control_test_helper.py b/tests/platform_tests/thermal_control_test_helper.py index 587166c5826..cb813902ae6 100644 --- a/tests/platform_tests/thermal_control_test_helper.py +++ b/tests/platform_tests/thermal_control_test_helper.py @@ -277,14 +277,22 @@ def restart_thermal_control_daemon(dut): # For example, chassis.get_all_sfps will call sfp constructor, and sfp constructor may # use subprocess to call ethtool to do initialization. # So we check here thermalcltd must have at least 2 processes. 
- assert len(output["stdout_lines"]) >= 2, "There should be at least 2 thermalctld process" + # For mellanox, it has at least two processes, but for celestica(broadcom), + # it only has one thermalctld process + if dut.facts["asic_type"] == "mellanox": + assert len(output["stdout_lines"]) >= 2, "There should be at least 2 thermalctld process" + else: + assert len(output["stdout_lines"]) >= 1, "There should be at least 1 thermalctld process" restart_thermalctl_cmd = "docker exec -i pmon bash -c 'supervisorctl restart thermalctld'" output = dut.shell(restart_thermalctl_cmd) if output["rc"] == 0: output = dut.shell(find_thermalctld_pid_cmd) assert output["rc"] == 0, "Run command '{}' failed after restart of thermalctld on {}".format(find_thermalctld_pid_cmd, dut.hostname) - assert len(output["stdout_lines"]) >= 2, "There should be at least 2 thermalctld process" + if dut.facts["asic_type"] == "mellanox": + assert len(output["stdout_lines"]) >= 2, "There should be at least 2 thermalctld process" + else: + assert len(output["stdout_lines"]) >= 1, "There should be at least 1 thermalctld process" logging.info("thermalctld processes restarted successfully on {}".format(dut.hostname)) return # try restore by config reload... 
diff --git a/tests/platform_tests/verify_dut_health.py b/tests/platform_tests/verify_dut_health.py index ef7133716bc..ae9c5dc11cb 100644 --- a/tests/platform_tests/verify_dut_health.py +++ b/tests/platform_tests/verify_dut_health.py @@ -168,21 +168,20 @@ def add_exit_to_script(reboot_type): elif "fast" in reboot_type: reboot_script = "fast-reboot" - cmd_format = "sed -i 's/{}/{}/' {}" + cmd_format = "sed -i -u 's/{}/{}/' {}" reboot_script_path = duthost.shell('which {}'.format(reboot_script))['stdout'] - original_line = 'set +e' - replaced_line = 'exit -1; set +e' + original_line = '^setup_control_plane_assistant$' + replaced_line = 'exit -1; setup_control_plane_assistant' replace_cmd = cmd_format.format(original_line, replaced_line, reboot_script_path) logging.info("Modify {} to exit before set +e".format(reboot_script_path)) duthost.shell(replace_cmd) add_exit_to_script.params = (cmd_format, replaced_line, original_line, reboot_script_path, reboot_script_path) - yield add_exit_to_script if add_exit_to_script.params: cmd_format, replaced_line, original_line, reboot_script_path, reboot_script_path = add_exit_to_script.params - replace_cmd = cmd_format.format(replaced_line, original_line, reboot_script_path) + replace_cmd = cmd_format.format(replaced_line, "setup_control_plane_assistant", reboot_script_path) logging.info("Revert {} script to original".format(reboot_script_path)) duthost.shell(replace_cmd) # cold reboot DUT to restore any bad state caused by negative test diff --git a/tests/process_monitoring/test_critical_process_monitoring.py b/tests/process_monitoring/test_critical_process_monitoring.py index c80b136adb6..8a8f81e0ef4 100755 --- a/tests/process_monitoring/test_critical_process_monitoring.py +++ b/tests/process_monitoring/test_critical_process_monitoring.py @@ -4,6 +4,7 @@ """ from collections import defaultdict import logging +import time import pytest diff --git a/tests/pytest.ini b/tests/pytest.ini index 461873ebf0e..bd80f814928 100644 --- 
a/tests/pytest.ini +++ b/tests/pytest.ini @@ -12,6 +12,7 @@ markers: topology: specify which topology testcase can be executed on: (t0, t1, ptf, etc) platform: specify which platform testcase can be executed on: (physical, virtual, broadcom, mellanox, etc) supported_completeness_level: test supported levels of completeness (coverage) level (Debug, Basic, Confident, Thorough) + skip_check_dut_health: skip default execution of check_dut_health_status fixture log_cli_format: %(asctime)s %(funcNamewithModule)-40.40s L%(lineno)-.4d %(levelname)-7s| %(message)s log_file_format: %(asctime)s %(funcNamewithModule)-40.40s L%(lineno)-.4d %(levelname)-7s| %(message)s diff --git a/tests/qos/Pfc_Storm_with_Shared_Headroom_test_plan.md b/tests/qos/Pfc_Storm_with_Shared_Headroom_test_plan.md new file mode 100644 index 00000000000..f43ee16d31b --- /dev/null +++ b/tests/qos/Pfc_Storm_with_Shared_Headroom_test_plan.md @@ -0,0 +1,29 @@ +# PFC Storm with Shared Headroom Test Plan + +## Motivation + +This test covers the scenario when a PFC Watchdog is applied on a port which already has its occupancy crossed into shared headroom. + +The test checks if any PFC Frames are sent to the peer link from the DUT port. + +**Note:** ++ This test case is only intended for Mellanox Platforms ++ This test case requires an RPC image ++ Shared Headroom has to be enabled on the device. + +## Test Plan ++ Verify if the shared headroom is enabled ++ Make sure buffer occupancy crosses into the shared headroom region + - Achieve buffer congestion by closing the dut tx port using `sai_thrift_port_tx_disable` API: https://github.com/Azure/sonic-mgmt/blob/master/tests/saitests/switch.py#L624. + - Send pkts from the PTF docker which are destined to egress out of the dut tx port. + - Make sure to send at least num_pkts_pfs_frame + private_headroom pkts + - num_pkts_pfs_frame: num of pkts required to be sent in order to trigger a PFC frame from the DUT.
More on this here: https://github.com/Azure/sonic-mgmt/blob/master/tests/qos/files/mellanox/qos_param_generator.py + - private_headroom_pkts is specific to mellanox which is in the order of a few pkts. + - To check if the occupancy has indeed crossed into the shared headroom, use the SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES value. This can be fetched using "show priority-group watermark headroom" command. + ++ The test triggers a PFC storm on the DUT port ++ PFC Watchdog is triggered ++ The test will stop the PFC Storm and once the PFC WD is restored, the Ingress buffers will be drained to drop the occupancy under Xon + - Achieve this by re-opening dut tx port using `sai_thrift_port_tx_enable` API. + - This'll drain the buffers and the occupancy falls below Xon. ++ Check the Tx PFC Counters on the DUT source port which was stormed after the packets are drained. They shouldn't be incremented as the occupancy has fallen below Xon diff --git a/tests/qos/args/qos_sai_args.py b/tests/qos/args/qos_sai_args.py index c4eea65c49d..463680cb2fa 100644 --- a/tests/qos/args/qos_sai_args.py +++ b/tests/qos/args/qos_sai_args.py @@ -46,3 +46,10 @@ def add_qos_sai_args(parser): help="QoS SAI comma separated list of source ports. 
Test currently expects exactly 1 source port", ) + qos_group.addoption( + "--qos_dual_tor", + action="store", + type=str2bool, + default=False, + help="Test QoS on dual ToR ports" + ) diff --git a/tests/qos/files/brcm/qos_param_generator.py b/tests/qos/files/brcm/qos_param_generator.py new file mode 100644 index 00000000000..e3088ef564e --- /dev/null +++ b/tests/qos/files/brcm/qos_param_generator.py @@ -0,0 +1,29 @@ +''' +generate and update qos params for brcm platform +so far, only return the original qos_params to testcase +''' + +class QosParamBroadcom(object): + + def __init__(self, + qos_params, + asic_type, + speed_cable_len, + dutConfig, + ingressLosslessProfile, + ingressLossyProfile, + egressLosslessProfile, + egressLossyProfile, + sharedHeadroomPoolSize, + dualTor, + dutTopo, + bufferConfig, + dutHost, + testbedTopologyName, + verbose=True): + self.qos_params = qos_params + return + + + def run(self): + return self.qos_params diff --git a/tests/qos/files/mellanox/qos_param_generator.py b/tests/qos/files/mellanox/qos_param_generator.py index 24a291c1018..9cd94c99e2a 100644 --- a/tests/qos/files/mellanox/qos_param_generator.py +++ b/tests/qos/files/mellanox/qos_param_generator.py @@ -1,25 +1,27 @@ import math class QosParamMellanox(object): - def __init__(self, qos_params, asic_type, speed_cable_len, dutConfig, ingressLosslessProfile, ingressLossyProfile, egressLosslessProfile, egressLossyProfile, sharedHeadroomPoolSize): - asic_param_dic = { + def __init__(self, qos_params, asic_type, speed_cable_len, dutConfig, ingressLosslessProfile, ingressLossyProfile, egressLosslessProfile, egressLossyProfile, sharedHeadroomPoolSize, dualTor): + self.asic_param_dic = { 'spc1': { 'cell_size': 96, - 'headroom_overhead': 95 + 'headroom_overhead': 95, + 'private_headroom': 30 }, 'spc2': { 'cell_size': 144, - 'headroom_overhead': 64 + 'headroom_overhead': 64, + 'private_headroom': 30 }, 'spc3': { 'cell_size': 144, - 'headroom_overhead': 64 + 'headroom_overhead': 64, + 
'private_headroom': 30 } } - self.asic_type = asic_type - self.cell_size = asic_param_dic[asic_type]['cell_size'] - self.headroom_overhead = asic_param_dic[asic_type]['headroom_overhead'] + self.cell_size = self.asic_param_dic[asic_type]['cell_size'] + self.headroom_overhead = self.asic_param_dic[asic_type]['headroom_overhead'] if speed_cable_len[0:6] == '400000': self.headroom_overhead += 59 # for 400G ports we need an extra margin in case it is filled unbalancely between two buffer units @@ -41,6 +43,7 @@ def __init__(self, qos_params, asic_type, speed_cable_len, dutConfig, ingressLos else: self.sharedHeadroomPoolSize = None self.dutConfig = dutConfig + self.dualTor = dualTor return @@ -77,6 +80,8 @@ def collect_qos_configurations(self): pkts_num_trig_ingr_drp = ingress_lossless_size + headroom if self.sharedHeadroomPoolSize: pkts_num_trig_ingr_drp += xoff + if self.dualTor: + pkts_num_trig_ingr_drp += 2 * xoff else: pkts_num_trig_ingr_drp -= self.headroom_overhead pkts_num_dismiss_pfc = ingress_lossless_size + 1 @@ -89,13 +94,11 @@ def collect_qos_configurations(self): ingress_ports_list_shp = [] occupancy_per_port = ingress_lossless_size self.qos_parameters['dst_port_id'] = testPortIds[0] + pgs_per_port = 2 if not self.dualTor else 4 for i in range(1, ingress_ports_num_shp): - # for the first PG - pkts_num_trig_pfc_shp.append(occupancy_per_port + xon + hysteresis) - # for the second PG - occupancy_per_port /= 2 - pkts_num_trig_pfc_shp.append(occupancy_per_port + xon + hysteresis) - occupancy_per_port /= 2 + for j in range(pgs_per_port): + pkts_num_trig_pfc_shp.append(occupancy_per_port + xon + hysteresis) + occupancy_per_port /= 2 ingress_ports_list_shp.append(testPortIds[i]) self.qos_parameters['pkts_num_trig_pfc_shp'] = pkts_num_trig_pfc_shp self.qos_parameters['src_port_ids'] = ingress_ports_list_shp @@ -135,7 +138,7 @@ def calculate_parameters(self): hdrm_pool_size['pkts_num_hdrm_partial'] = self.qos_parameters['pkts_num_hdrm_partial'] 
hdrm_pool_size['dst_port_id'] = self.qos_parameters['dst_port_id'] hdrm_pool_size['src_port_ids'] = self.qos_parameters['src_port_ids'] - hdrm_pool_size['pgs_num'] = 2 * len(self.qos_parameters['src_port_ids']) + hdrm_pool_size['pgs_num'] = (2 if not self.dualTor else 4) * len(self.qos_parameters['src_port_ids']) hdrm_pool_size['cell_size'] = self.cell_size hdrm_pool_size['margin'] = 3 else: @@ -149,6 +152,8 @@ def calculate_parameters(self): xoff['pkts_num_margin'] = 3 self.qos_params_mlnx[self.speed_cable_len]['xoff_1'].update(xoff) self.qos_params_mlnx[self.speed_cable_len]['xoff_2'].update(xoff) + self.qos_params_mlnx[self.speed_cable_len]['xoff_3'].update(xoff) + self.qos_params_mlnx[self.speed_cable_len]['xoff_4'].update(xoff) xon = {} xon['pkts_num_trig_pfc'] = pkts_num_trig_pfc @@ -157,6 +162,8 @@ def calculate_parameters(self): xon['pkts_num_margin'] = 3 self.qos_params_mlnx['xon_1'].update(xon) self.qos_params_mlnx['xon_2'].update(xon) + self.qos_params_mlnx['xon_3'].update(xon) + self.qos_params_mlnx['xon_4'].update(xon) wm_pg_headroom = self.qos_params_mlnx[self.speed_cable_len]['wm_pg_headroom'] wm_pg_headroom['pkts_num_trig_pfc'] = pkts_num_trig_pfc @@ -200,3 +207,4 @@ def calculate_parameters(self): self.qos_params_mlnx['ecn_{}'.format(i+1)]['cell_size'] = self.cell_size self.qos_params_mlnx['shared-headroom-pool'] = self.sharedHeadroomPoolSize + self.qos_params_mlnx['pkts_num_private_headrooom'] = self.asic_param_dic[self.asic_type]['private_headroom'] \ No newline at end of file diff --git a/tests/qos/files/qos.yml b/tests/qos/files/qos.yml index 767e2c5d10a..7ef8ef33a7b 100644 --- a/tests/qos/files/qos.yml +++ b/tests/qos/files/qos.yml @@ -29,6 +29,14 @@ qos_params: dscp: 4 ecn: 1 pg: 4 + xoff_3: + dscp: 2 + ecn: 1 + pg: 2 + xoff_4: + dscp: 6 + ecn: 1 + pg: 6 wm_pg_headroom: dscp: 3 ecn: 1 @@ -42,9 +50,9 @@ qos_params: pkts_num_fill_min: 0 packet_size: 300 hdrm_pool_size: - dscps: [3, 4] + dscps: [3, 4, 2, 6] ecn: 1 - pgs: [3, 4] + pgs: [3, 4, 2, 
6] pkts_num_trig_pfc: 0 pkts_num_leak_out: 0 pkts_num_fill_min: 0 @@ -59,6 +67,16 @@ qos_params: ecn: 1 pg: 4 pkts_num_leak_out: 0 + xon_3: + dscp: 2 + ecn: 1 + pg: 2 + pkts_num_leak_out: 0 + xon_4: + dscp: 6 + ecn: 1 + pg: 6 + pkts_num_leak_out: 0 ecn_1: dscp: 8 ecn: 0 @@ -654,15 +672,15 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13259 - pkts_num_trig_ingr_drp: 13621 + pkts_num_trig_pfc: 59605 + pkts_num_trig_ingr_drp: 60228 pkts_num_margin: 4 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13259 - pkts_num_trig_ingr_drp: 13621 + pkts_num_trig_pfc: 59605 + pkts_num_trig_ingr_drp: 60228 pkts_num_margin: 4 hdrm_pool_size: dscps: [3, 4] @@ -671,40 +689,40 @@ qos_params: src_port_ids: [0, 2, 4, 6, 8, 10, 12, 14, 16] dst_port_id: 18 pgs_num: 18 - pkts_num_trig_pfc: 4601 + pkts_num_trig_pfc: 4478 pkts_num_hdrm_full: 362 pkts_num_hdrm_partial: 182 wm_pg_headroom: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13259 - pkts_num_trig_ingr_drp: 13621 + pkts_num_trig_pfc: 59605 + pkts_num_trig_ingr_drp: 59967 cell_size: 256 pkts_num_margin: 2 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13259 + pkts_num_trig_pfc: 59605 pkts_num_dismiss_pfc: 18 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13259 + pkts_num_trig_pfc: 59605 pkts_num_dismiss_pfc: 18 lossy_queue_1: dscp: 8 ecn: 1 pg: 0 - pkts_num_trig_egr_drp: 31807 + pkts_num_trig_egr_drp: 113198 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 pkts_num_fill_min: 18 - pkts_num_trig_pfc: 13259 + pkts_num_trig_pfc: 59605 packet_size: 64 cell_size: 256 wm_pg_shared_lossy: @@ -712,7 +730,7 @@ qos_params: ecn: 1 pg: 0 pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 31807 + pkts_num_trig_egr_drp: 113198 packet_size: 64 cell_size: 256 wm_q_shared_lossless: @@ -720,7 +738,7 @@ qos_params: ecn: 1 queue: 3 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 13621 + pkts_num_trig_ingr_drp: 59967 cell_size: 256 wm_buf_pool_lossless: dscp: 3 @@ -728,8 +746,8 @@ qos_params: pg: 3 queue: 3 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 
13259 - pkts_num_trig_ingr_drp: 13621 + pkts_num_trig_pfc: 59605 + pkts_num_trig_ingr_drp: 59967 pkts_num_fill_egr_min: 8 cell_size: 256 wm_q_shared_lossy: @@ -737,7 +755,7 @@ qos_params: ecn: 1 queue: 0 pkts_num_fill_min: 7 - pkts_num_trig_egr_drp: 31807 + pkts_num_trig_egr_drp: 59967 cell_size: 256 wm_buf_pool_lossy: dscp: 8 @@ -745,7 +763,7 @@ qos_params: pg: 0 queue: 0 pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 31854 + pkts_num_trig_egr_drp: 113198 pkts_num_fill_egr_min: 14 cell_size: 256 ecn_1: @@ -800,22 +818,52 @@ qos_params: lossless_weight: 30 hdrm_pool_wm_multiplier: 1 cell_size: 256 - topo-any: - 50000_300m: + topo-dualtor: + 100000_300m: pkts_num_leak_out: 32 xoff_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13012 - pkts_num_trig_ingr_drp: 13252 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 pkts_num_margin: 4 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13012 - pkts_num_trig_ingr_drp: 13252 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 + pkts_num_margin: 4 + pcbb_xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 + pkts_num_margin: 4 + pcbb_xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 + pkts_num_margin: 4 + pcbb_xoff_3: + outer_dscp: 2 + dscp: 3 + ecn: 1 + pg: 2 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 + pkts_num_margin: 4 + pcbb_xoff_4: + outer_dscp: 6 + dscp: 4 + ecn: 1 + pg: 6 + pkts_num_trig_pfc: 60204 + pkts_num_trig_ingr_drp: 60829 pkts_num_margin: 4 hdrm_pool_size: dscps: [3, 4] @@ -824,40 +872,476 @@ qos_params: src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] dst_port_id: 10 pgs_num: 18 - pkts_num_trig_pfc: 4519 + pkts_num_trig_pfc: 6301 + pkts_num_hdrm_full: 362 + pkts_num_hdrm_partial: 182 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 + cell_size: 256 + pkts_num_margin: 2 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 59714 + 
pkts_num_dismiss_pfc: 18 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 59714 + pkts_num_dismiss_pfc: 18 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 84935 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_fill_min: 18 + pkts_num_trig_pfc: 59714 + packet_size: 64 + cell_size: 256 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 84935 + packet_size: 64 + cell_size: 256 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 60076 + cell_size: 256 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 + pkts_num_fill_egr_min: 8 + cell_size: 256 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_fill_min: 7 + pkts_num_trig_egr_drp: 84935 + cell_size: 256 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 84930 + pkts_num_fill_egr_min: 14 + cell_size: 256 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 256 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 256 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 256 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 256 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + lossy_weight: 8 + lossless_weight: 30 + hdrm_pool_wm_multiplier: 1 + cell_size: 256 + topo-dualtor-56: + 50000_300m: + pkts_num_leak_out: 32 + xoff_1: + dscp: 3 + ecn: 1 
+ pg: 3 + pkts_num_trig_pfc: 58576 + pkts_num_trig_ingr_drp: 58816 + pkts_num_margin: 50 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 58576 + pkts_num_trig_ingr_drp: 58816 + pkts_num_margin: 50 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] + dst_port_id: 10 + pgs_num: 18 + pkts_num_trig_pfc: 4478 pkts_num_hdrm_full: 240 pkts_num_hdrm_partial: 182 wm_pg_headroom: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13012 - pkts_num_trig_ingr_drp: 13252 + pkts_num_trig_pfc: 58276 + pkts_num_trig_ingr_drp: 58516 + cell_size: 256 + pkts_num_margin: 2 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 58276 + pkts_num_dismiss_pfc: 18 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 58276 + pkts_num_dismiss_pfc: 18 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 112302 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_fill_min: 18 + pkts_num_trig_pfc: 58276 + packet_size: 64 + cell_size: 256 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 112302 + packet_size: 64 + cell_size: 256 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 58516 + cell_size: 256 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 58276 + pkts_num_trig_ingr_drp: 58516 + pkts_num_fill_egr_min: 8 + cell_size: 256 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_fill_min: 7 + pkts_num_trig_egr_drp: 58516 + cell_size: 256 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 58516 + pkts_num_fill_egr_min: 14 + cell_size: 256 + 100000_300m: + pkts_num_leak_out: 32 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 59784 + pkts_num_trig_ingr_drp: 60410 + pkts_num_margin: 4 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 59784 + pkts_num_trig_ingr_drp: 60410 
+ pkts_num_margin: 4 + pcbb_xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 58620 + pkts_num_trig_ingr_drp: 59245 + pkts_num_margin: 4 + pcbb_xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 58620 + pkts_num_trig_ingr_drp: 59245 + pkts_num_margin: 4 + pcbb_xoff_3: + outer_dscp: 2 + dscp: 3 + ecn: 1 + pg: 2 + pkts_num_trig_pfc: 58620 + pkts_num_trig_ingr_drp: 59245 + pkts_num_margin: 4 + pcbb_xoff_4: + outer_dscp: 6 + dscp: 4 + ecn: 1 + pg: 6 + pkts_num_trig_pfc: 58620 + pkts_num_trig_ingr_drp: 59245 + pkts_num_margin: 4 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] + dst_port_id: 10 + pgs_num: 18 + pkts_num_trig_pfc: 6301 + pkts_num_hdrm_full: 362 + pkts_num_hdrm_partial: 182 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 + cell_size: 256 + pkts_num_margin: 2 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 59714 + pkts_num_dismiss_pfc: 18 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 59714 + pkts_num_dismiss_pfc: 18 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 84935 + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_fill_min: 18 + pkts_num_trig_pfc: 59714 + packet_size: 64 + cell_size: 256 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 84935 + packet_size: 64 + cell_size: 256 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 60076 + cell_size: 256 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 6 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 + pkts_num_fill_egr_min: 8 + cell_size: 256 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_fill_min: 7 + pkts_num_trig_egr_drp: 84935 + cell_size: 256 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_egr_drp: 84930 + 
pkts_num_fill_egr_min: 14 + cell_size: 256 + ecn_1: + dscp: 8 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 256 + ecn_2: + dscp: 8 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 256 + ecn_3: + dscp: 0 + ecn: 0 + num_of_pkts: 5000 + limit: 182000 + min_limit: 180000 + cell_size: 256 + ecn_4: + dscp: 0 + ecn: 1 + num_of_pkts: 2047 + limit: 182320 + min_limit: 0 + cell_size: 256 + wrr: + ecn: 1 + q0_num_of_pkts: 140 + q1_num_of_pkts: 140 + q2_num_of_pkts: 140 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 140 + q6_num_of_pkts: 140 + limit: 80 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 80 + q1_num_of_pkts: 80 + q2_num_of_pkts: 80 + q3_num_of_pkts: 300 + q4_num_of_pkts: 300 + q5_num_of_pkts: 80 + q6_num_of_pkts: 80 + limit: 80 + lossy_weight: 8 + lossless_weight: 30 + hdrm_pool_wm_multiplier: 1 + cell_size: 256 + topo-any: + 50000_300m: + pkts_num_leak_out: 32 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 58576 + pkts_num_trig_ingr_drp: 58816 + pkts_num_margin: 50 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 58576 + pkts_num_trig_ingr_drp: 58816 + pkts_num_margin: 50 + hdrm_pool_size: + dscps: [3, 4] + ecn: 1 + pgs: [3, 4] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] + dst_port_id: 10 + pgs_num: 18 + pkts_num_trig_pfc: 4478 + pkts_num_hdrm_full: 240 + pkts_num_hdrm_partial: 182 + wm_pg_headroom: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 58276 + pkts_num_trig_ingr_drp: 58516 cell_size: 256 pkts_num_margin: 2 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13012 + pkts_num_trig_pfc: 58276 pkts_num_dismiss_pfc: 18 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13012 + pkts_num_trig_pfc: 58276 pkts_num_dismiss_pfc: 18 lossy_queue_1: dscp: 8 ecn: 1 pg: 0 - pkts_num_trig_egr_drp: 31304 + pkts_num_trig_egr_drp: 112302 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 pkts_num_fill_min: 18 - pkts_num_trig_pfc: 13012 + pkts_num_trig_pfc: 58276 packet_size: 64 cell_size: 256 
wm_pg_shared_lossy: @@ -865,7 +1349,7 @@ qos_params: ecn: 1 pg: 0 pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 31304 + pkts_num_trig_egr_drp: 112302 packet_size: 64 cell_size: 256 wm_q_shared_lossless: @@ -873,7 +1357,7 @@ qos_params: ecn: 1 queue: 3 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 13252 + pkts_num_trig_ingr_drp: 58516 cell_size: 256 wm_buf_pool_lossless: dscp: 3 @@ -881,8 +1365,8 @@ qos_params: pg: 3 queue: 3 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 13012 - pkts_num_trig_ingr_drp: 13252 + pkts_num_trig_pfc: 58276 + pkts_num_trig_ingr_drp: 58516 pkts_num_fill_egr_min: 8 cell_size: 256 wm_q_shared_lossy: @@ -890,7 +1374,7 @@ qos_params: ecn: 1 queue: 0 pkts_num_fill_min: 7 - pkts_num_trig_egr_drp: 31304 + pkts_num_trig_egr_drp: 58516 cell_size: 256 wm_buf_pool_lossy: dscp: 8 @@ -898,7 +1382,7 @@ qos_params: pg: 0 queue: 0 pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 31304 + pkts_num_trig_egr_drp: 58516 pkts_num_fill_egr_min: 14 cell_size: 256 100000_300m: @@ -907,15 +1391,15 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13283 - pkts_num_trig_ingr_drp: 13645 + pkts_num_trig_pfc: 59784 + pkts_num_trig_ingr_drp: 60410 pkts_num_margin: 4 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13283 - pkts_num_trig_ingr_drp: 13645 + pkts_num_trig_pfc: 59784 + pkts_num_trig_ingr_drp: 60410 pkts_num_margin: 4 hdrm_pool_size: dscps: [3, 4] @@ -924,40 +1408,40 @@ qos_params: src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] dst_port_id: 10 pgs_num: 18 - pkts_num_trig_pfc: 4610 + pkts_num_trig_pfc: 6301 pkts_num_hdrm_full: 362 - pkts_num_hdrm_partial: 182 + pkts_num_hdrm_partial: 182 wm_pg_headroom: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13283 - pkts_num_trig_ingr_drp: 13645 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 cell_size: 256 pkts_num_margin: 2 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 13283 + pkts_num_trig_pfc: 59714 pkts_num_dismiss_pfc: 18 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 13283 + pkts_num_trig_pfc: 
59714 pkts_num_dismiss_pfc: 18 lossy_queue_1: dscp: 8 ecn: 1 pg: 0 - pkts_num_trig_egr_drp: 31854 + pkts_num_trig_egr_drp: 84935 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 pkts_num_fill_min: 18 - pkts_num_trig_pfc: 13283 + pkts_num_trig_pfc: 59714 packet_size: 64 cell_size: 256 wm_pg_shared_lossy: @@ -965,7 +1449,7 @@ qos_params: ecn: 1 pg: 0 pkts_num_fill_min: 0 - pkts_num_trig_egr_drp: 31854 + pkts_num_trig_egr_drp: 84935 packet_size: 64 cell_size: 256 wm_q_shared_lossless: @@ -973,7 +1457,7 @@ qos_params: ecn: 1 queue: 3 pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 13645 + pkts_num_trig_ingr_drp: 60076 cell_size: 256 wm_buf_pool_lossless: dscp: 3 @@ -981,8 +1465,8 @@ qos_params: pg: 3 queue: 3 pkts_num_fill_ingr_min: 6 - pkts_num_trig_pfc: 13283 - pkts_num_trig_ingr_drp: 13645 + pkts_num_trig_pfc: 59714 + pkts_num_trig_ingr_drp: 60076 pkts_num_fill_egr_min: 8 cell_size: 256 wm_q_shared_lossy: @@ -990,7 +1474,7 @@ qos_params: ecn: 1 queue: 0 pkts_num_fill_min: 7 - pkts_num_trig_egr_drp: 31854 + pkts_num_trig_egr_drp: 84935 cell_size: 256 wm_buf_pool_lossy: dscp: 8 @@ -998,7 +1482,7 @@ qos_params: pg: 0 queue: 0 pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 31854 + pkts_num_trig_egr_drp: 84930 pkts_num_fill_egr_min: 14 cell_size: 256 ecn_1: @@ -1317,14 +1801,14 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 4490 - pkts_num_trig_ingr_drp: 4978 + pkts_num_trig_pfc: 19994 + pkts_num_trig_ingr_drp: 20481 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 4490 - pkts_num_trig_ingr_drp: 4978 + pkts_num_trig_pfc: 19994 + pkts_num_trig_ingr_drp: 20481 wm_pg_headroom: dscp: 3 ecn: 1 @@ -1443,6 +1927,7 @@ qos_params: q4_num_of_pkts: 150 q5_num_of_pkts: 140 q6_num_of_pkts: 140 + q7_num_of_pkts: 140 limit: 80 wrr_chg: ecn: 1 @@ -1453,6 +1938,7 @@ qos_params: q4_num_of_pkts: 300 q5_num_of_pkts: 80 q6_num_of_pkts: 80 + q7_num_of_pkts: 80 limit: 80 lossy_weight: 8 lossless_weight: 30 @@ -1545,14 +2031,14 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 
4457 - pkts_num_trig_ingr_drp: 5140 + pkts_num_trig_pfc: 20034 + pkts_num_trig_ingr_drp: 20521 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 4457 - pkts_num_trig_ingr_drp: 5140 + pkts_num_trig_pfc: 20034 + pkts_num_trig_ingr_drp: 20521 hdrm_pool_size: dscps: [3, 4] ecn: 1 @@ -1641,6 +2127,7 @@ qos_params: q4_num_of_pkts: 150 q5_num_of_pkts: 140 q6_num_of_pkts: 140 + q7_num_of_pkts: 140 limit: 80 wrr_chg: ecn: 1 @@ -1651,6 +2138,7 @@ qos_params: q4_num_of_pkts: 300 q5_num_of_pkts: 80 q6_num_of_pkts: 80 + q7_num_of_pkts: 80 limit: 80 lossy_weight: 8 lossless_weight: 30 @@ -1697,15 +2185,15 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 21898 - pkts_num_trig_ingr_drp: 22360 + pkts_num_trig_pfc: 22026 + pkts_num_trig_ingr_drp: 22488 pkts_num_margin: 4 xoff_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 21898 - pkts_num_trig_ingr_drp: 22360 + pkts_num_trig_pfc: 22026 + pkts_num_trig_ingr_drp: 22488 pkts_num_margin: 4 hdrm_pool_size: dscps: [3, 4] @@ -1721,54 +2209,54 @@ qos_params: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 21898 - pkts_num_trig_ingr_drp: 22360 + pkts_num_trig_pfc: 22026 + pkts_num_trig_ingr_drp: 22488 cell_size: 254 pkts_num_margin: 8 xon_1: dscp: 3 ecn: 1 pg: 3 - pkts_num_trig_pfc: 21898 + pkts_num_trig_pfc: 22026 pkts_num_dismiss_pfc: 13 pkts_num_margin: 4 xon_2: dscp: 4 ecn: 1 pg: 4 - pkts_num_trig_pfc: 21898 + pkts_num_trig_pfc: 22026 pkts_num_dismiss_pfc: 13 pkts_num_margin: 4 lossy_queue_1: dscp: 8 ecn: 1 pg: 0 - pkts_num_trig_egr_drp: 72967 + pkts_num_trig_egr_drp: 73394 pkts_num_margin: 11 wm_pg_shared_lossless: dscp: 3 ecn: 1 pg: 3 pkts_num_fill_min: 10 - pkts_num_trig_pfc: 21898 + pkts_num_trig_pfc: 22026 packet_size: 64 cell_size: 254 - pkts_num_margin: 2 + pkts_num_margin: 1 wm_pg_shared_lossy: dscp: 8 ecn: 1 pg: 0 pkts_num_fill_min: 7 - pkts_num_trig_egr_drp: 72967 + pkts_num_trig_egr_drp: 73394 packet_size: 64 cell_size: 254 - pkts_num_margin: 2 + pkts_num_margin: 10 wm_q_shared_lossless: dscp: 3 ecn: 1 queue: 3 
pkts_num_fill_min: 0 - pkts_num_trig_ingr_drp: 22360 + pkts_num_trig_ingr_drp: 22488 cell_size: 254 wm_buf_pool_lossless: dscp: 3 @@ -1776,8 +2264,8 @@ qos_params: pg: 3 queue: 3 pkts_num_fill_ingr_min: 7 - pkts_num_trig_pfc: 21898 - pkts_num_trig_ingr_drp: 22360 + pkts_num_trig_pfc: 22026 + pkts_num_trig_ingr_drp: 22488 pkts_num_fill_egr_min: 8 cell_size: 254 wm_q_shared_lossy: @@ -1785,7 +2273,7 @@ qos_params: ecn: 1 queue: 0 pkts_num_fill_min: 7 - pkts_num_trig_egr_drp: 72967 + pkts_num_trig_egr_drp: 73394 cell_size: 254 wm_buf_pool_lossy: dscp: 8 @@ -1793,7 +2281,7 @@ qos_params: pg: 0 queue: 0 pkts_num_fill_ingr_min: 0 - pkts_num_trig_egr_drp: 72967 + pkts_num_trig_egr_drp: 73394 pkts_num_fill_egr_min: 7 cell_size: 254 ecn_1: @@ -1833,6 +2321,7 @@ qos_params: q4_num_of_pkts: 150 q5_num_of_pkts: 140 q6_num_of_pkts: 140 + q7_num_of_pkts: 140 limit: 80 wrr_chg: ecn: 1 @@ -1843,6 +2332,7 @@ qos_params: q4_num_of_pkts: 300 q5_num_of_pkts: 80 q6_num_of_pkts: 80 + q7_num_of_pkts: 80 limit: 80 lossy_weight: 8 lossless_weight: 30 @@ -1915,7 +2405,7 @@ qos_params: dscp: 8 ecn: 1 pg: 0 - pkts_num_fill_min: 0 + pkts_num_fill_min: 7 pkts_num_trig_egr_drp: 50482 packet_size: 64 cell_size: 254 @@ -1990,6 +2480,7 @@ qos_params: q4_num_of_pkts: 150 q5_num_of_pkts: 140 q6_num_of_pkts: 140 + q7_num_of_pkts: 140 limit: 80 wrr_chg: ecn: 1 @@ -2000,8 +2491,542 @@ qos_params: q4_num_of_pkts: 300 q5_num_of_pkts: 80 q6_num_of_pkts: 80 + q7_num_of_pkts: 80 limit: 80 lossy_weight: 8 lossless_weight: 30 hdrm_pool_wm_multiplier: 1 cell_size: 254 + gb: + topo-any: + wm_pg_shared_lossless: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 14804 + pkts_num_fill_min: 0 + pkts_num_margin: 4 + packet_size: 1350 + cell_size: 384 + wm_pg_shared_lossy: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_fill_min: 0 + pkts_num_margin: 4 + packet_size: 1350 + cell_size: 384 + 100000_300m: + pkts_num_leak_out: 0 + pg_drop: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_trig_pfc: 
13653 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 375 + iterations: 100 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + lossless_voq_1: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_2: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_3: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_4: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 1350 + cell_size: 384 + lossy_queue_voq_1: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + lossy_queue_voq_2: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 8000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 3072 + cell_size: 384 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + 
pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 3072 + cell_size: 384 + shared_res_size_1: + dscps: [8, 8, 8, 8, 8, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + ecn: 1 + pgs: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + queues: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + src_port_ids: [0, 4, 16, 20, 34, 0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36] + dst_port_ids: [37, 38, 39, 42, 44, 37, 37, 38, 38, 39, 39, 42, 42, 44, 44, 45, 45] + pkt_counts: [3413, 3413, 3413, 3413, 3413, 2389, 2389, 2389, 1526, 1526, 1392, 415, 415, 415, 415, 42, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 46661760 + shared_res_size_2: + dscps: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + ecn: 1 + pgs: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + queues: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + src_port_ids: [0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36, 37] + dst_port_ids: [38, 38, 39, 39, 42, 42, 44, 44, 45, 45, 46, 46, 47] + pkt_counts: [3527, 3527, 3527, 3527, 3527, 3527, 1798, 1798, 846, 687, 687, 328, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 41943552 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_pfc: 3703 + cell_size: 384 + packet_size: 1350 + wm_buf_pool_lossy: + dscp: 8 + ecn: 1 + pg: 0 + queue: 0 + pkts_num_trig_egr_drp: 4000 + pkts_num_fill_egr_min: 0 + cell_size: 384 + packet_size: 1350 + 100000_40m: + pkts_num_leak_out: 0 + pg_drop: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_trig_pfc: 13653 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 375 + iterations: 100 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + xon_2: + dscp: 4 + 
ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + lossless_voq_1: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_2: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_3: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_4: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 1350 + cell_size: 384 + lossy_queue_voq_1: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + lossy_queue_voq_2: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 8000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 3072 + cell_size: 384 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 3072 + cell_size: 384 + shared_res_size_1: + dscps: [8, 8, 8, 8, 8, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + ecn: 1 + pgs: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + queues: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + src_port_ids: [0, 4, 16, 20, 34, 0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36] + dst_port_ids: [37, 38, 39, 42, 44, 37, 37, 38, 38, 39, 39, 42, 
42, 44, 44, 45, 45] + pkt_counts: [3413, 3413, 3413, 3413, 3413, 2389, 2389, 2389, 1526, 1526, 1392, 415, 415, 415, 415, 42, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 46661760 + shared_res_size_2: + dscps: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + ecn: 1 + pgs: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + queues: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + src_port_ids: [0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36, 37] + dst_port_ids: [38, 38, 39, 39, 42, 42, 44, 44, 45, 45, 46, 46, 47] + pkt_counts: [3527, 3527, 3527, 3527, 3527, 3527, 1798, 1798, 846, 687, 687, 328, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 41943552 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_pfc: 3703 + cell_size: 384 + packet_size: 1350 + 100000_5m: + pkts_num_leak_out: 0 + pg_drop: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_trig_pfc: 13653 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 375 + iterations: 100 + xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3415 + pkts_num_trig_ingr_drp: 3704 + packet_size: 1350 + xon_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + xon_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 3414 + pkts_num_dismiss_pfc: 2 + packet_size: 1350 + lossless_voq_1: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_2: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'multiple' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_3: + dscp: 3 + ecn: 1 + pg: 3 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 
'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossless_voq_4: + dscp: 4 + ecn: 1 + pg: 4 + src_port_1_id: 34 + src_port_2_id: 36 + dst_port_id: 37 + num_of_flows: 'single' + pkts_num_trig_pfc: 3415 + pkts_num_margin: 4 + packet_size: 1350 + lossy_queue_1: + dscp: 8 + ecn: 1 + pg: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 1350 + cell_size: 384 + lossy_queue_voq_1: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + lossy_queue_voq_2: + dscp: 8 + ecn: 1 + pg: 0 + src_port_id: 34 + dst_port_id: 36 + pkts_num_trig_egr_drp: 8000 + pkts_num_margin: 4 + packet_size: 64 + cell_size: 384 + wm_q_shared_lossless: + dscp: 3 + ecn: 1 + queue: 3 + pkts_num_fill_min: 0 + pkts_num_trig_ingr_drp: 14819 + pkts_num_margin: 3072 + cell_size: 384 + wm_q_shared_lossy: + dscp: 8 + ecn: 1 + queue: 0 + pkts_num_fill_min: 0 + pkts_num_trig_egr_drp: 16000 + pkts_num_margin: 3072 + cell_size: 384 + shared_res_size_1: + dscps: [8, 8, 8, 8, 8, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + ecn: 1 + pgs: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + queues: [0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] + src_port_ids: [0, 4, 16, 20, 34, 0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36] + dst_port_ids: [37, 38, 39, 42, 44, 37, 37, 38, 38, 39, 39, 42, 42, 44, 44, 45, 45] + pkt_counts: [3413, 3413, 3413, 3413, 3413, 2389, 2389, 2389, 1526, 1526, 1392, 415, 415, 415, 415, 42, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 46661760 + shared_res_size_2: + dscps: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + ecn: 1 + pgs: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + queues: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3] + src_port_ids: [0, 0, 4, 4, 16, 16, 20, 20, 34, 34, 36, 36, 37] + dst_port_ids: [38, 38, 39, 39, 42, 42, 44, 44, 45, 45, 46, 46, 47] + pkt_counts: [3527, 3527, 3527, 3527, 3527, 3527, 1798, 1798, 846, 687, 
687, 328, 1] + packet_size: 1350 + cell_size: 384 + pkts_num_margin: 1 + shared_limit_bytes: 41943552 + wm_buf_pool_lossless: + dscp: 3 + ecn: 1 + pg: 3 + queue: 3 + pkts_num_fill_ingr_min: 0 + pkts_num_trig_pfc: 3703 + cell_size: 384 + packet_size: 1350 + wrr: + ecn: 1 + q0_num_of_pkts: 70 + q1_num_of_pkts: 70 + q2_num_of_pkts: 70 + q3_num_of_pkts: 75 + q4_num_of_pkts: 75 + q5_num_of_pkts: 70 + q6_num_of_pkts: 70 + limit: 80 + wrr_chg: + ecn: 1 + q0_num_of_pkts: 40 + q1_num_of_pkts: 40 + q2_num_of_pkts: 40 + q3_num_of_pkts: 150 + q4_num_of_pkts: 150 + q5_num_of_pkts: 40 + q6_num_of_pkts: 40 + limit: 80 + lossy_weight: 8 + lossless_weight: 30 + hdrm_pool_wm_multiplier: 1 + cell_size: 384 diff --git a/tests/qos/files/tunnel_qos_map.json b/tests/qos/files/tunnel_qos_map.json new file mode 100644 index 00000000000..f75ce843b51 --- /dev/null +++ b/tests/qos/files/tunnel_qos_map.json @@ -0,0 +1,186 @@ +{ + "DSCP_TO_TC_MAP": { + "AZURE": { + "0": "1", + "1": "1", + "10": "1", + "11": "1", + "12": "1", + "13": "1", + "14": "1", + "15": "1", + "16": "1", + "17": "1", + "18": "1", + "19": "1", + "2": "1", + "20": "1", + "21": "1", + "22": "1", + "23": "1", + "24": "1", + "25": "1", + "26": "1", + "27": "1", + "28": "1", + "29": "1", + "3": "3", + "30": "1", + "31": "1", + "32": "1", + "33": "8", + "34": "1", + "35": "1", + "36": "1", + "37": "1", + "38": "1", + "39": "1", + "4": "4", + "40": "1", + "41": "1", + "42": "1", + "43": "1", + "44": "1", + "45": "1", + "46": "5", + "47": "1", + "48": "7", + "49": "1", + "5": "1", + "50": "1", + "51": "1", + "52": "1", + "53": "1", + "54": "1", + "55": "1", + "56": "1", + "57": "1", + "58": "1", + "59": "1", + "6": "1", + "60": "1", + "61": "1", + "62": "1", + "63": "1", + "7": "1", + "8": "0", + "9": "1" + }, + "AZURE_TUNNEL": { + "0": "1", + "1": "1", + "2": "1", + "3": "3", + "4": "4", + "5": "1", + "6": "1", + "7": "1", + "8": "0", + "9": "1", + "10": "1", + "11": "1", + "12": "1", + "13": "1", + "14": "1", + "15": "1", + "16": 
"1", + "17": "1", + "18": "1", + "19": "1", + "20": "1", + "21": "1", + "22": "1", + "23": "1", + "24": "1", + "25": "1", + "26": "1", + "27": "1", + "28": "1", + "29": "1", + "30": "1", + "31": "1", + "32": "1", + "33": "8", + "34": "1", + "35": "1", + "36": "1", + "37": "1", + "38": "1", + "39": "1", + "40": "1", + "41": "1", + "42": "1", + "43": "1", + "44": "1", + "45": "1", + "46": "5", + "47": "1", + "48": "7", + "49": "1", + "50": "1", + "51": "1", + "52": "1", + "53": "1", + "54": "1", + "55": "1", + "56": "1", + "57": "1", + "58": "1", + "59": "1", + "60": "1", + "61": "1", + "62": "1", + "63": "1" + } + }, + "TC_TO_PRIORITY_GROUP_MAP": { + "AZURE_TUNNEL": { + "0": "0", + "1": "0", + "2": "0", + "3": "2", + "4": "6", + "5": "0", + "6": "0", + "7": "0", + "8": "0" + } + }, + "TC_TO_DSCP_MAP": { + "AZURE_TUNNEL": { + "0": "8", + "1": "0", + "2": "0", + "3": "2", + "4": "6", + "5": "46", + "6": "0", + "7": "48", + "8": "33" + } + }, + "TC_TO_QUEUE_MAP": { + "AZURE": { + "0": "0", + "1": "1", + "2": "1", + "3": "3", + "4": "4", + "5": "5", + "6": "1", + "7": "7", + "8": "1" + }, + "AZURE_TUNNEL": { + "0": "0", + "1": "1", + "2": "1", + "3": "2", + "4": "6", + "5": "5", + "6": "1", + "7": "7", + "8": "1" + } + } +} diff --git a/tests/qos/qos_helpers.py b/tests/qos/qos_helpers.py index bf77096f3fb..8003e36d6d9 100644 --- a/tests/qos/qos_helpers.py +++ b/tests/qos/qos_helpers.py @@ -27,13 +27,33 @@ def ansible_stdout_to_str(ansible_stdout): result += x.encode('UTF8') return result -def eos_to_linux_intf(eos_intf_name): +def eos_to_linux_intf(eos_intf_name, hwsku=None): """ @Summary: Map EOS's interface name to Linux's interface name @param eos_intf_name: Interface name in EOS @return: Return the interface name in Linux """ - return eos_intf_name.replace('Ethernet', 'et').replace('/', '_') + if hwsku == "MLNX-OS": + linux_intf_name = eos_intf_name.replace("ernet 1/", "sl1p").replace("/", "sp") + else: + linux_intf_name = eos_intf_name.replace('Ethernet', 
'et').replace('/', '_') + return linux_intf_name + +def nxos_to_linux_intf(nxos_intf_name): + """ + @Summary: Map NxOS's interface name to Linux's interface name + @param nxos_intf_name: Interface name in NXOS + @return: Return the interface name in Linux + """ + return nxos_intf_name.replace('Ethernet', 'Eth').replace('/', '-') + +def sonic_to_linux_intf(sonic_intf_name): + """ + @Summary: Map SONiC's interface name to Linux's interface name + @param sonic_intf_name: Interface name in SONiC + @return: Return the interface name in Linux + """ + return sonic_intf_name def get_phy_intfs(host_ans): """ diff --git a/tests/qos/qos_sai_base.py b/tests/qos/qos_sai_base.py index a757d4b069b..3584d7701c0 100644 --- a/tests/qos/qos_sai_base.py +++ b/tests/qos/qos_sai_base.py @@ -8,11 +8,12 @@ from tests.common.fixtures.ptfhost_utils import ptf_portmap_file # lgtm[py/unused-import] from tests.common.helpers.assertions import pytest_assert, pytest_require from tests.common.mellanox_data import is_mellanox_device as isMellanoxDevice -from tests.common.utilities import wait_until -from tests.common.dualtor.dual_tor_utils import upper_tor_host,lower_tor_host -from tests.common.dualtor.mux_simulator_control import mux_server_url, toggle_all_simulator_ports +from tests.common.dualtor.dual_tor_utils import upper_tor_host,lower_tor_host,dualtor_ports +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports, get_mux_status, check_mux_status, validate_check_result from tests.common.dualtor.constants import UPPER_TOR, LOWER_TOR from tests.common.utilities import check_qos_db_fv_reference_with_table +from tests.common.fixtures.duthost_utils import dut_qos_maps, separated_dscp_to_tc_map_on_uplink +from tests.common.utilities import wait_until logger = logging.getLogger(__name__) @@ -20,10 +21,10 @@ class QosBase: """ Common APIs """ - SUPPORTED_T0_TOPOS = ["t0", "t0-64", "t0-116", "t0-35", "dualtor-56", "dualtor", "t0-80", "t0-backend"] + SUPPORTED_T0_TOPOS = 
["t0", "t0-64", "t0-116", "t0-35", "dualtor-56", "dualtor-120", "dualtor", "t0-80", "t0-backend"] SUPPORTED_T1_TOPOS = ["t1-lag", "t1-64-lag", "t1-backend"] SUPPORTED_PTF_TOPOS = ['ptf32', 'ptf64'] - SUPPORTED_ASIC_LIST = ["td2", "th", "th2", "spc1", "spc2", "spc3", "td3", "th3"] + SUPPORTED_ASIC_LIST = ["gb", "td2", "th", "th2", "spc1", "spc2", "spc3", "td3", "th3"] TARGET_QUEUE_WRED = 3 TARGET_LOSSY_QUEUE_SCHED = 0 @@ -52,33 +53,31 @@ def isBufferInApplDb(self, dut_asic): return self.buffer_model @pytest.fixture(scope='class', autouse=True) - def dutTestParams(self, duthosts, rand_one_dut_hostname, tbinfo, ptf_portmap_file): + def dutTestParams(self, dut_test_params, tbinfo): """ Prepares DUT host test params - - Args: - duthost (AnsibleHost): Device Under Test (DUT) - tbinfo (Fixture, dict): Map containing testbed information - ptfPortMapFile (Fxiture, str): filename residing on PTF host and contains port maps information - Returns: dutTestParams (dict): DUT host test params """ - duthost = duthosts[rand_one_dut_hostname] - mgFacts = duthost.get_extended_minigraph_facts(tbinfo) - topo = tbinfo["topo"]["name"] - - yield { - "topo": topo, - "hwsku": mgFacts["minigraph_hwsku"], - "basicParams": { - "router_mac": '' if topo in self.SUPPORTED_T0_TOPOS else duthost.facts["router_mac"], - "server": duthost.host.options['inventory_manager'].get_host(duthost.hostname).vars['ansible_host'], - "port_map_file": ptf_portmap_file, - "sonic_asic_type": duthost.facts['asic_type'], - "sonic_version": duthost.os_version - } - } + # update router mac + if dut_test_params["topo"] in self.SUPPORTED_T0_TOPOS: + dut_test_params["basicParams"]["router_mac"] = '' + + # For dualtor qos test scenario, DMAC of test traffic is default vlan interface's MAC address. + # To reduce duplicated code, put "is_dualtor" and "def_vlan_mac" into dutTestParams['basicParams']. 
+ if "dualtor" in tbinfo["topo"]["name"]: + dut_test_params["basicParams"]["is_dualtor"] = True + vlan_cfgs = tbinfo['topo']['properties']['topology']['DUT']['vlan_configs'] + if vlan_cfgs and 'default_vlan_config' in vlan_cfgs: + default_vlan_name = vlan_cfgs['default_vlan_config'] + if default_vlan_name: + for vlan in vlan_cfgs[default_vlan_name].values(): + if 'mac' in vlan and vlan['mac']: + dut_test_params["basicParams"]["def_vlan_mac"] = vlan['mac'] + break + pytest_assert(dut_test_params["basicParams"]["def_vlan_mac"] is not None, "Dual-TOR miss default VLAN MAC address") + + yield dut_test_params def runPtfTest(self, ptfhost, testCase='', testParams={}): """ @@ -95,7 +94,8 @@ def runPtfTest(self, ptfhost, testCase='', testParams={}): Raises: RunAnsibleModuleFail if ptf test fails """ - pytest_assert(ptfhost.shell( + try: + pytest_assert(ptfhost.shell( argv = [ "ptf", "--test-dir", @@ -107,6 +107,8 @@ def runPtfTest(self, ptfhost, testCase='', testParams={}): "remote", "-t", ";".join(["{}={}".format(k, repr(v)) for k, v in testParams.items()]), + "--qlen", + "10000", "--disable-ipv6", "--disable-vxlan", "--disable-geneve", @@ -120,6 +122,8 @@ def runPtfTest(self, ptfhost, testCase='', testParams={}): ], chdir = "/root", )["rc"] == 0, "Failed when running test '{0}'".format(testCase)) + except: + raise class QosSaiBase(QosBase): @@ -484,8 +488,8 @@ def __buildTestPorts(self, request, testPortIds, testPortIps, src_port_ids, dst_ @pytest.fixture(scope='class', autouse=True) def dutConfig( - self, request, duthosts, rand_one_dut_hostname, tbinfo, - enum_frontend_asic_index + self, request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, + enum_frontend_asic_index, lower_tor_host, tbinfo, dualtor_ports, dut_qos_maps ): """ Build DUT host config pertaining to QoS SAI tests @@ -498,15 +502,27 @@ def dutConfig( dutConfig (dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, and test ports """ - duthost = 
duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) dutLagInterfaces = [] dutPortIps = {} testPortIps = {} + uplinkPortIds = [] + uplinkPortIps = [] + uplinkPortNames = [] + downlinkPortIds = [] + downlinkPortIps = [] + downlinkPortNames = [] mgFacts = duthost.get_extended_minigraph_facts(tbinfo) topo = tbinfo["topo"]["name"] + dualTorPortIndexes = [] + testPortIds = [] # LAG ports in T1 TOPO need to be removed in Mellanox devices if topo in self.SUPPORTED_T0_TOPOS or isMellanoxDevice(duthost): @@ -532,6 +548,7 @@ def dutConfig( intf_map = mgFacts["minigraph_vlan_sub_interfaces"] else: intf_map = mgFacts["minigraph_interfaces"] + for portConfig in intf_map: intf = portConfig["attachto"].split(".")[0] if ipaddress.ip_interface(portConfig['peer_addr']).ip.version == 4: @@ -541,16 +558,20 @@ def dutConfig( if 'vlan' in portConfig: portIpMap['vlan_id'] = portConfig['vlan'] dutPortIps.update({portIndex: portIpMap}) + if intf in dualtor_ports: + dualTorPortIndexes.append(portIndex) testPortIps = self.__assignTestPortIps(mgFacts) elif topo in self.SUPPORTED_T1_TOPOS: + use_separated_upkink_dscp_tc_map = separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps) for iface,addr in dut_asic.get_active_ip_interfaces(tbinfo).items(): vlan_id = None if iface.startswith("Ethernet"): + portName = iface if "." 
in iface: - iface, vlan_id = iface.split(".") - portIndex = mgFacts["minigraph_ptf_indices"][iface] + portName, vlan_id = iface.split(".") + portIndex = mgFacts["minigraph_ptf_indices"][portName] portIpMap = {'peer_addr': addr["peer_ipv4"]} if vlan_id is not None: portIpMap['vlan_id'] = vlan_id @@ -562,6 +583,18 @@ def dutConfig( portIndex = mgFacts["minigraph_ptf_indices"][portName] portIpMap = {'peer_addr': addr["peer_ipv4"]} dutPortIps.update({portIndex: portIpMap}) + # If the leaf router is using separated DSCP_TO_TC_MAP on uplink/downlink ports. + # we also need to test them separately + if use_separated_upkink_dscp_tc_map: + neighName = mgFacts["minigraph_neighbors"].get(portName, {}).get("name", "").lower() + if 't0' in neighName: + downlinkPortIds.append(portIndex) + downlinkPortIps.append(addr["peer_ipv4"]) + downlinkPortNames.append(portName) + elif 't2' in neighName: + uplinkPortIds.append(portIndex) + uplinkPortIps.append(addr["peer_ipv4"]) + uplinkPortNames.append(portName) testPortIds = sorted(dutPortIps.keys()) else: @@ -607,7 +640,24 @@ def dutConfig( except KeyError: pass + dualTor = request.config.getoption("--qos_dual_tor") + if dualTor: + testPortIds = dualTorPortIndexes + testPorts = self.__buildTestPorts(request, testPortIds, testPortIps, src_port_ids, dst_port_ids) + # Update the uplink/downlink ports to testPorts + testPorts.update({ + "uplink_port_ids": uplinkPortIds, + "uplink_port_ips": uplinkPortIps, + "uplink_port_names": uplinkPortNames, + "downlink_port_ids": downlinkPortIds, + "downlink_port_ips": downlinkPortIps, + "downlink_port_names": downlinkPortNames + }) + dutinterfaces = {} + for port, index in mgFacts["minigraph_ptf_indices"].items(): + if 'Ethernet-Rec' not in port and 'Ethernet-IB' not in port: + dutinterfaces[index] = port yield { "dutInterfaces" : { index: port for port, index in mgFacts["minigraph_ptf_indices"].items() @@ -617,15 +667,21 @@ def dutConfig( "testPorts": testPorts, "qosConfigs": qosConfigs, "dutAsic" : 
dutAsic, - "dutTopo" : dutTopo + "dutTopo" : dutTopo, + "dutInstance" : duthost, + "dualTor" : request.config.getoption("--qos_dual_tor"), + "dualTorScenario" : len(dualtor_ports) != 0 } @pytest.fixture(scope='class') def ssh_tunnel_to_syncd_rpc( self, duthosts, rand_one_dut_hostname, enum_frontend_asic_index, - swapSyncd + swapSyncd, tbinfo, lower_tor_host ): - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] dut_asic = duthost.asic_instance(enum_frontend_asic_index) dut_asic.create_ssh_tunnel_sai_rpc() @@ -635,7 +691,7 @@ def ssh_tunnel_to_syncd_rpc( @pytest.fixture(scope='class') def updateIptables( - self, duthosts, rand_one_dut_hostname, enum_frontend_asic_index, swapSyncd + self, duthosts, rand_one_dut_hostname, enum_frontend_asic_index, swapSyncd, tbinfo, lower_tor_host ): """ Update iptables on DUT host with drop rule for BGP SYNC packets @@ -647,7 +703,10 @@ def updateIptables( Returns: None """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] dut_asic = duthost.asic_instance(enum_frontend_asic_index) ipVersions = [{"ip_version": "ipv4"}, {"ip_version": "ipv6"}] @@ -664,10 +723,9 @@ def updateIptables( @pytest.fixture(scope='class') def stopServices( - self, duthosts, rand_one_dut_hostname, enum_frontend_asic_index, - swapSyncd, enable_container_autorestart, disable_container_autorestart, - tbinfo, upper_tor_host, lower_tor_host, toggle_all_simulator_ports - ): + self, duthosts, rand_one_dut_hostname, enum_frontend_asic_index, + swapSyncd, enable_container_autorestart, disable_container_autorestart, get_mux_status, + tbinfo, upper_tor_host, lower_tor_host, toggle_all_simulator_ports): # noqa F811 """ Stop services (lldp-syncs, lldpd, bgpd) on DUT host prior to test start @@ -679,8 +737,8 @@ def stopServices( None """ if 'dualtor' 
in tbinfo['topo']['name']: - duthost = upper_tor_host - duthost_lower = lower_tor_host + duthost = lower_tor_host + duthost_upper = upper_tor_host else: duthost = duthosts[rand_one_dut_hostname] @@ -703,30 +761,18 @@ def updateDockerService(host, docker="", action="", service=""): docker=docker, action=action, service=service - ) + ), + module_ignore_errors=True ) logger.info("{}ed {}".format(action, service)) - services = [ - {"docker": dut_asic.get_docker_name("lldp"), "service": "lldp-syncd"}, - {"docker": dut_asic.get_docker_name("lldp"), "service": "lldpd"}, - {"docker": dut_asic.get_docker_name("bgp"), "service": "bgpd"}, - {"docker": dut_asic.get_docker_name("bgp"), "service": "bgpmon"}, - ] - - feature_list = ['lldp', 'bgp', 'syncd', 'swss'] - if 'dualtor' in tbinfo['topo']['name']: - disable_container_autorestart(duthost_lower, testcase="test_qos_sai", feature_list=feature_list) - - disable_container_autorestart(duthost, testcase="test_qos_sai", feature_list=feature_list) - for service in services: - updateDockerService(duthost, action="stop", **service) - """ Stop mux container for dual ToR """ if 'dualtor' in tbinfo['topo']['name']: file = "/usr/local/bin/write_standby.py" backup_file = "/usr/local/bin/write_standby.py.bkup" - toggle_all_simulator_ports(UPPER_TOR) + toggle_all_simulator_ports(LOWER_TOR) + check_result = wait_until(120, 10, 10, check_mux_status, duthosts, LOWER_TOR) + validate_check_result(check_result, duthosts, get_mux_status) try: duthost.shell("ls %s" % file) @@ -736,9 +782,26 @@ def updateDockerService(host, docker="", action="", service=""): except: pytest.skip('file {} not found'.format(file)) - duthost_lower.shell('sudo config feature state mux disabled') + duthost_upper.shell('sudo config feature state mux disabled') duthost.shell('sudo config feature state mux disabled') + services = [ + {"docker": dut_asic.get_docker_name("lldp"), "service": "lldp-syncd"}, + {"docker": dut_asic.get_docker_name("lldp"), "service": "lldpd"}, + 
{"docker": dut_asic.get_docker_name("bgp"), "service": "bgpd"}, + {"docker": dut_asic.get_docker_name("bgp"), "service": "bgpmon"}, + {"docker": dut_asic.get_docker_name("radv"), "service": "radvd"}, + {"docker": dut_asic.get_docker_name("swss"), "service": "arp_update"} + ] + + feature_list = ['lldp', 'bgp', 'syncd', 'swss'] + if 'dualtor' in tbinfo['topo']['name']: + disable_container_autorestart(duthost_upper, testcase="test_qos_sai", feature_list=feature_list) + + disable_container_autorestart(duthost, testcase="test_qos_sai", feature_list=feature_list) + for service in services: + updateDockerService(duthost, action="stop", **service) + yield for service in services: @@ -755,12 +818,12 @@ def updateDockerService(host, docker="", action="", service=""): pytest.skip('file {} not found'.format(backup_file)) duthost.shell('sudo config feature state mux enabled') - duthost_lower.shell('sudo config feature state mux enabled') + duthost_upper.shell('sudo config feature state mux enabled') logger.info("Start mux container for dual ToR testbed") enable_container_autorestart(duthost, testcase="test_qos_sai", feature_list=feature_list) if 'dualtor' in tbinfo['topo']['name']: - enable_container_autorestart(duthost_lower, testcase="test_qos_sai", feature_list=feature_list) + enable_container_autorestart(duthost_upper, testcase="test_qos_sai", feature_list=feature_list) @pytest.fixture(autouse=True) @@ -830,12 +893,36 @@ def disablePacketAging( duthost.command("docker exec syncd python /packets_aging.py enable") duthost.command("docker exec syncd rm -rf /packets_aging.py") + def dutArpProxyConfig(self, duthost): + # so far, only record ARP proxy config to logging for debug purpose + vlanInterface = {} + try: + vlanInterface = json.loads(duthost.shell('sonic-cfggen -d --var-json "VLAN_INTERFACE"')['stdout']) + except: + logger.info('Failed to read vlan interface config') + if not vlanInterface: + return + for key, value in vlanInterface.items(): + if 'proxy_arp' in value: + 
logger.info('ARP proxy is {} on {}'.format(value['proxy_arp'], key)) + + def dutBufferConfig(self, duthost): + bufferConfig = {} + try: + bufferConfig['BUFFER_POOL'] = json.loads(duthost.shell('sonic-cfggen -d --var-json "BUFFER_POOL"')['stdout']) + bufferConfig['BUFFER_PROFILE'] = json.loads(duthost.shell('sonic-cfggen -d --var-json "BUFFER_PROFILE"')['stdout']) + bufferConfig['BUFFER_QUEUE'] = json.loads(duthost.shell('sonic-cfggen -d --var-json "BUFFER_QUEUE"')['stdout']) + bufferConfig['BUFFER_PG'] = json.loads(duthost.shell('sonic-cfggen -d --var-json "BUFFER_PG"')['stdout']) + except Exception as err: + logger.info(err) + return bufferConfig + @pytest.fixture(scope='class', autouse=True) def dutQosConfig( self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, dutConfig, ingressLosslessProfile, ingressLossyProfile, egressLosslessProfile, egressLossyProfile, sharedHeadroomPoolSize, - tbinfo + tbinfo, lower_tor_host ): """ Prepares DUT host QoS configuration @@ -848,7 +935,11 @@ def dutQosConfig( Returns: QoSConfig (dict): Map containing DUT host QoS configuration """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) mgFacts = duthost.get_extended_minigraph_facts(tbinfo) pytest_assert("minigraph_hwsku" in mgFacts, "Could not find DUT SKU") @@ -869,6 +960,8 @@ def dutQosConfig( dutAsic = dutConfig["dutAsic"] dutTopo = dutConfig["dutTopo"] + self.dutArpProxyConfig(duthost) + if isMellanoxDevice(duthost): current_file_dir = os.path.dirname(os.path.realpath(__file__)) sub_folder_dir = os.path.join(current_file_dir, "files/mellanox/") @@ -882,9 +975,40 @@ def dutQosConfig( ingressLossyProfile, egressLosslessProfile, egressLossyProfile, - sharedHeadroomPoolSize + sharedHeadroomPoolSize, + dutConfig["dualTor"] ) qosParams = qpm.run() + + elif 'broadcom' in 
duthost.facts['asic_type'].lower(): + bufferConfig = self.dutBufferConfig(duthost) + pytest_assert(len(bufferConfig) == 4, "buffer config is incompleted") + pytest_assert('BUFFER_POOL' in bufferConfig, 'BUFFER_POOL is not exist in bufferConfig') + pytest_assert('BUFFER_PROFILE' in bufferConfig, 'BUFFER_PROFILE is not exist in bufferConfig') + pytest_assert('BUFFER_QUEUE' in bufferConfig, 'BUFFER_QUEUE is not exist in bufferConfig') + pytest_assert('BUFFER_PG' in bufferConfig, 'BUFFER_PG is not exist in bufferConfig') + + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + sub_folder_dir = os.path.join(current_file_dir, "files/brcm/") + if sub_folder_dir not in sys.path: + sys.path.append(sub_folder_dir) + import qos_param_generator + qpm = qos_param_generator.QosParamBroadcom(qosConfigs['qos_params'][dutAsic][dutTopo], + dutAsic, + portSpeedCableLength, + dutConfig, + ingressLosslessProfile, + ingressLossyProfile, + egressLosslessProfile, + egressLossyProfile, + sharedHeadroomPoolSize, + dutConfig["dualTor"], + dutTopo, + bufferConfig, + duthost, + tbinfo["topo"]["name"]) + qosParams = qpm.run() + else: qosParams = qosConfigs['qos_params'][dutAsic][dutTopo] yield { @@ -916,10 +1040,79 @@ def releaseAllPorts( testParams=dutTestParams["basicParams"] ) + def __loadSwssConfig(self, duthost): + """ + Load SWSS configuration on DUT + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Raises: + asserts if the load SWSS config failed + + Returns: + None + """ + duthost.docker_cmds_on_all_asics("swssconfig /etc/swss/config.d/switch.json", "swss") + + def __deleteTmpSwitchConfig(self, duthost): + """ + Delete temporary switch.json cofiguration files + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Returns: + None + """ + result = duthost.find(path=["/tmp"], patterns=["switch.json*"]) + for file in result["files"]: + duthost.file(path=file["path"], state="absent") + + @pytest.fixture(scope='class', autouse=True) + def handleFdbAging(self, 
tbinfo, duthosts, lower_tor_host, enum_rand_one_per_hwsku_frontend_hostname): + """ + Disable FDB aging and reenable at the end of tests + + Set fdb_aging_time to 0, update the swss configuration, and restore SWSS configuration afer + test completes + + Args: + duthost (AnsibleHost): Device Under Test (DUT) + + Returns: + None + """ + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + fdbAgingTime = 0 + + self.__deleteTmpSwitchConfig(duthost) + duthost.docker_copy_from_asic("swss", "/etc/swss/config.d/switch.json", "/tmp") + duthost.replace( + dest='/tmp/switch.json', + regexp='"fdb_aging_time": ".*"', + replace='"fdb_aging_time": "{0}"'.format(fdbAgingTime), + backup=True + ) + duthost.docker_copy_to_all_asics("swss", "/tmp/switch.json", "/etc/swss/config.d/switch.json") + self.__loadSwssConfig(duthost) + + yield + + result = duthost.find(path=["/tmp"], patterns=["switch.json.*"]) + if result["matched"] > 0: + src = result["files"][0]["path"] + duthost.docker_copy_to_all_asics("swss", src, "/etc/swss/config.d/switch.json") + self.__loadSwssConfig(duthost) + self.__deleteTmpSwitchConfig(duthost) + @pytest.fixture(scope='class', autouse=True) def populateArpEntries( self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, - ptfhost, dutTestParams, dutConfig, releaseAllPorts, + ptfhost, dutTestParams, dutConfig, releaseAllPorts, handleFdbAging, tbinfo, lower_tor_host ): """ Update ARP entries of QoS SAI test ports @@ -938,8 +1131,16 @@ def populateArpEntries( Raises: RunAnsibleModuleFail if ptf test fails """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + + dut_asic.command('sonic-clear fdb all') + dut_asic.command('sonic-clear arp') + saiQosTest = None if dutTestParams["topo"] in 
self.SUPPORTED_T0_TOPOS: saiQosTest = "sai_qos_tests.ARPpopulate" @@ -954,13 +1155,21 @@ def populateArpEntries( if saiQosTest: testParams = dutTestParams["basicParams"] testParams.update(dutConfig["testPorts"]) + testParams.update({ + "testPortIds": dutConfig["testPortIds"], + "testPortIps": dutConfig["testPortIps"] + }) self.runPtfTest( ptfhost, testCase=saiQosTest, testParams=testParams ) @pytest.fixture(scope='class', autouse=True) - def dut_disable_ipv6(self, duthosts, rand_one_dut_hostname): - duthost = duthosts[rand_one_dut_hostname] + def dut_disable_ipv6(self, duthosts, rand_one_dut_hostname, tbinfo, lower_tor_host): + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + duthost.shell("sysctl -w net.ipv6.conf.all.disable_ipv6=1") yield @@ -969,7 +1178,7 @@ def dut_disable_ipv6(self, duthosts, rand_one_dut_hostname): @pytest.fixture(scope='class', autouse=True) def sharedHeadroomPoolSize( self, request, duthosts, enum_frontend_asic_index, - rand_one_dut_hostname + rand_one_dut_hostname, tbinfo, lower_tor_host ): """ Retreives shared headroom pool size @@ -982,7 +1191,11 @@ def sharedHeadroomPoolSize( size: shared headroom pool size none if it is not defined """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + yield self.__getSharedHeadroomPoolSize( request, duthost.asic_instance(enum_frontend_asic_index) @@ -991,7 +1204,7 @@ def sharedHeadroomPoolSize( @pytest.fixture(scope='class', autouse=True) def ingressLosslessProfile( self, request, duthosts, enum_frontend_asic_index, - rand_one_dut_hostname, dutConfig + rand_one_dut_hostname, dutConfig, tbinfo, lower_tor_host, dualtor_ports ): """ Retreives ingress lossless profile @@ -1005,21 +1218,32 @@ def ingressLosslessProfile( Returns: ingressLosslessProfile (dict): Map of ingress lossless buffer profile attributes """ - duthost 
= duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + srcport = dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]] + + if srcport in dualtor_ports: + pgs = "2-4" + else: + pgs = "3-4" + yield self.__getBufferProfile( request, dut_asic, duthost.os_version, "BUFFER_PG_TABLE" if self.isBufferInApplDb(dut_asic) else "BUFFER_PG", - dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]], - "3-4" + srcport, + pgs ) @pytest.fixture(scope='class', autouse=True) def ingressLossyProfile( self, request, duthosts, enum_frontend_asic_index, - rand_one_dut_hostname, dutConfig + rand_one_dut_hostname, dutConfig, tbinfo, lower_tor_host ): """ Retreives ingress lossy profile @@ -1033,7 +1257,11 @@ def ingressLossyProfile( Returns: ingressLossyProfile (dict): Map of ingress lossy buffer profile attributes """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) yield self.__getBufferProfile( request, @@ -1047,7 +1275,7 @@ def ingressLossyProfile( @pytest.fixture(scope='class', autouse=True) def egressLosslessProfile( self, request, duthosts, enum_frontend_asic_index, - rand_one_dut_hostname, dutConfig + rand_one_dut_hostname, dutConfig, tbinfo, lower_tor_host, dualtor_ports ): """ Retreives egress lossless profile @@ -1061,21 +1289,32 @@ def egressLosslessProfile( Returns: egressLosslessProfile (dict): Map of egress lossless buffer profile attributes """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + srcport = 
dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]] + + if srcport in dualtor_ports: + queues = "2-4" + else: + queues = "3-4" + yield self.__getBufferProfile( request, dut_asic, duthost.os_version, "BUFFER_QUEUE_TABLE" if self.isBufferInApplDb(dut_asic) else "BUFFER_QUEUE", - dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]], - "3-4" + srcport, + queues ) @pytest.fixture(scope='class', autouse=True) def egressLossyProfile( self, request, duthosts, enum_frontend_asic_index, - rand_one_dut_hostname, dutConfig + rand_one_dut_hostname, dutConfig, tbinfo, lower_tor_host, dualtor_ports ): """ Retreives egress lossy profile @@ -1089,21 +1328,32 @@ def egressLossyProfile( Returns: egressLossyProfile (dict): Map of egress lossy buffer profile attributes """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + srcport = dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]] + + if srcport in dualtor_ports: + queues = "0-1" + else: + queues = "0-2" + yield self.__getBufferProfile( request, dut_asic, duthost.os_version, "BUFFER_QUEUE_TABLE" if self.isBufferInApplDb(dut_asic) else "BUFFER_QUEUE", - dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]], - "0-2" + srcport, + queues ) @pytest.fixture(scope='class') def losslessSchedProfile( self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, - dutConfig + dutConfig, tbinfo, lower_tor_host ): """ Retreives lossless scheduler profile @@ -1116,7 +1366,11 @@ def losslessSchedProfile( Returns: losslessSchedProfile (dict): Map of scheduler parameters """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + yield self.__getSchedulerParam( duthost.asic_instance(enum_frontend_asic_index), 
dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]], @@ -1126,7 +1380,7 @@ def losslessSchedProfile( @pytest.fixture(scope='class') def lossySchedProfile( self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, - dutConfig + dutConfig, tbinfo, lower_tor_host ): """ Retreives lossy scheduler profile @@ -1139,7 +1393,11 @@ def lossySchedProfile( Returns: lossySchedProfile (dict): Map of scheduler parameters """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + yield self.__getSchedulerParam( duthost.asic_instance(enum_frontend_asic_index), dutConfig["dutInterfaces"][dutConfig["testPorts"]["src_port_id"]], @@ -1149,7 +1407,7 @@ def lossySchedProfile( @pytest.fixture def updateSchedProfile( self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, - dutQosConfig, losslessSchedProfile, lossySchedProfile + dutQosConfig, losslessSchedProfile, lossySchedProfile, tbinfo, lower_tor_host ): """ Updates lossless/lossy scheduler profiles @@ -1163,7 +1421,11 @@ def updateSchedProfile( Returns: None """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + def updateRedisSchedParam(schedParam): """ Helper function to updates lossless/lossy scheduler profiles @@ -1218,7 +1480,7 @@ def updateRedisSchedParam(schedParam): @pytest.fixture def resetWatermark( - self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname + self, duthosts, enum_frontend_asic_index, rand_one_dut_hostname, tbinfo, lower_tor_host ): """ Reset queue watermark @@ -1229,300 +1491,32 @@ def resetWatermark( Returns: None """ - duthost = duthosts[rand_one_dut_hostname] + if 'dualtor' in tbinfo['topo']['name']: + duthost = lower_tor_host + else: + duthost = duthosts[rand_one_dut_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) 
dut_asic.command("counterpoll watermark enable") - dut_asic.command("sleep 20") + dut_asic.command("counterpoll queue enable") + dut_asic.command("sleep 70") dut_asic.command("counterpoll watermark disable") + dut_asic.command("counterpoll queue disable") -class QosSaiBaseMasic(QosBase): - - def build_port_ips(self, asic_index, ifaces, mg_facts): - """ - Returns list of port index and IP address for a given ASIC - """ - - dut_port_ips = dict() - - for iface, addr in ifaces.items(): - if iface.startswith("Ethernet"): - portIndex = mg_facts["minigraph_ptf_indices"][iface] - elif iface.startswith("PortChannel"): - portName = mg_facts["minigraph_portchannels"][iface]["members"][0] - portIndex = mg_facts["minigraph_ptf_indices"][portName] - - dut_port_ips.update({ - portIndex: { - "ipv4": addr["peer_ipv4"], - "bgp_neighbor": addr["bgp_neighbor"] - } - }) + @pytest.fixture(scope='function', autouse=True) + def set_static_route(self, duthost, dutConfig, enum_frontend_asic_index): + if duthost.facts["asic_type"] != "cisco-8000": + yield + return + dst_keys = [] + for k in dutConfig["testPorts"].keys(): + if re.search("dst_port.*ip", k): + dst_keys.append(k) - return {asic_index: dut_port_ips} - - def get_backend_ip_ifs(self, duthost, frontend_asic): - """ - On a frontend ASIC return a dict of interfaces with - backend ASIC names - """ - pytest_assert( - frontend_asic in duthost.get_frontend_asic_ids(), - "{} is not frontend ASIC ID".format(frontend_asic) - ) - - ip_ifs = duthost.asic_instance( - frontend_asic - ).show_ip_interface()["ansible_facts"]["ip_interfaces"] - - # Find backend interface names - return {intf: ip["bgp_neighbor"].lower() for intf, ip in ip_ifs.items() - if ip["bgp_neighbor"].lower().startswith("asic")} - - def check_v4route_backend_nhop(self, duthost, frontend_asic, route): - """ - On frontend ASIC Check if v4 address has at least one backend - ASIC nexthop - - Returns: - False if not nexthops with backend ASICs - """ - cmd = 'vtysh -n {} -c "show ip 
route {} json"'.format( - frontend_asic, route - ) - result = duthost.command(cmd) - pytest_assert(result["rc"] == 0, cmd) - route_info = json.loads(result["stdout"]) - nhop = route_info[route_info.keys().pop()][0] - - nhop_ifs = {x["interfaceName"] for x in nhop["nexthops"]} - backend_ifs = set(self.get_backend_ip_ifs( - duthost, frontend_asic).keys() - ) - - return len(nhop_ifs.intersection(backend_ifs)) - - def backend_ip_if_admin_state( - self, duthost, test_asic, frontend_asic, admin_state - ): - """ - On a frontend ASIC bring down ports (channels) towards backend ASICs - other than the ASIC under test, so that traffic always goes via - backend ASIC under test - """ - - def is_intf_status(asic, intf, oper_state): - intf_status = duthost.asic_instance(asic).show_interface( - command="status", include_internal_intfs=True - )["ansible_facts"]["int_status"] - if intf_status[intf]["oper_state"] == oper_state: - return True - return False - - oper_state = "up" if admin_state == "startup" else "down" - ip_ifs = self.get_backend_ip_ifs(duthost, frontend_asic) - - for intf, asic in ip_ifs.items(): - if asic != "asic{}".format(test_asic): - if admin_state == "startup": - duthost.asic_instance(frontend_asic).startup_interface(intf) - else: - duthost.asic_instance(frontend_asic).shutdown_interface(intf) - - # wait for port status to change - pytest_assert( - wait_until( - 10, 1, 0, is_intf_status, frontend_asic, intf, - oper_state - ), - "Failed to update port status {} {}".format( - intf, admin_state - ) - ) - - - def find_asic_traffic_ports(self, duthost, ptfhost, test_params): - """ - For a given pair of source IP and destination IP, identify - the path taken by the L3 packet. Path implies the backend ASIC - and its tx and rx ports. The path is identified by sending - a burst of packets and finding the difference in interface - counters before and after the burst. - - Assert is thrown if multiple ports or multiple backend ASICs - have similar interface counters. 
- """ - def find_traffic_ports(asic_id, c1, c2, diff): - - rx_port = None - tx_port = None - - a1 = c1[asic_id]["ansible_facts"]["int_counter"] - a2 = c2[asic_id]["ansible_facts"]["int_counter"] - - for port in a2.keys(): - rx_diff = int(a2[port]["RX_OK"]) - int(a1[port]["RX_OK"]) - - if rx_diff >= diff: - pytest_assert( - rx_port is None, - "Multiple rx ports with {} rx packets".format(diff) - ) - rx_port = port - - tx_diff = int(a2[port]["TX_OK"]) - int(a1[port]["TX_OK"]) - if tx_diff >= diff: - pytest_assert( - tx_port is None, - "Multiple tx ports with {} tx packets".format(diff) - ) - tx_port = port - - # return rx, tx ports that have a packet count difference of > diff - return rx_port, tx_port - - test_params["count"] = 100 - duthost.command("sonic-clear counters") - cnt_before = duthost.show_interface( - command="counter", asic_index="all", include_internal_intfs=True - ) - # send a burst of packets from a given src IP to dst IP - self.runPtfTest( - ptfhost, testCase="sai_qos_tests.PacketTransmit", - testParams=test_params - ) - time.sleep(8) - cnt_after = duthost.show_interface( - command="counter", asic_index="all", include_internal_intfs=True - ) - - asic_idx = None - rx_port = None - tx_port = None - - # identify the backend ASIC and the rx, tx ports on that ASIC - # that forwarded the traffic - for asic in duthost.get_backend_asic_ids(): - rx, tx = find_traffic_ports( - asic, cnt_before, cnt_after, test_params["count"] - ) - if rx and tx: - pytest_assert( - rx_port is None and tx_port is None, - "Multiple backend ASICs with rx/tx ports" - ) - rx_port, tx_port, asic_idx = rx, tx, asic - - pytest_assert(asic_idx is not None, "ASIC, rx and tx ports not found") - return ({ - "test_src_port_name": rx_port, - "test_dst_port_name": tx_port, - "asic_under_test": asic_idx, - } - ) - - @pytest.fixture(scope='class') - def build_ip_interface( - self, duthosts, rand_one_dut_hostname, swapSyncd, tbinfo - ): - """ - builds a list of active IP interfaces and port 
index - for each ASIC - - Returns: - { - asic_index: { - portIndex: { - "ipv4": peer ipv4, - "bgp_neighbor": BGP neighbor - } - . - . - } - . - . - } - """ - duthost = duthosts[rand_one_dut_hostname] - - topo = tbinfo["topo"]["name"] - if topo not in self.SUPPORTED_T1_TOPOS: - pytest.skip("unsupported topology {}".format(topo)) - - pytest_require(duthost.is_multi_asic, "Not a multi asic platform") - - mg_facts = duthost.get_extended_minigraph_facts(tbinfo) - ip_ifaces = duthost.get_active_ip_interfaces(tbinfo, asic_index="all") - - port_ips = dict() - for idx in range(len(ip_ifaces)): - port_ips.update(self.build_port_ips(idx, ip_ifaces[idx], mg_facts)) - - yield port_ips - - @pytest.fixture(scope='class') - def build_test_ports(self, build_ip_interface): - """ - This fixture builds a list of active L3 interface ports on each - ASIC so that source and destination interfaces can be selected - from different ASICs. Returns a dict of 'src' and 'dst' interfaces - along with the ASIC ID - - Only frontend ASCIs connected to T0 devices are reachable end - to end on multi ASIC platform. - """ - # find asics with T0 neighbors - ports = dict() - for k, v in build_ip_interface.items(): - try: - port_index = next(iter(v)) - port_info = v[port_index] - if port_info["bgp_neighbor"].lower().endswith("t0"): - ports.update({k: v}) - except StopIteration: - continue - - pytest_assert( - len(ports) >= 0, "Ports from at least two ASICs required" - ) - - test_ports = dict() - keys = ports.keys() - src_asic = keys.pop(0) - test_ports.update({"src": {src_asic: ports[src_asic]}}) - test_ports.update({"dst": dict()}) - for dst_asic in keys: - test_ports["dst"].update({dst_asic: ports[dst_asic]}) - - yield test_ports - - @pytest.fixture(scope='class') - def get_test_ports(self, build_test_ports): - """ - Fixture to select test ports from a given list of active L3 - interfaces from multiple frontend ASICs. The source and - destination port will be on different ASICs. 
- - Fixture also returns the source and desitnation ASCIS IDs - """ - - # source port - src_asic = build_test_ports["src"].keys().pop(0) - src_port_ids = build_test_ports["src"][src_asic].keys() - src_port_id = src_port_ids.pop(0) - src_port_ip = build_test_ports["src"][src_asic][src_port_id]["ipv4"] - - # destination port - dst_asic = build_test_ports["dst"].keys().pop(0) - dst_port_ids = build_test_ports["dst"][dst_asic].keys() - dst_port_id = dst_port_ids.pop(0) - dst_port_ip = build_test_ports["dst"][dst_asic][dst_port_id]["ipv4"] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) + for k in dst_keys: + dut_asic.shell("ping -c 3 {}".format( + dutConfig["testPorts"][k]), module_ignore_errors=True) - return { - "dst_port_id": dst_port_id, - "dst_port_ip": dst_port_ip, - "dst_asic": dst_asic, - "src_port_id": src_port_id, - "src_port_ip": src_port_ip, - "src_asic": src_asic, - } + yield diff --git a/tests/qos/test_buffer.py b/tests/qos/test_buffer.py index b6b8b921fb6..a0500be9b89 100644 --- a/tests/qos/test_buffer.py +++ b/tests/qos/test_buffer.py @@ -18,6 +18,7 @@ from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer from tests.common.utilities import check_qos_db_fv_reference_with_table from tests.common.utilities import skip_release +from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled, dualtor_ports # lgtm[py/unused-import] pytestmark = [ pytest.mark.topology('any') @@ -55,6 +56,8 @@ LOSSLESS_MTU = None SMALL_PACKET_PERCENTAGE = None +KEY_2_LOSSLESS_QUEUE = "2_lossless_queues" +KEY_4_LOSSLESS_QUEUE = "4_lossless_queues" def detect_buffer_model(duthost): """Detect the current buffer model (dynamic or traditional) and store it for further use. 
Called only once when the module is initialized @@ -295,6 +298,7 @@ def setup_module(duthosts, rand_one_dut_hostname, request): duthost = duthosts[rand_one_dut_hostname] detect_buffer_model(duthost) if not is_mellanox_device(duthost): + load_lossless_headroom_data(duthost) yield return @@ -2221,7 +2225,7 @@ def test_buffer_model_test(duthosts, rand_one_dut_hostname, conn_graph_facts): _recovery_to_dynamic_buffer_model(duthost) -def test_buffer_deployment(duthosts, rand_one_dut_hostname, conn_graph_facts): +def test_buffer_deployment(duthosts, rand_one_dut_hostname, conn_graph_facts, tbinfo, dualtor_ports): """The testcase to verify whether buffer template has been correctly rendered and applied 1. For all ports in the config_db, @@ -2373,37 +2377,63 @@ def _check_port_buffer_info_and_return(duthost, table, ids, port, expected_profi pg_name_map = _compose_dict_from_cli(duthost.shell('redis-cli -n 2 hgetall COUNTERS_PG_NAME_MAP')['stdout'].split()) queue_name_map = _compose_dict_from_cli(duthost.shell('redis-cli -n 2 hgetall COUNTERS_QUEUE_NAME_MAP')['stdout'].split()) cable_length_map = _compose_dict_from_cli(duthost.shell('redis-cli -n 4 hgetall "CABLE_LENGTH|AZURE"')['stdout'].split()) - - buffer_items_to_check_dict = {"up": [('BUFFER_PG_TABLE', '0', '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]'), + buffer_table_up = { + KEY_2_LOSSLESS_QUEUE: [('BUFFER_PG_TABLE', '0', '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]'), ('BUFFER_QUEUE_TABLE', '0-2', '[BUFFER_PROFILE_TABLE:q_lossy_profile]'), ('BUFFER_QUEUE_TABLE', '3-4', '[BUFFER_PROFILE_TABLE:egress_lossless_profile]'), ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:q_lossy_profile]'), (None, None, None) - ], - "down": [('BUFFER_PG_TABLE', '0', '[BUFFER_PROFILE_TABLE:ingress_lossy_pg_zero_profile]'), + ], + KEY_4_LOSSLESS_QUEUE: [('BUFFER_PG_TABLE', '0', '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]'), + ('BUFFER_QUEUE_TABLE', '0-1', '[BUFFER_PROFILE_TABLE:q_lossy_profile]'), + ('BUFFER_QUEUE_TABLE', '2-4', 
'[BUFFER_PROFILE_TABLE:egress_lossless_profile]'), + ('BUFFER_QUEUE_TABLE', '5', '[BUFFER_PROFILE_TABLE:q_lossy_profile]'), + ('BUFFER_QUEUE_TABLE', '6', '[BUFFER_PROFILE_TABLE:egress_lossless_profile]'), + ('BUFFER_QUEUE_TABLE', '7', '[BUFFER_PROFILE_TABLE:q_lossy_profile]'), + (None, None, None) + ] + } + if is_tunnel_qos_remap_enabled(duthost): + buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-7', '[BUFFER_PROFILE_TABLE:q_lossy_profile]') + + if not is_mellanox_device(duthost): + buffer_table_up[KEY_2_LOSSLESS_QUEUE][1] = ('BUFFER_QUEUE_TABLE', '0-2', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + if is_tunnel_qos_remap_enabled(duthost): + buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-7', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + else: + buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + + buffer_table_up[KEY_4_LOSSLESS_QUEUE][1] = ('BUFFER_QUEUE_TABLE', '0-1', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + buffer_table_up[KEY_4_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + buffer_table_up[KEY_4_LOSSLESS_QUEUE][5] = ('BUFFER_QUEUE_TABLE', '7', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + + buffer_table_down = { + KEY_2_LOSSLESS_QUEUE: [('BUFFER_PG_TABLE', '0', '[BUFFER_PROFILE_TABLE:ingress_lossy_pg_zero_profile]'), ('BUFFER_QUEUE_TABLE', '0-2', '[BUFFER_PROFILE_TABLE:egress_lossy_zero_profile]'), ('BUFFER_QUEUE_TABLE', '3-4', '[BUFFER_PROFILE_TABLE:egress_lossless_zero_profile]'), ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:egress_lossy_zero_profile]'), (None, None, None) - ] + ], + KEY_4_LOSSLESS_QUEUE: [(None, None, None)] # The admin_down ports can not be dualtor_ports. 
Hence there is no 4_lossless_queue profile } - - if not is_mellanox_device(duthost): - buffer_items_to_check_dict["up"][1] = ('BUFFER_QUEUE_TABLE', '0-2', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') - buffer_items_to_check_dict["up"][3] = ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]') + if is_tunnel_qos_remap_enabled(duthost): + buffer_table_down[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-7', '[BUFFER_PROFILE_TABLE:egress_lossy_zero_profile]') + + buffer_items_to_check_dict = {"up": buffer_table_up, "down": buffer_table_down} if check_qos_db_fv_reference_with_table(duthost): profile_wrapper = '[BUFFER_PROFILE_TABLE:{}]' is_qos_db_reference_with_table = True else: - for key, buffer_items_to_check in buffer_items_to_check_dict.items(): - new_buffer_items_to_check = [] - for item in buffer_items_to_check: - table, ids, profiles = item - if profiles: - profiles = profiles.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '') - new_buffer_items_to_check.append((table, ids, profiles)) - buffer_items_to_check_dict[key] = new_buffer_items_to_check + for status, buffer_items_to_check_4_6 in buffer_items_to_check_dict.items(): + for queue_4_6, buffer_items_to_check in buffer_items_to_check_4_6.items(): + new_buffer_items_to_check = [] + for item in buffer_items_to_check: + table, ids, profiles = item + if profiles: + profiles = profiles.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '') + new_buffer_items_to_check.append((table, ids, profiles)) + buffer_items_to_check_dict[status][queue_4_6] = new_buffer_items_to_check profile_wrapper = '{}' is_qos_db_reference_with_table = False @@ -2418,19 +2448,26 @@ def _check_port_buffer_info_and_return(duthost, table, ids, port, expected_profi speed = port_config['speed'] expected_profile = make_expected_profile_name(speed, cable_length, number_of_lanes=len(port_config['lanes'].split(','))) + if port in dualtor_ports: + key_name = KEY_4_LOSSLESS_QUEUE + else: + key_name = 
KEY_2_LOSSLESS_QUEUE # The last item in the check list various according to port's admin state. # We need to append it according to the port each time. Pop the last item first if port_config.get('admin_status') == 'up': admin_up_ports.add(port) - buffer_items_to_check = buffer_items_to_check_dict["up"] - buffer_items_to_check[-1] = ('BUFFER_PG_TABLE', '3-4', profile_wrapper.format(expected_profile)) + buffer_items_to_check = buffer_items_to_check_dict["up"][key_name][:] + if key_name == KEY_4_LOSSLESS_QUEUE: + buffer_items_to_check.extend( + [('BUFFER_PG_TABLE', '2-4', profile_wrapper.format(expected_profile)), + ('BUFFER_PG_TABLE', '6', profile_wrapper.format(expected_profile))]) + else: + buffer_items_to_check.append(('BUFFER_PG_TABLE', '3-4', profile_wrapper.format(expected_profile))) else: if is_mellanox_device(duthost): - buffer_items_to_check = buffer_items_to_check_dict["down"] - elif is_broadcom_device(duthost) and asic_type in ['td2']: - buffer_items_to_check = [(None, None, None)] + buffer_items_to_check = buffer_items_to_check_dict["down"][key_name] else: - buffer_items_to_check = [('BUFFER_PG_TABLE', '3-4', profile_wrapper.format(expected_profile))] + buffer_items_to_check = [(None, None, None)] for table, ids, expected_profile in buffer_items_to_check: logging.info("Checking buffer item {}:{}:{}".format(table, port, ids)) @@ -2481,25 +2518,34 @@ def _check_port_buffer_info_and_return(duthost, table, ids, port, expected_profi "Buffer profile {} has different buffer pool id {} from others {}".format(expected_profile, buffer_profile_asic_info['SAI_BUFFER_PROFILE_ATTR_POOL_ID'], lossless_pool_oid)) else: pytest_assert(profiles_checked[expected_profile] == buffer_profile_oid, - "PG {}:3-4 has different OID of profile from other PGs sharing the same profile {}".format(port, expected_profile)) + "PG {}:{} has different OID of profile from other PGs sharing the same profile {}".format(port, ids, expected_profile)) if not BUFFER_MODEL_DYNAMIC: + + def 
_profile_name(duthost, port, pg_id_name): + if is_mellanox_device(duthost): + profile_name = None + else: + profile_name = duthost.shell('redis-cli hget "BUFFER_PG_TABLE:{}:{}" profile'.format(port, pg_id_name))['stdout'] + + return profile_name + port_to_shutdown = admin_up_ports.pop() - expected_profile = duthost.shell('redis-cli hget "BUFFER_PG_TABLE:{}:3-4" profile'.format(port))['stdout'] - if is_mellanox_device(duthost): - profile_to_check = None + if port_to_shutdown in dualtor_ports: + pg_id_names = ["2-4", "6"] else: - profile_to_check = expected_profile + pg_id_names = ["3-4"] try: # Shutdown the port and check whether the lossless PG has been remvoed logging.info("Shut down an admin-up port {} and check its buffer information".format(port_to_shutdown)) duthost.shell('config interface shutdown {}'.format(port_to_shutdown)) - wait_until(60, 5, 0, _check_port_buffer_info_and_return, duthost, 'BUFFER_PG_TABLE', '3-4', port_to_shutdown, profile_to_check) - + for pg_id_name in pg_id_names: + wait_until(60, 5, 0, _check_port_buffer_info_and_return, duthost, 'BUFFER_PG_TABLE', pg_id_name, port_to_shutdown, _profile_name(duthost, port, pg_id_name)) # Startup the port and check whether the lossless PG has been reconfigured logging.info("Re-startup the port {} and check its buffer information".format(port_to_shutdown)) duthost.shell('config interface startup {}'.format(port_to_shutdown)) - wait_until(60, 5, 0, _check_port_buffer_info_and_return, duthost, 'BUFFER_PG_TABLE', '3-4', port_to_shutdown, expected_profile) + for pg_id_name in pg_id_names: + wait_until(60, 5, 0, _check_port_buffer_info_and_return, duthost, 'BUFFER_PG_TABLE', pg_id_name, port_to_shutdown, _profile_name(duthost, port, pg_id_name)) finally: duthost.shell('config interface startup {}'.format(port_to_shutdown), module_ignore_errors=True) diff --git a/tests/qos/test_buffer_traditional.py b/tests/qos/test_buffer_traditional.py index 318bb775215..60536268460 100644 --- 
a/tests/qos/test_buffer_traditional.py +++ b/tests/qos/test_buffer_traditional.py @@ -13,7 +13,7 @@ RECLAIM_BUFFER_ON_ADMIN_DOWN = None @pytest.fixture(scope="module", autouse=True) -def setup_module(duthosts, rand_one_dut_hostname): +def setup_module(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index): """Setup module. Called only once when the module is initialized Args: @@ -22,7 +22,8 @@ def setup_module(duthosts, rand_one_dut_hostname): """ global RECLAIM_BUFFER_ON_ADMIN_DOWN - duthost = duthosts[rand_one_dut_hostname] + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) if duthost.facts["asic_type"] in ["mellanox"]: RECLAIM_BUFFER_ON_ADMIN_DOWN = True else: @@ -31,10 +32,10 @@ def setup_module(duthosts, rand_one_dut_hostname): if "201911" not in duthost.os_version: pytest.skip("Buffer test runs on 201911 branch only, skip") - load_lossless_info_from_pg_profile_lookup(duthost) + load_lossless_info_from_pg_profile_lookup(duthost, dut_asic) -def load_lossless_info_from_pg_profile_lookup(duthost): +def load_lossless_info_from_pg_profile_lookup(duthost, dut_asic): """Load pg_profile_lookup.ini to a dictionary. 
Called only once when the module is initialized Args: @@ -46,11 +47,13 @@ def load_lossless_info_from_pg_profile_lookup(duthost): global DEFAULT_LOSSLESS_PROFILES # Check the threshold mode - threshold_mode = duthost.shell('redis-cli -n 4 hget "BUFFER_POOL|ingress_lossless_pool" mode')['stdout'] + threshold_mode = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hget', 'BUFFER_POOL|ingress_lossless_pool', 'mode'])[0] threshold_field_name = 'dynamic_th' if threshold_mode == 'dynamic' else 'static_th' dut_hwsku = duthost.facts["hwsku"] dut_platform = duthost.facts["platform"] skudir = "/usr/share/sonic/device/{}/{}/".format(dut_platform, dut_hwsku) + if dut_asic.namespace is not None: + skudir = skudir + dut_asic.namespace.split('asic')[-1] + '/' pg_profile_lookup_file = os.path.join(skudir, 'pg_profile_lookup.ini') duthost.file(path=pg_profile_lookup_file, state="file") lines = duthost.shell('cat {}'.format(pg_profile_lookup_file))["stdout_lines"] @@ -82,7 +85,7 @@ def make_dict_from_output_lines(lines): return None -def test_buffer_pg(duthosts, rand_one_dut_hostname, conn_graph_facts): +def test_buffer_pg(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_frontend_asic_index, conn_graph_facts): """The testcase for (traditional) buffer manager 1. 
For all ports in the config_db, @@ -118,7 +121,7 @@ def _check_condition(condition, message, use_assert): return True - def _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile, use_assert=True): + def _check_port_buffer_info_and_get_profile_oid(dut_asic, port, expected_profile, use_assert=True): """Check port's buffer information against CONFIG_DB and ASIC_DB Args: @@ -130,19 +133,19 @@ def _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile, Return: A tuple consisting of the OID of buffer profile and whether there is any check failed """ - profile_in_pg = duthost.shell('redis-cli -n 4 hget "BUFFER_PG|{}|3-4" profile'.format(port))['stdout'] + profile_in_pg = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hget', 'BUFFER_PG|{}|3-4'.format(port), 'profile']) buffer_profile_oid = None default_lossless_pgs = ['3', '4'] if expected_profile: - if not _check_condition(profile_in_pg == expected_profile, "Buffer profile of lossless PG of port {} isn't the expected ({})".format(port, expected_profile), use_assert): + if not _check_condition(profile_in_pg[0] == expected_profile, "Buffer profile of lossless PG of port {} isn't the expected ({})".format(port, expected_profile), use_assert): return None, False if pg_name_map: for pg in default_lossless_pgs: buffer_pg_asic_oid = pg_name_map['{}:{}'.format(port, pg)] - buffer_pg_asic_key = duthost.shell('redis-cli -n 1 keys *{}*'.format(buffer_pg_asic_oid))['stdout'] - buffer_profile_oid_in_pg = duthost.shell('redis-cli -n 1 hget {} SAI_INGRESS_PRIORITY_GROUP_ATTR_BUFFER_PROFILE'.format(buffer_pg_asic_key))['stdout'] + buffer_pg_asic_key = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'keys', '*{}*'.format(buffer_pg_asic_oid)])[0] + buffer_profile_oid_in_pg = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'hget', buffer_pg_asic_key, 'SAI_INGRESS_PRIORITY_GROUP_ATTR_BUFFER_PROFILE'])[0] logging.info("Checking admin-up port {} lossless PG {} in ASIC_DB ({})".format(port, 
pg, buffer_profile_oid_in_pg)) if buffer_profile_oid: if not _check_condition(buffer_profile_oid == buffer_profile_oid_in_pg, @@ -157,8 +160,8 @@ def _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile, if pg_name_map: for pg in default_lossless_pgs: buffer_pg_asic_oid = pg_name_map['{}:{}'.format(port, pg)] - buffer_pg_asic_key = duthost.shell('redis-cli -n 1 keys *{}*'.format(buffer_pg_asic_oid))['stdout'] - buffer_profile_oid_in_pg = duthost.shell('redis-cli -n 1 hget {} SAI_INGRESS_PRIORITY_GROUP_ATTR_BUFFER_PROFILE'.format(buffer_pg_asic_key))['stdout'] + buffer_pg_asic_key = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'keys', '*{}*'.format(buffer_pg_asic_oid)])[0] + buffer_profile_oid_in_pg = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'hget', buffer_pg_asic_key, 'SAI_INGRESS_PRIORITY_GROUP_ATTR_BUFFER_PROFILE'])[0] logging.info("Checking admin-down port {} lossless PG {}".format(port, pg)) if not _check_condition(not buffer_profile_oid_in_pg or buffer_profile_oid_in_pg == 'oid:0x0', "Buffer PG configured on admin down port in ASIC_DB {}".format(port), @@ -167,7 +170,7 @@ def _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile, return buffer_profile_oid, True - def _check_port_buffer_info_and_return(duthost, port, expected_profile): + def _check_port_buffer_info_and_return(dut_asic, port, expected_profile): """Check port's buffer information against CONFIG_DB and ASIC_DB and return the result This is called from wait_until @@ -180,24 +183,25 @@ def _check_port_buffer_info_and_return(duthost, port, expected_profile): Return: Whether all the checks passed """ - _, result = _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile, False) + _, result = _check_port_buffer_info_and_get_profile_oid(dut_asic, port, expected_profile, False) return result global DEFAULT_LOSSLESS_PROFILES - duthost = duthosts[rand_one_dut_hostname] + duthost = 
duthosts[enum_rand_one_per_hwsku_frontend_hostname] + dut_asic = duthost.asic_instance(enum_frontend_asic_index) # Check whether the COUNTERS_PG_NAME_MAP exists. Skip ASIC_DB checking if it isn't - pg_name_map = make_dict_from_output_lines(duthost.shell('redis-cli -n 2 hgetall COUNTERS_PG_NAME_MAP')['stdout'].split()) - cable_length_map = make_dict_from_output_lines(duthost.shell('redis-cli -n 4 hgetall "CABLE_LENGTH|AZURE"')['stdout'].split()) + pg_name_map = make_dict_from_output_lines(dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 2, 'hgetall', 'COUNTERS_PG_NAME_MAP'])) + cable_length_map = make_dict_from_output_lines(dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hgetall', 'CABLE_LENGTH|AZURE'])) - configdb_ports = [x.split('|')[1] for x in duthost.shell('redis-cli -n 4 keys "PORT|*"')['stdout'].split()] + configdb_ports = [x.split('|')[1] for x in dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'keys', 'PORT|*'])] profiles_checked = {} lossless_pool_oid = None buffer_profile_asic_info = None admin_up_ports = set() for port in configdb_ports: - port_config = make_dict_from_output_lines(duthost.shell('redis-cli -n 4 hgetall "PORT|{}"'.format(port))['stdout'].split()) + port_config = make_dict_from_output_lines(dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hgetall', 'PORT|{}'.format(port)])) is_port_up = port_config.get('admin_status') == 'up' if is_port_up or not RECLAIM_BUFFER_ON_ADMIN_DOWN: @@ -210,17 +214,17 @@ def _check_port_buffer_info_and_return(duthost, port, expected_profile): logging.info("Checking admin-{} port {} buffer information: profile {}".format('up' if is_port_up else 'down', port, expected_profile)) - buffer_profile_oid, _ = _check_port_buffer_info_and_get_profile_oid(duthost, port, expected_profile) + buffer_profile_oid, _ = _check_port_buffer_info_and_get_profile_oid(dut_asic, port, expected_profile) if expected_profile not in profiles_checked: - profile_info = make_dict_from_output_lines(duthost.shell('redis-cli -n 4 hgetall 
"{}"'.format(expected_profile[1:-1]))['stdout'].split()) + profile_info = make_dict_from_output_lines(dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hgetall', expected_profile[1:-1]])) pytest_assert(profile_info == DEFAULT_LOSSLESS_PROFILES[(speed, cable_length)], "Buffer profile {} {} doesn't match default {}".format(expected_profile, profile_info, DEFAULT_LOSSLESS_PROFILES[(speed, cable_length)])) logging.info("Checking buffer profile {}: OID: {}".format(expected_profile, buffer_profile_oid)) if buffer_profile_oid: # Further check the buffer profile in ASIC_DB - buffer_profile_key = duthost.shell('redis-cli -n 1 keys *{}*'.format(buffer_profile_oid))['stdout'] - buffer_profile_asic_info = make_dict_from_output_lines(duthost.shell('redis-cli -n 1 hgetall {}'.format(buffer_profile_key))['stdout'].split()) + buffer_profile_key = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'keys', '*{}*'.format(buffer_profile_oid)])[0] + buffer_profile_asic_info = make_dict_from_output_lines(dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 1, 'hgetall', buffer_profile_key])) pytest_assert(buffer_profile_asic_info['SAI_BUFFER_PROFILE_ATTR_XON_TH'] == profile_info['xon'] and buffer_profile_asic_info['SAI_BUFFER_PROFILE_ATTR_XOFF_TH'] == profile_info['xoff'] and buffer_profile_asic_info['SAI_BUFFER_PROFILE_ATTR_RESERVED_BUFFER_SIZE'] == profile_info['size'] and @@ -245,25 +249,29 @@ def _check_port_buffer_info_and_return(duthost, port, expected_profile): # After deployment, there should not be lossless PG configured on any platforms # This is guaranteed by buffers_config.j2: no lossless PG will be configured on inactive ports logging.info("Checking admin-down port buffer information: {}".format(port)) - _, _ = _check_port_buffer_info_and_get_profile_oid(duthost, port, None) + _, _ = _check_port_buffer_info_and_get_profile_oid(dut_asic, port, None) port_to_shutdown = admin_up_ports.pop() - expected_profile = duthost.shell('redis-cli -n 4 hget "BUFFER_PG|{}|3-4" 
profile'.format(port_to_shutdown))['stdout'] + expected_profile = dut_asic.run_redis_cmd(argv=['redis-cli', '-n', 4, 'hget', 'BUFFER_PG|{}|3-4'.format(port_to_shutdown), 'profile'])[0] + + ns = '' + if dut_asic.namespace is not None: + ns += '-n {}'.format(dut_asic.namespace) try: # Shutdown the port and check whether the lossless PGs # - have been removed on Mellanox platforms # - will not be affected on other platforms logging.info("Shut down an admin-up port {} and check its buffer information".format(port_to_shutdown)) - duthost.shell('config interface shutdown {}'.format(port_to_shutdown)) + dut_asic.shell('config interface {} shutdown {}'.format(ns, port_to_shutdown)) if RECLAIM_BUFFER_ON_ADMIN_DOWN: expected_profile_admin_down = None else: expected_profile_admin_down = expected_profile - wait_until(60, 5, _check_port_buffer_info_and_return, duthost, port_to_shutdown, expected_profile_admin_down) + wait_until(60, 5, 0, _check_port_buffer_info_and_return, dut_asic, port_to_shutdown, expected_profile_admin_down) # Startup the port and check whether the lossless PG has been reconfigured logging.info("Re-startup the port {} and check its buffer information".format(port_to_shutdown)) - duthost.shell('config interface startup {}'.format(port_to_shutdown)) - wait_until(60, 5, _check_port_buffer_info_and_return, duthost, port_to_shutdown, expected_profile) + dut_asic.shell('config interface {} startup {}'.format(ns, port_to_shutdown)) + wait_until(60, 5, 0, _check_port_buffer_info_and_return, dut_asic, port_to_shutdown, expected_profile) finally: - duthost.shell('config interface startup {}'.format(port_to_shutdown), module_ignore_errors=True) + dut_asic.shell('config interface {} startup {}'.format(ns, port_to_shutdown), module_ignore_errors=True) diff --git a/tests/qos/test_pfc_counters.py b/tests/qos/test_pfc_counters.py index 34757a2b0f3..dedfbeda053 100644 --- a/tests/qos/test_pfc_counters.py +++ b/tests/qos/test_pfc_counters.py @@ -1,6 +1,6 @@ -from 
tests.common.fixtures.conn_graph_facts import conn_graph_facts +from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts from qos_fixtures import leaf_fanouts -from qos_helpers import eos_to_linux_intf +from qos_helpers import eos_to_linux_intf, nxos_to_linux_intf, sonic_to_linux_intf import os import time import pytest @@ -24,6 +24,7 @@ PFC_GEN_FILE_RELATIVE_PATH = r'../../ansible/roles/test/files/helpers/pfc_gen.py' """ Expected PFC generator path at the leaf fanout switch """ PFC_GEN_FILE_DEST = r'~/pfc_gen.py' +PFC_GEN_FILE_ABSULOTE_PATH = r'/root/pfc_gen.py' """ Number of generated packets for each test case """ PKT_COUNT = 10 """ Number of switch priorities """ @@ -60,7 +61,7 @@ def setup_testbed(fanouthosts, duthost, leaf_fanouts): file_src = os.path.join(os.path.dirname(__file__), PFC_GEN_FILE_RELATIVE_PATH) peerdev_ans.host.copy(src = file_src, dest = PFC_GEN_FILE_DEST, force = True) -def run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=True, pause_time=65535, check_continous_pfc=False): +def run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts, is_pfc=True, pause_time=65535, check_continous_pfc=False): """ @Summary: Run test for Ethernet flow control (FC) or priority-based flow control (PFC) @param duthost: The object for interacting with DUT through ansible @@ -71,7 +72,7 @@ def run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=True, """ setup_testbed(fanouthosts, duthost, leaf_fanouts) conn_facts = conn_graph_facts['device_conn'][duthost.hostname] - + onyx_pfc_container_name = 'storm' int_status = duthost.show_interface(command = "status")['ansible_facts']['int_status'] """ We only test active physical interfaces """ @@ -84,22 +85,37 @@ def run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=True, for intf in active_phy_intfs: peer_device = conn_facts[intf]['peerdevice'] peer_port = conn_facts[intf]['peerport'] - peer_port_name = 
eos_to_linux_intf(peer_port) - peerdev_ans = fanouthosts[peer_device] + fanout_os = peerdev_ans.get_fanout_os() + fanout_hwsku = fanout_graph_facts[peerdev_ans.hostname]["device_info"]["HwSku"] + if fanout_os == "nxos": + peer_port_name = nxos_to_linux_intf(peer_port) + elif fanout_os == "sonic": + peer_port_name = sonic_to_linux_intf(peer_port) + else: + peer_port_name = eos_to_linux_intf(peer_port, hwsku=fanout_hwsku) + if is_pfc: for priority in range(PRIO_COUNT): - cmd = "sudo python %s -i %s -p %d -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) - peerdev_ans.host.command(cmd) + if fanout_hwsku == "MLNX-OS": + cmd = 'docker exec %s "python %s -i %s -p %d -t %d -n %d"' % (onyx_pfc_container_name, PFC_GEN_FILE_ABSULOTE_PATH, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) + peerdev_ans.host.config(cmd) + else: + cmd = "sudo python %s -i %s -p %d -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) + peerdev_ans.host.command(cmd) else: - cmd = "sudo python %s -i %s -g -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, pause_time, PKT_COUNT) - peerdev_ans.host.command(cmd) + if fanout_hwsku == "MLNX-OS": + cmd = 'docker exec %s "python %s -i %s -g -t %d -n %d"' % (onyx_pfc_container_name, PFC_GEN_FILE_ABSULOTE_PATH, peer_port_name, pause_time, PKT_COUNT) + peerdev_ans.host.config(cmd) + else: + cmd = "sudo python %s -i %s -g -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, pause_time, PKT_COUNT) + peerdev_ans.host.command(cmd) """ SONiC takes some time to update counters in database """ time.sleep(5) """ Check results """ - counter_facts = duthost.sonic_pfc_counters(method = "get")['ansible_facts'] + counter_facts = duthost.sonic_pfc_counters(method="get")['ansible_facts'] for intf in active_phy_intfs: if is_pfc: @@ -112,18 +128,30 @@ def run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=True, """only check priority 3 and 4: lossless priorities""" for priority 
in range(3, 5): """ Clear PFC counters """ - duthost.sonic_pfc_counters(method = "clear") + duthost.sonic_pfc_counters(method="clear") peer_device = conn_facts[intf]['peerdevice'] peer_port = conn_facts[intf]['peerport'] - peer_port_name = eos_to_linux_intf(peer_port) peerdev_ans = fanouthosts[peer_device] - cmd = "sudo python %s -i %s -p %d -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) - peerdev_ans.host.command(cmd) + fanout_os = peerdev_ans.get_fanout_os() + fanout_hwsku = fanout_graph_facts[peerdev_ans.hostname]["device_info"]["HwSku"] + if fanout_os == "nxos": + peer_port_name = nxos_to_linux_intf(peer_port) + elif fanout_os == "sonic": + peer_port_name = sonic_to_linux_intf(peer_port) + else: + peer_port_name = eos_to_linux_intf(peer_port, hwsku=fanout_hwsku) + + if fanout_hwsku == "MLNX-OS": + cmd = 'docker exec %s "python %s -i %s -p %d -t %d -n %d"' % (onyx_pfc_container_name, PFC_GEN_FILE_ABSULOTE_PATH, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) + peerdev_ans.host.config(cmd) + else: + cmd = "sudo python %s -i %s -p %d -t %d -n %d" % (PFC_GEN_FILE_DEST, peer_port_name, 2 ** priority, pause_time, PKT_COUNT) + peerdev_ans.host.command(cmd) time.sleep(5) - pfc_rx = duthost.sonic_pfc_counters(method = "get")['ansible_facts'] + pfc_rx = duthost.sonic_pfc_counters(method="get")['ansible_facts'] """check pfc Rx frame count on particular priority are increased""" assert pfc_rx[intf]['Rx'][priority] == str(PKT_COUNT) """check LHS priorities are 0 count""" @@ -133,27 +161,27 @@ def run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=True, for i in range(priority+1, PRIO_COUNT): assert pfc_rx[intf]['Rx'][i] == '0' -def test_pfc_pause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, leaf_fanouts): +def test_pfc_pause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, fanout_graph_facts, leaf_fanouts): """ @Summary: Run PFC pause frame (pause time quanta > 0) tests """ 
duthost = duthosts[rand_one_dut_hostname] - run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts) + run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts) -def test_pfc_unpause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, leaf_fanouts): +def test_pfc_unpause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, fanout_graph_facts, leaf_fanouts): """ @Summary: Run PFC unpause frame (pause time quanta = 0) tests """ duthost = duthosts[rand_one_dut_hostname] - run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, pause_time=0) + run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts, pause_time=0) -def test_fc_pause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, leaf_fanouts): +def test_fc_pause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, fanout_graph_facts, leaf_fanouts): """ @Summary: Run FC pause frame (pause time quanta > 0) tests """ duthost = duthosts[rand_one_dut_hostname] - run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=False) + run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts, is_pfc=False) -def test_fc_unpause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, leaf_fanouts): +def test_fc_unpause(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, fanout_graph_facts, leaf_fanouts): """ @Summary: Run FC pause frame (pause time quanta = 0) tests """ duthost = duthosts[rand_one_dut_hostname] - run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, is_pfc=False, pause_time=0) + run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts, is_pfc=False, pause_time=0) -def test_continous_pfc(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, leaf_fanouts): +def test_continous_pfc(fanouthosts, duthosts, rand_one_dut_hostname, conn_graph_facts, fanout_graph_facts, leaf_fanouts): duthost = 
duthosts[rand_one_dut_hostname] - run_test(fanouthosts, duthost, conn_graph_facts, leaf_fanouts, check_continous_pfc=True) + run_test(fanouthosts, duthost, conn_graph_facts, fanout_graph_facts, leaf_fanouts, check_continous_pfc=True) diff --git a/tests/qos/test_pfc_pause.py b/tests/qos/test_pfc_pause.py index 1066c563a32..e33ad00db1f 100644 --- a/tests/qos/test_pfc_pause.py +++ b/tests/qos/test_pfc_pause.py @@ -3,8 +3,11 @@ import pytest import time +from natsort import natsorted + from qos_fixtures import lossless_prio_dscp_map from qos_helpers import ansible_stdout_to_str, get_phy_intfs, get_addrs_in_subnet, get_active_vlan_members, get_vlan_subnet, natural_keys, get_max_priority +from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import change_mac_addresses # lgtm[py/unused-import] @@ -23,12 +26,12 @@ PFC_PKT_COUNT = 1000000000 PTF_FILE_REMOTE_PATH = '~/ptftests/pfc_pause_test.py' -PTF_PKT_COUNT = 50 +PTF_PKT_COUNT = 20 PTF_PKT_INTVL_SEC = 0.1 PTF_PASS_RATIO_THRESH = 0.6 """ Maximum number of interfaces to test on a DUT """ -MAX_TEST_INTFS_COUNT = 4 +MAX_TEST_INTFS_COUNT = 2 @pytest.fixture(scope="module", autouse=True) def pfc_test_setup(duthosts, rand_one_dut_hostname, tbinfo, ptfhost): @@ -52,7 +55,13 @@ def pfc_test_setup(duthosts, rand_one_dut_hostname, tbinfo, ptfhost): vlan_subnet = get_vlan_subnet(duthost) """ Generate IP addresses for servers in the Vlan """ - vlan_ip_addrs = get_addrs_in_subnet(vlan_subnet, len(vlan_members)) + vlan_ip_addrs = list() + if 'dualtor' in tbinfo['topo']['name']: + servers = mux_cable_server_ip(duthost) + for intf, value in natsorted(servers.items()): + vlan_ip_addrs.append(value['server_ipv4'].split('/')[0]) + else: + vlan_ip_addrs = get_addrs_in_subnet(vlan_subnet, len(vlan_members)) 
""" Find correspoinding interfaces on PTF """ phy_intfs = get_phy_intfs(duthost) @@ -79,8 +88,7 @@ def pfc_test_setup(duthosts, rand_one_dut_hostname, tbinfo, ptfhost): duthost.command('sonic-clear fdb all') """ Enable DUT's PFC wd """ - if 'dualtor' not in tbinfo['topo']['name']: - duthost.shell('sudo pfcwd start_default') + duthost.shell('sudo pfcwd start_default') def run_test(pfc_test_setup, fanouthosts, duthost, ptfhost, conn_graph_facts, fanout_info, traffic_params, pause_prio=None, queue_paused=True, @@ -172,6 +180,7 @@ def run_test(pfc_test_setup, fanouthosts, duthost, ptfhost, conn_graph_facts, + "queue_paused=%s;" % queue_paused + "dut_has_mac=False;" + "vlan_id=%s;" % vlan_id + + "debug=%s;" % True + "testbed_type=\'%s\'" % testbed_type) cmd = 'ptf --test-dir %s pfc_pause_test %s --test-params="%s"' % (os.path.dirname(PTF_FILE_REMOTE_PATH), intf_info, test_params) diff --git a/tests/qos/test_qos_masic.py b/tests/qos/test_qos_masic.py new file mode 100644 index 00000000000..ab102a8c280 --- /dev/null +++ b/tests/qos/test_qos_masic.py @@ -0,0 +1,432 @@ +import json +import logging +import pytest + +from tests.common.utilities import wait_until + +logger = logging.getLogger(__name__) + +from tests.common.helpers.assertions import pytest_assert, pytest_require + +pytestmark = [ + pytest.mark.topology('t1') +] + + +class QosSaiBaseMasic: + + SUPPORTED_T1_TOPOS = ["t1-lag", "t1-64-lag", "t1-backend"] + + def runPtfTest(self, ptfhost, testCase='', testParams={}): + """ + Runs QoS SAI test case on PTF host + + Args: + ptfhost (AnsibleHost): Packet Test Framework (PTF) + testCase (str): SAI tests test case name + testParams (dict): Map of test params required by testCase + + Returns: + None + + Raises: + RunAnsibleModuleFail if ptf test fails + """ + pytest_assert(ptfhost.shell( + argv = [ + "ptf", + "--test-dir", + "saitests", + testCase, + "--platform-dir", + "ptftests", + "--platform", + "remote", + "-t", + ";".join(["{}={}".format(k, repr(v)) for k, v in 
testParams.items()]), + "--disable-ipv6", + "--disable-vxlan", + "--disable-geneve", + "--disable-erspan", + "--disable-mpls", + "--disable-nvgre", + "--log-file", + "/tmp/{0}.log".format(testCase), + "--test-case-timeout", + "600" + ], + chdir = "/root", + )["rc"] == 0, "Failed when running test '{0}'".format(testCase)) + + def build_port_ips(self, asic_index, ifaces, mg_facts): + """ + Returns list of port index and IP address for a given ASIC + """ + + dut_port_ips = dict() + + for iface, addr in ifaces.items(): + if iface.startswith("Ethernet"): + portIndex = mg_facts["minigraph_ptf_indices"][iface] + elif iface.startswith("PortChannel"): + portName = mg_facts["minigraph_portchannels"][iface]["members"][0] + portIndex = mg_facts["minigraph_ptf_indices"][portName] + + dut_port_ips.update({ + portIndex: { + "ipv4": addr["peer_ipv4"], + "bgp_neighbor": addr["bgp_neighbor"] + } + }) + + return {asic_index: dut_port_ips} + + def get_backend_ip_ifs(self, duthost, frontend_asic): + """ + On a frontend ASIC return a dict of interfaces with + backend ASIC names + """ + pytest_assert( + frontend_asic in duthost.get_frontend_asic_ids(), + "{} is not frontend ASIC ID".format(frontend_asic) + ) + + ip_ifs = duthost.asic_instance( + frontend_asic + ).show_ip_interface()["ansible_facts"]["ip_interfaces"] + + # Find backend interface names + return {intf: ip["bgp_neighbor"].lower() for intf, ip in ip_ifs.items() + if ip["bgp_neighbor"].lower().startswith("asic")} + + def check_v4route_backend_nhop(self, duthost, frontend_asic, route): + """ + On frontend ASIC Check if v4 address has at least one backend + ASIC nexthop + + Returns: + False if not nexthops with backend ASICs + """ + cmd = 'vtysh -n {} -c "show ip route {} json"'.format( + frontend_asic, route + ) + result = duthost.command(cmd) + pytest_assert(result["rc"] == 0, cmd) + route_info = json.loads(result["stdout"]) + nhop = route_info[route_info.keys().pop()][0] + + nhop_ifs = {x.get("interfaceName") for x in 
nhop["nexthops"]} + backend_ifs = set(self.get_backend_ip_ifs( + duthost, frontend_asic).keys() + ) + + return len(nhop_ifs.intersection(backend_ifs)) + + def backend_ip_if_admin_state( + self, duthost, test_asic, frontend_asic, admin_state + ): + """ + On a frontend ASIC bring down ports (channels) towards backend ASICs + other than the ASIC under test, so that traffic always goes via + backend ASIC under test + """ + + def is_intf_status(asic, intf, oper_state): + intf_status = duthost.asic_instance(asic).show_interface( + command="status", include_internal_intfs=True + )["ansible_facts"]["int_status"] + if intf_status[intf]["oper_state"] == oper_state: + return True + return False + + oper_state = "up" if admin_state == "startup" else "down" + ip_ifs = self.get_backend_ip_ifs(duthost, frontend_asic) + + for intf, asic in ip_ifs.items(): + if asic != "asic{}".format(test_asic): + if admin_state == "startup": + duthost.asic_instance(frontend_asic).startup_interface(intf) + else: + duthost.asic_instance(frontend_asic).shutdown_interface(intf) + + # wait for port status to change + pytest_assert( + wait_until( + 10, 1, 0, is_intf_status, frontend_asic, intf, + oper_state + ), + "Failed to update port status {} {}".format( + intf, admin_state + ) + ) + + + def find_asic_traffic_ports(self, duthost, ptfhost, test_params): + """ + For a given pair of source IP and destination IP, identify + the path taken by the L3 packet. Path implies the backend ASIC + and its tx and rx ports. The path is identified by sending + a burst of packets and finding the difference in interface + counters before and after the burst. + + Assert is thrown if multiple ports or multiple backend ASICs + have similar interface counters. 
+ """ + def find_traffic_ports(asic_id, c1, c2, diff): + + rx_port = None + tx_port = None + + a1 = c1[asic_id]["ansible_facts"]["int_counter"] + a2 = c2[asic_id]["ansible_facts"]["int_counter"] + + for port in a2.keys(): + rx_diff = int(a2[port]["RX_OK"]) - int(a1[port]["RX_OK"]) + + if rx_diff >= diff: + pytest_assert( + rx_port is None, + "Multiple rx ports with {} rx packets".format(diff) + ) + rx_port = port + + tx_diff = int(a2[port]["TX_OK"]) - int(a1[port]["TX_OK"]) + if tx_diff >= diff: + pytest_assert( + tx_port is None, + "Multiple tx ports with {} tx packets".format(diff) + ) + tx_port = port + + # return rx, tx ports that have a packet count difference of > diff + return rx_port, tx_port + + test_params["count"] = 100 + duthost.command("sonic-clear counters") + cnt_before = duthost.show_interface( + command="counter", asic_index="all", include_internal_intfs=True + ) + # send a burst of packets from a given src IP to dst IP + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.PacketTransmit", + testParams=test_params + ) + time.sleep(8) + cnt_after = duthost.show_interface( + command="counter", asic_index="all", include_internal_intfs=True + ) + + asic_idx = None + rx_port = None + tx_port = None + + # identify the backend ASIC and the rx, tx ports on that ASIC + # that forwarded the traffic + for asic in duthost.get_backend_asic_ids(): + rx, tx = find_traffic_ports( + asic, cnt_before, cnt_after, test_params["count"] + ) + if rx and tx: + pytest_assert( + rx_port is None and tx_port is None, + "Multiple backend ASICs with rx/tx ports" + ) + rx_port, tx_port, asic_idx = rx, tx, asic + + pytest_assert(asic_idx is not None, "ASIC, rx and tx ports not found") + return ({ + "test_src_port_name": rx_port, + "test_dst_port_name": tx_port, + "asic_under_test": asic_idx, + } + ) + + def build_ip_interface(self, duthost, tbinfo): + """ + builds a list of active IP interfaces and port index + for each ASIC + + Returns: + { + asic_index: { + portIndex: { + 
"ipv4": peer ipv4, + "bgp_neighbor": BGP neighbor + } + . + . + } + . + . + } + """ + topo = tbinfo["topo"]["name"] + if topo not in self.SUPPORTED_T1_TOPOS: + pytest.skip("unsupported topology {}".format(topo)) + + pytest_require(duthost.is_multi_asic, "Not a multi asic platform") + + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + ip_ifaces = duthost.get_active_ip_interfaces(tbinfo, asic_index="all") + + port_ips = dict() + for idx in range(len(ip_ifaces)): + port_ips.update(self.build_port_ips(idx, ip_ifaces[idx], mg_facts)) + + return port_ips + + def build_test_ports(self, duthost, tbinfo): + """ + This fixture builds a list of active L3 interface ports on each + ASIC so that source and destination interfaces can be selected + from different ASICs. Returns a dict of 'src' and 'dst' interfaces + along with the ASIC ID + + Only frontend ASCIs connected to T0 devices are reachable end + to end on multi ASIC platform. + """ + # find asics with T0 neighbors + ip_interface = self.build_ip_interface(duthost, tbinfo) + ports = dict() + for k, v in ip_interface.items(): + try: + port_index = next(iter(v)) + port_info = v[port_index] + if port_info["bgp_neighbor"].lower().endswith("t0"): + ports.update({k: v}) + except StopIteration: + continue + + pytest_assert( + len(ports) >= 0, "Ports from at least two ASICs required" + ) + + test_ports = dict() + keys = ports.keys() + src_asic = keys.pop(0) + test_ports.update({"src": {src_asic: ports[src_asic]}}) + test_ports.update({"dst": dict()}) + for dst_asic in keys: + test_ports["dst"].update({dst_asic: ports[dst_asic]}) + + return test_ports + + def get_test_ports(self, duthost, tbinfo): + """ + Fixture to select test ports from a given list of active L3 + interfaces from multiple frontend ASICs. The source and + destination port will be on different ASICs. 
+ + Fixture also returns the source and desitnation ASCIS IDs + """ + test_ports = self.build_test_ports(duthost, tbinfo) + + # source port + src_asic = test_ports["src"].keys().pop(0) + src_port_ids = test_ports["src"][src_asic].keys() + src_port_id = src_port_ids.pop(0) + src_port_ip = test_ports["src"][src_asic][src_port_id]["ipv4"] + + # destination port + dst_asic = test_ports["dst"].keys().pop(0) + dst_port_ids = test_ports["dst"][dst_asic].keys() + dst_port_id = dst_port_ids.pop(0) + dst_port_ip = test_ports["dst"][dst_asic][dst_port_id]["ipv4"] + + return { + "dst_port_id": dst_port_id, + "dst_port_ip": dst_port_ip, + "dst_asic": dst_asic, + "src_port_id": src_port_id, + "src_port_ip": src_port_ip, + "src_asic": src_asic, + } + + +class TestQosSaiMasic(QosSaiBaseMasic): + + def test_qos_masic_dscp_queue_mapping( + self, duthosts, rand_one_dut_hostname, enum_backend_asic_index, + ptfhost, dut_test_params, swapSyncd, tbinfo + ): + duthost = duthosts[rand_one_dut_hostname] + + if not duthost.sonichost.is_multi_asic: + pytest.skip("Test applies only to multi ASIC platform") + + if enum_backend_asic_index is None: + pytest.skip("Backend ASIC is None") + + # Verify all external and internal BGP sessions are up + config_facts = duthost.config_facts( + host=duthost.hostname, source="running" + )['ansible_facts'] + bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {}) + bgp_neighbors.update(duthost.get_internal_bgp_peers()) + + if not wait_until( + 300, 10, 0, duthost.check_bgp_session_state, bgp_neighbors.keys() + ): + pytest.fail("Not all bgp sessions are Up. 
BGP Sessions: {}".format( + duthost.get_bgp_neighbors() + )) + + test_ports = self.get_test_ports(duthost, tbinfo) + src_asic = test_ports["src_asic"] + + try: + # Bring down port (channel) towards ASICs other than the ASIC + # under test, so that traffic always goes via ASIC under test + self.backend_ip_if_admin_state( + duthost, enum_backend_asic_index, src_asic, "shutdown" + ) + + test_params = dict() + test_params.update(dut_test_params["basicParams"]) + test_params.update(test_ports) + logger.debug(test_params) + + logging.debug( + "BGP neighbors after backend I/F shut: {}".format( + duthost.get_bgp_neighbors() + ) + ) + + # ensure the test destination IP has a path to backend ASIC + pytest_assert( + wait_until( + 300, 1, 0, self.check_v4route_backend_nhop, duthost, + test_params["src_asic"], test_params["dst_port_ip"] + ), + "Route {} doesn't have backend ASIC nexthop on ASIC {}, {}".format( + test_params["dst_port_ip"], + test_params["src_asic"], + duthost.command('vtysh -n {} -c "show ip route {} json"'.format( + test_params["src_asic"], test_params["dst_port_ip"]) + )["stdout"] + ) + ) + + duthost.asic_instance( + enum_backend_asic_index + ).create_ssh_tunnel_sai_rpc() + + # find traffic src/dst ports on the ASIC under test + test_params.update( + self.find_asic_traffic_ports(duthost, ptfhost, test_params) + ) + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.DscpMappingPB", + testParams=test_params + ) + + finally: + # bring up the backed IFs + self.backend_ip_if_admin_state( + duthost, enum_backend_asic_index, src_asic, "startup" + ) + + duthost.asic_instance( + enum_backend_asic_index + ).remove_ssh_tunnel_sai_rpc() diff --git a/tests/qos/test_qos_sai.py b/tests/qos/test_qos_sai.py index 138f48dbf20..682b2eb5400 100644 --- a/tests/qos/test_qos_sai.py +++ b/tests/qos/test_qos_sai.py @@ -22,15 +22,21 @@ import logging import pytest +import time +import json +from tests.common.fixtures.conn_graph_facts import fanout_graph_facts, conn_graph_facts 
+from tests.common.fixtures.duthost_utils import dut_qos_maps, separated_dscp_to_tc_map_on_uplink, load_dscp_to_pg_map # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import copy_saitests_directory # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import change_mac_addresses # lgtm[py/unused-import] from tests.common.fixtures.ptfhost_utils import ptf_portmap_file # lgtm[py/unused-import] -from tests.common.fixtures.ptfhost_utils import set_ptf_port_mapping_mode +from tests.common.dualtor.dual_tor_utils import dualtor_ports, is_tunnel_qos_remap_enabled # lgtm[py/unused-import] from tests.common.helpers.assertions import pytest_assert -from tests.common.utilities import wait_until -from qos_sai_base import QosSaiBase, QosSaiBaseMasic +from tests.common.helpers.pfc_storm import PFCStorm +from tests.pfcwd.files.pfcwd_helper import set_pfc_timers, start_wd_on_ports +from qos_sai_base import QosSaiBase +from tests.common.cisco_data import get_markings_dut, setup_markings_dut logger = logging.getLogger(__name__) @@ -40,6 +46,15 @@ PTF_PORT_MAPPING_MODE = 'use_orig_interface' +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exception(rand_one_dut_hostname, loganalyzer): + """ignore the syslog ERR syncd0#syncd: [03:00.0] brcm_sai_set_switch_attribute:1920 updating switch mac addr failed with error -2""" + ignore_regex = [ + ".*ERR syncd[0-9]*#syncd.*brcm_sai_set_switch_attribute.*updating switch mac addr failed with error.*" + ] + if loganalyzer: + loganalyzer[rand_one_dut_hostname].ignore_regex.extend(ignore_regex) + class TestQosSai(QosSaiBase): """TestQosSai derives from QosSaiBase and contains collection of QoS SAI test cases. 
@@ -55,20 +70,22 @@ class TestQosSai(QosSaiBase): 'Arista-7260CX3-D108C8', 'Force10-S6100', 'Arista-7260CX3-Q64', - 'Arista-7050CX3-32S-C32' + 'Arista-7050CX3-32S-C32', + 'Arista-7050CX3-32S-D48C8' ] BREAKOUT_SKUS = ['Arista-7050-QX-32S'] def testParameter( self, duthost, dutConfig, dutQosConfig, ingressLosslessProfile, - ingressLossyProfile, egressLosslessProfile + ingressLossyProfile, egressLosslessProfile, dualtor_ports ): logger.info("asictype {}".format(duthost.facts["asic_type"])) logger.info("config {}".format(dutConfig)) logger.info("qosConfig {}".format(dutQosConfig)) + logger.info("dualtor_ports {}".format(dualtor_ports)) - @pytest.mark.parametrize("xoffProfile", ["xoff_1", "xoff_2"]) + @pytest.mark.parametrize("xoffProfile", ["xoff_1", "xoff_2", "xoff_3", "xoff_4"]) def testQosSaiPfcXoffLimit( self, xoffProfile, ptfhost, dutTestParams, dutConfig, dutQosConfig, ingressLosslessProfile, egressLosslessProfile @@ -92,6 +109,10 @@ def testQosSaiPfcXoffLimit( Raises: RunAnsibleModuleFail if ptf test fails """ + normal_profile = ["xoff_1", "xoff_2"] + if not dutConfig["dualTor"] and not xoffProfile in normal_profile: + pytest.skip("Additional DSCPs are not supported on non-dual ToR ports") + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] if dutTestParams['hwsku'] in self.BREAKOUT_SKUS and 'backend' not in dutTestParams['topo']: qosConfig = dutQosConfig["param"][portSpeedCableLength]["breakout"] @@ -99,6 +120,7 @@ def testQosSaiPfcXoffLimit( qosConfig = dutQosConfig["param"][portSpeedCableLength] testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig[xoffProfile]["dscp"], "ecn": qosConfig[xoffProfile]["ecn"], @@ -122,11 +144,174 @@ def testQosSaiPfcXoffLimit( if "pkts_num_margin" in qosConfig[xoffProfile].keys(): testParams["pkts_num_margin"] = qosConfig[xoffProfile]["pkts_num_margin"] + if "packet_size" in 
qosConfig[xoffProfile].keys(): + testParams["packet_size"] = qosConfig[xoffProfile]["packet_size"] + self.runPtfTest( ptfhost, testCase="sai_qos_tests.PFCtest", testParams=testParams ) - @pytest.mark.parametrize("xonProfile", ["xon_1", "xon_2"]) + @pytest.mark.parametrize("xonProfile", ["xon_1", "xon_2", "xon_3", "xon_4"]) + def testPfcStormWithSharedHeadroomOccupancy( + self, xonProfile, ptfhost, fanouthosts, conn_graph_facts, fanout_graph_facts, + dutTestParams, dutConfig, dutQosConfig, sharedHeadroomPoolSize, ingressLosslessProfile + ): + """ + Verify if the PFC Frames are not sent from the DUT after a PFC Storm from peer link. + Ingress PG occupancy must cross into shared headroom region when the PFC Storm is seen + Only for MLNX Platforms + + Args: + xonProfile (pytest parameter): XON profile + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + fanout_graph_facts(fixture) : fanout graph info + fanouthosts(AnsibleHost): fanout instance + dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration + ingressLosslessProfile (Fxiture): Map of egress lossless buffer profile attributes + + Returns: + None + + Raises: + RunAnsibleModuleFail if ptf test fails + """ + normal_profile = ["xon_1", "xon_2"] + if not dutConfig["dualTor"] and not xonProfile in normal_profile: + pytest.skip("Additional DSCPs are not supported on non-dual ToR ports") + + if dutTestParams["basicParams"]["sonic_asic_type"] != "mellanox": + pytest.skip("This Test Case is only meant for Mellanox ASIC") + + if not sharedHeadroomPoolSize or sharedHeadroomPoolSize == "0": + pytest.skip("Shared Headroom has to be enabled for this test") + + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] + if xonProfile in dutQosConfig["param"][portSpeedCableLength].keys(): + qosConfig = 
dutQosConfig["param"][portSpeedCableLength] + else: + if dutTestParams['hwsku'] in self.BREAKOUT_SKUS and 'backend' not in dutTestParams['topo']: + qosConfig = dutQosConfig["param"][portSpeedCableLength]["breakout"] + else: + qosConfig = dutQosConfig["param"] + + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + testParams.update({ + "dscp": qosConfig[xonProfile]["dscp"], + "ecn": qosConfig[xonProfile]["ecn"], + "pg": qosConfig[xonProfile]["pg"], + "buffer_max_size": ingressLosslessProfile["size"], + "dst_port_id": dutConfig["testPorts"]["dst_port_id"], + "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], + "src_port_id": dutConfig["testPorts"]["src_port_id"], + "src_port_ip": dutConfig["testPorts"]["src_port_ip"], + "src_port_vlan": dutConfig["testPorts"]["src_port_vlan"], + "pkts_num_trig_pfc": qosConfig[xonProfile]["pkts_num_trig_pfc"], + "pkts_num_private_headrooom": dutQosConfig["param"]["pkts_num_private_headrooom"] + }) + + # Params required for generating a PFC Storm + duthost = dutConfig["dutInstance"] + pfcwd_timers = set_pfc_timers() + pfcwd_test_port_id = dutConfig["testPorts"]["src_port_id"] + pfcwd_test_port = dutConfig["dutInterfaces"][pfcwd_test_port_id] + fanout_neighbors = conn_graph_facts["device_conn"][duthost.hostname] + peerdevice = fanout_neighbors[pfcwd_test_port]["peerdevice"] + peerport = fanout_neighbors[pfcwd_test_port]["peerport"] + peer_info = { + 'peerdevice': peerdevice, + 'hwsku': fanout_graph_facts[peerdevice]["device_info"]["HwSku"], + 'pfc_fanout_interface': peerport + } + + queue_index = qosConfig[xonProfile]["pg"] + frames_number = 100000000 + + logging.info("PFC Storm Gen Params \n DUT iface: {} Fanout iface : {}\ + queue_index: {} peer_info: {}".format(pfcwd_test_port, + peerport, + queue_index, + peer_info)) + + # initialize PFC Storm Handler + storm_hndle = PFCStorm(duthost, fanout_graph_facts, fanouthosts, + pfc_queue_idx = 
queue_index, + pfc_frames_number = frames_number, + peer_info = peer_info) + storm_hndle.deploy_pfc_gen() + + # check if pfcwd status is enabled before running the test + prev_state = duthost.shell('sonic-db-cli CONFIG_DB HGETALL "PFC_WD|{}"'.format(pfcwd_test_port))['stdout'] + prev_poll_interval = duthost.shell('sonic-db-cli CONFIG_DB HGET "PFC_WD|GLOBAL" POLL_INTERVAL'.format(pfcwd_test_port))['stdout'] + + try: + prev_state = json.loads(prev_state) + except Exception as e: + logging.debug("Exception: {}, PFC_WD State: {}".format(str(e), prev_state)) + prev_state = {} + + try: + prev_poll_interval = int(prev_poll_interval) + except Exception as e: + logging.debug("Exception: {}, Poll Interval: {}".format(str(e), prev_poll_interval)) + prev_poll_interval = 0 + + # set poll interval for pfcwd + duthost.command("pfcwd interval {}".format(pfcwd_timers['pfc_wd_poll_time'])) + + logger.info("--- Start Pfcwd on port {}".format(pfcwd_test_port)) + start_wd_on_ports(duthost, + pfcwd_test_port, + pfcwd_timers['pfc_wd_restore_time'], + pfcwd_timers['pfc_wd_detect_time']) + + try: + logger.info("--- Fill the ingress buffers ---") + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.PtfFillBuffer", testParams=testParams + ) + + # Trigger PfcWd + storm_hndle.start_storm() + logger.info("PfcWd Status: {}".format(duthost.command("pfcwd show stats")["stdout_lines"])) + time.sleep(10) + storm_hndle.stop_storm() + logger.info("PfcWd Status: {}".format(duthost.command("pfcwd show stats")["stdout_lines"])) + + logger.info("--- Enable dst iface and verify if the PFC frames are not sent from src port ---") + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.PtfReleaseBuffer", testParams=testParams + ) + except Exception as e: + raise e + finally: + if prev_poll_interval: + logger.info("--- Restore original poll interval {} ---".format(prev_poll_interval)) + duthost.command("pfcwd interval {}".format(prev_poll_interval)) + else: + logger.info("--- Set Default Polling Interval 
---".format()) + duthost.command("pfcwd interval {}".format("200")) + + if prev_state: + logger.info("--- Restore original config {} for PfcWd on {} ---".format(prev_state, pfcwd_test_port)) + start_wd_on_ports(duthost, + pfcwd_test_port, + prev_state.get("restoration_time", "200"), + prev_state.get("detection_time", "200"), + prev_state.get("action", "drop")) + else: + logger.info("--- Stop PfcWd on {} ---".format(pfcwd_test_port)) + duthost.command("pfcwd stop {}".format(pfcwd_test_port)) + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.PtfEnableDstPorts", testParams=testParams + ) + + @pytest.mark.parametrize("xonProfile", ["xon_1", "xon_2", "xon_3", "xon_4"]) def testQosSaiPfcXonLimit( self, xonProfile, ptfhost, dutTestParams, dutConfig, dutQosConfig, ingressLosslessProfile @@ -149,6 +334,10 @@ def testQosSaiPfcXonLimit( Raises: RunAnsibleModuleFail if ptf test fails """ + normal_profile = ["xon_1", "xon_2"] + if not dutConfig["dualTor"] and not xonProfile in normal_profile: + pytest.skip("Additional DSCPs are not supported on non-dual ToR ports") + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] if xonProfile in dutQosConfig["param"][portSpeedCableLength].keys(): qosConfig = dutQosConfig["param"][portSpeedCableLength] @@ -170,6 +359,7 @@ def testQosSaiPfcXonLimit( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig[xonProfile]["dscp"], "ecn": qosConfig[xonProfile]["ecn"], @@ -199,10 +389,139 @@ def testQosSaiPfcXonLimit( if "pkts_num_margin" in qosConfig[xonProfile].keys(): testParams["pkts_num_margin"] = qosConfig[xonProfile]["pkts_num_margin"] + if "packet_size" in qosConfig[xonProfile].keys(): + testParams["packet_size"] = qosConfig[xonProfile]["packet_size"] + self.runPtfTest( ptfhost, testCase="sai_qos_tests.PFCXonTest", testParams=testParams ) + @pytest.mark.parametrize("LosslessVoqProfile", ["lossless_voq_1", 
"lossless_voq_2", + "lossless_voq_3", "lossless_voq_4"]) + def testQosSaiLosslessVoq( + self, LosslessVoqProfile, ptfhost, dutTestParams, dutConfig, dutQosConfig + ): + """ + Test QoS SAI XOFF limits for various voq mode configurations + Args: + LosslessVoqProfile (pytest parameter): LosslessVoq Profile + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration + Returns: + None + Raises: + RunAnsibleModuleFail if ptf test fails + """ + if dutTestParams["basicParams"]["sonic_asic_type"] != "cisco-8000": + pytest.skip("Lossless Voq test is not supported") + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] + if dutTestParams['hwsku'] in self.BREAKOUT_SKUS and 'backend' not in dutTestParams['topo']: + qosConfig = dutQosConfig["param"][portSpeedCableLength]["breakout"] + else: + qosConfig = dutQosConfig["param"][portSpeedCableLength] + testPortIps = dutConfig["testPortIps"] + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + testParams.update({ + "dscp": qosConfig[LosslessVoqProfile]["dscp"], + "ecn": qosConfig[LosslessVoqProfile]["ecn"], + "pg": qosConfig[LosslessVoqProfile]["pg"], + "dst_port_id": qosConfig[LosslessVoqProfile]["dst_port_id"], + "dst_port_ip": testPortIps[qosConfig[LosslessVoqProfile]["dst_port_id"]]['peer_addr'], + "src_port_1_id": qosConfig[LosslessVoqProfile]["src_port_1_id"], + "src_port_1_ip": testPortIps[qosConfig[LosslessVoqProfile]["src_port_1_id"]]['peer_addr'], + "src_port_2_id": qosConfig[LosslessVoqProfile]["src_port_2_id"], + "src_port_2_ip": testPortIps[qosConfig[LosslessVoqProfile]["src_port_2_id"]]['peer_addr'], + "num_of_flows": qosConfig[LosslessVoqProfile]["num_of_flows"], + "pkts_num_leak_out": 
qosConfig["pkts_num_leak_out"], + "pkts_num_trig_pfc": qosConfig[LosslessVoqProfile]["pkts_num_trig_pfc"] + }) + + if "pkts_num_margin" in qosConfig[LosslessVoqProfile].keys(): + testParams["pkts_num_margin"] = qosConfig[LosslessVoqProfile]["pkts_num_margin"] + + if "packet_size" in qosConfig[LosslessVoqProfile].keys(): + testParams["packet_size"] = qosConfig[LosslessVoqProfile]["packet_size"] + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.LosslessVoq", testParams=testParams + ) + + def correctPortIds(self, test_port_ids, src_port_ids, dst_port_ids): + ''' + if port id of test_port_ids/dst_port_ids is not existing in test_port_ids + correct it, make sure all src/dst id is valid + e.g. + Given below parameter: + test_port_ids: [0, 2, 4, 6, 8, 10, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 44, 46, 48, 50, 52, 54] + src_port_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9] + dst_port_ids: 10 + and run correctPortIds to get below result: + src_port_ids: [0, 2, 16, 4, 18, 6, 20, 8, 22] + dst_port_ids: 10 + ''' + # cache src port ids, and if its type isn't list, convert it to list + src_port = src_port_ids + src_is_list = True + if not isinstance(src_port_ids, list): + src_port = [src_port_ids] + src_is_list = False + + # cache dst port ids, and if its type isn't list, convert it to list + dst_port = dst_port_ids + dst_is_list = True + if not isinstance(dst_port_ids, list): + dst_port = [dst_port_ids] + dst_is_list = False + + if len(src_port) + len(dst_port) > len(test_port_ids): + logger.info('no enough ports for test') + return (None, None) + + # cache test port ids + ports = [pid for pid in test_port_ids] + + # check if all src port id is exist in test port ids + # if yes, remove consumed id from test port ids + # if no, record index of invaild src port id to invalid_src_idx variable + invalid_src_idx = [] + for idx, pid in enumerate(src_port): + if pid not in ports: + invalid_src_idx.append(idx) + else: + ports.remove(pid) + + # check if all dst port id is exist in 
test port ids + # if yes, remove consumed id from test port ids + # if no, record index of invaild dst port id to invalid_dst_idx variable + invalid_dst_idx = [] + for idx, pid in enumerate(dst_port): + if pid not in ports: + invalid_dst_idx.append(idx) + else: + ports.remove(pid) + + # pop the minimal test port id, and assign it to src port to replace its invalid port id + for idx in invalid_src_idx: + src_port[idx] = ports.pop(0) + + # pop the minimal test port id, and assign it to dst port to replace its invalid port id + for idx in invalid_dst_idx: + dst_port[idx] = ports.pop(0) + + # if src port is not list, conver it back to int + if not src_is_list: + src_port = src_port[0] + # if dst port is not list, conver it back to int + if not dst_is_list: + dst_port = dst_port[0] + + return (src_port, dst_port) + def testQosSaiHeadroomPoolSize( self, ptfhost, dutTestParams, dutConfig, dutQosConfig, ingressLosslessProfile @@ -234,8 +553,17 @@ def testQosSaiHeadroomPoolSize( if not 'hdrm_pool_size' in qosConfig.keys(): pytest.skip("Headroom pool size is not enabled on this DUT") + if not dutConfig['dualTor']: + qosConfig['hdrm_pool_size']['pgs'] = qosConfig['hdrm_pool_size']['pgs'][:2] + qosConfig['hdrm_pool_size']['dscps'] = qosConfig['hdrm_pool_size']['dscps'][:2] + + qosConfig["hdrm_pool_size"]["src_port_ids"], qosConfig["hdrm_pool_size"]["dst_port_id"] = self.correctPortIds( + dutConfig["testPortIds"], qosConfig["hdrm_pool_size"]["src_port_ids"], qosConfig["hdrm_pool_size"]["dst_port_id"]) + pytest_assert(qosConfig["hdrm_pool_size"]["src_port_ids"] != None and qosConfig["hdrm_pool_size"]["dst_port_id"] != None, "No enough test ports") + testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "testbed_type": dutTestParams["topo"], "dscps": qosConfig["hdrm_pool_size"]["dscps"], @@ -274,6 +602,66 @@ def testQosSaiHeadroomPoolSize( testParams=testParams ) + 
@pytest.mark.parametrize("sharedResSizeKey", ["shared_res_size_1", "shared_res_size_2"]) + def testQosSaiSharedReservationSize( + self, sharedResSizeKey, ptfhost, dutTestParams, dutConfig, dutQosConfig + ): + """ + Test QoS SAI shared reservation size + Args: + sharedResSizeKey: qos.yml entry lookup key + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration + Returns: + None + Raises: + RunAnsibleModuleFail if ptf test fails + """ + if dutTestParams["basicParams"]["sonic_asic_type"] != "cisco-8000": + pytest.skip("Shared reservation size test is not supported") + + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] + qosConfig = dutQosConfig["param"][portSpeedCableLength] + testPortIps = dutConfig["testPortIps"] + + if not sharedResSizeKey in qosConfig.keys(): + pytest.skip("Shared reservation size parametrization '%s' is not enabled" % sharedResSizeKey) + + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + testParams.update({ + "testbed_type": dutTestParams["topo"], + "dscps": qosConfig[sharedResSizeKey]["dscps"], + "ecn": qosConfig[sharedResSizeKey]["ecn"], + "pgs": qosConfig[sharedResSizeKey]["pgs"], + "queues": qosConfig[sharedResSizeKey]["queues"], + "src_port_ids": qosConfig[sharedResSizeKey]["src_port_ids"], + "src_port_ips": [testPortIps[port]['peer_addr'] for port in qosConfig[sharedResSizeKey]["src_port_ids"]], + "dst_port_ids": qosConfig[sharedResSizeKey]["dst_port_ids"], + "dst_port_ips": [testPortIps[port]['peer_addr'] for port in qosConfig[sharedResSizeKey]["dst_port_ids"]], + "pkt_counts": qosConfig[sharedResSizeKey]["pkt_counts"], + "shared_limit_bytes": qosConfig[sharedResSizeKey]["shared_limit_bytes"], + 
"hwsku":dutTestParams['hwsku'] + }) + + if "packet_size" in qosConfig[sharedResSizeKey]: + testParams["packet_size"] = qosConfig[sharedResSizeKey]["packet_size"] + + if "cell_size" in qosConfig[sharedResSizeKey]: + testParams["cell_size"] = qosConfig[sharedResSizeKey]["cell_size"] + + if "pkts_num_margin" in qosConfig[sharedResSizeKey]: + testParams["pkts_num_margin"] = qosConfig[sharedResSizeKey]["pkts_num_margin"] + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.SharedResSizeTest", + testParams=testParams + ) + def testQosSaiHeadroomPoolWatermark( self, duthosts, rand_one_dut_hostname, ptfhost, dutTestParams, dutConfig, dutQosConfig, ingressLosslessProfile, sharedHeadroomPoolSize, @@ -312,6 +700,7 @@ def testQosSaiHeadroomPoolWatermark( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "testbed_type": dutTestParams["topo"], "dscps": qosConfig["hdrm_pool_size"]["dscps"], @@ -333,6 +722,10 @@ def testQosSaiHeadroomPoolWatermark( "hwsku":dutTestParams['hwsku'] }) + margin = qosConfig["hdrm_pool_size"].get("margin") + if margin: + testParams["margin"] = margin + if "pkts_num_egr_mem" in qosConfig.keys(): testParams["pkts_num_egr_mem"] = qosConfig["pkts_num_egr_mem"] @@ -366,6 +759,8 @@ def testQosSaiBufferPoolWatermark( RunAnsibleModuleFail if ptf test fails """ disableTest = request.config.getoption("--disable_test") + if dutTestParams["basicParams"]["sonic_asic_type"] == 'cisco-8000': + disableTest = False if disableTest: pytest.skip("Buffer Pool watermark test is disabled") @@ -385,6 +780,7 @@ def testQosSaiBufferPoolWatermark( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig[bufPool]["dscp"], "ecn": qosConfig[bufPool]["ecn"], @@ -400,6 +796,10 @@ def testQosSaiBufferPoolWatermark( "cell_size": qosConfig[bufPool]["cell_size"], 
"buf_pool_roid": buf_pool_roid }) + + if "packet_size" in qosConfig[bufPool].keys(): + testParams["packet_size"] = qosConfig[bufPool]["packet_size"] + self.runPtfTest( ptfhost, testCase="sai_qos_tests.BufferPoolWatermarkTest", testParams=testParams @@ -435,6 +835,7 @@ def testQosSaiLossyQueue( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig["lossy_queue_1"]["dscp"], "ecn": qosConfig["lossy_queue_1"]["ecn"], @@ -468,18 +869,83 @@ def testQosSaiLossyQueue( testParams=testParams ) + @pytest.mark.parametrize("LossyVoq", ["lossy_queue_voq_1", "lossy_queue_voq_2"]) + def testQosSaiLossyQueueVoq( + self, LossyVoq, ptfhost, dutTestParams, dutConfig, dutQosConfig, + ingressLossyProfile, duthost, localhost + ): + """ + Test QoS SAI Lossy queue with non_default voq and default voq + Args: + LossyVoq : qos.yml entry lookup key + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration + ingressLossyProfile (Fxiture): Map of ingress lossy buffer profile attributes + duthost : DUT host params + localhost : local host params + Returns: + None + Raises: + RunAnsibleModuleFail if ptf test fails + """ + if dutTestParams["basicParams"]["sonic_asic_type"] != "cisco-8000": + pytest.skip("Lossy Queue Voq test is not supported") + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] + qosConfig = dutQosConfig["param"][portSpeedCableLength] + testPortIps = dutConfig["testPortIps"] + + if "lossy_queue_voq_2" in LossyVoq: + original_voq_markings = get_markings_dut(duthost) + setup_markings_dut(duthost, localhost, voq_allocation_mode="default") + + try: + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + 
testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + testParams.update({ + "dscp": qosConfig[LossyVoq]["dscp"], + "ecn": qosConfig[LossyVoq]["ecn"], + "pg": qosConfig[LossyVoq]["pg"], + "src_port_id": qosConfig[LossyVoq]["src_port_id"], + "src_port_ip": testPortIps[qosConfig[LossyVoq]["src_port_id"]]['peer_addr'], + "dst_port_id": qosConfig[LossyVoq]["dst_port_id"], + "dst_port_ip": testPortIps[qosConfig[LossyVoq]["dst_port_id"]]['peer_addr'], + "pkts_num_leak_out": dutQosConfig["param"][portSpeedCableLength]["pkts_num_leak_out"], + "pkts_num_trig_egr_drp": qosConfig[LossyVoq]["pkts_num_trig_egr_drp"] + }) + + if "packet_size" in qosConfig[LossyVoq].keys(): + testParams["packet_size"] = qosConfig[LossyVoq]["packet_size"] + testParams["cell_size"] = qosConfig[LossyVoq]["cell_size"] + + if "pkts_num_margin" in qosConfig[LossyVoq].keys(): + testParams["pkts_num_margin"] = qosConfig[LossyVoq]["pkts_num_margin"] + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.LossyQueueVoqTest", + testParams=testParams + ) + + finally: + if "lossy_queue_voq_2" in LossyVoq: + setup_markings_dut(duthost, localhost, **original_voq_markings) + def testQosSaiDscpQueueMapping( - self, ptfhost, dutTestParams, dutConfig + self, duthost, ptfhost, dutTestParams, dutConfig, dut_qos_maps ): """ Test QoS SAI DSCP to queue mapping Args: + duthost (AnsibleHost): The DUT host ptfhost (AnsibleHost): Packet Test Framework (PTF) dutTestParams (Fixture, dict): DUT host test params dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, and test ports - + dut_qos_maps(Fixture): A fixture, return qos maps on DUT host Returns: None @@ -488,15 +954,22 @@ def testQosSaiDscpQueueMapping( """ if "backend" in dutTestParams["topo"]: pytest.skip("Dscp-queue mapping is not supported on {}".format(dutTestParams["topo"])) + + # Skip the regular dscp to pg mapping test. Will run another test case instead. 
+ if separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps): + pytest.skip("Skip this test since separated DSCP_TO_TC_MAP is applied") testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dst_port_id": dutConfig["testPorts"]["dst_port_id"], "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], "src_port_id": dutConfig["testPorts"]["src_port_id"], "src_port_ip": dutConfig["testPorts"]["src_port_ip"], - "hwsku":dutTestParams['hwsku'] + "hwsku":dutTestParams['hwsku'], + "dual_tor": dutConfig['dualTor'], + "dual_tor_scenario": dutConfig['dualTorScenario'] }) self.runPtfTest( @@ -504,6 +977,62 @@ def testQosSaiDscpQueueMapping( testParams=testParams ) + @pytest.mark.parametrize("direction", ["downstream", "upstream"]) + def testQosSaiSeparatedDscpQueueMapping(self, duthost, ptfhost, dutTestParams, dutConfig, direction, dut_qos_maps): + """ + Test QoS SAI DSCP to queue mapping. + We will have separated DSCP_TO_TC_MAP for uplink/downlink ports on T1 if PCBB enabled. 
+ This test case will generate both upstream and downstream traffic to verify the behavior + + Args: + duthost (AnsibleHost): The DUT host + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + direction (str): upstream/downstream + dut_qos_maps(Fixture): A fixture, return qos maps on DUT host + Returns: + None + + Raises: + RunAnsibleModuleFail if ptf test fails + """ + # Only run this test on T1 testbed when separated DSCP_TO_TC_MAP is defined + if not separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps): + pytest.skip("Skip this test since separated DSCP_TO_TC_MAP is not applied") + if "dualtor" in dutTestParams['topo']: + pytest.skip("Skip this test case on dualtor testbed") + + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + testParams.update({ + "hwsku": dutTestParams['hwsku'], + "dual_tor_scenario": True + }) + if direction == "downstream": + testParams.update({ + "dst_port_id": dutConfig["testPorts"]["downlink_port_ids"][0], + "dst_port_ip": dutConfig["testPorts"]["downlink_port_ips"][0], + "src_port_id": dutConfig["testPorts"]["uplink_port_ids"][0], + "src_port_ip": dutConfig["testPorts"]["uplink_port_ips"][0] + }) + testParams.update({"leaf_downstream": True}) + else: + testParams.update({ + "dst_port_id": dutConfig["testPorts"]["uplink_port_ids"][0], + "dst_port_ip": dutConfig["testPorts"]["uplink_port_ips"][0], + "src_port_id": dutConfig["testPorts"]["downlink_port_ids"][0], + "src_port_ip": dutConfig["testPorts"]["downlink_port_ips"][0] + }) + testParams.update({"leaf_downstream": False}) + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.DscpMappingPB", + testParams=testParams + ) + def testQosSaiDot1pQueueMapping( self, ptfhost, dutTestParams, dutConfig ): @@ -527,6 +1056,7 @@ def 
testQosSaiDot1pQueueMapping( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dst_port_id": dutConfig["testPorts"]["dst_port_id"], "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], @@ -561,6 +1091,7 @@ def testQosSaiDot1pPgMapping( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dst_port_id": dutConfig["testPorts"]["dst_port_id"], "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], @@ -574,13 +1105,14 @@ def testQosSaiDot1pPgMapping( ) def testQosSaiDwrr( - self, ptfhost, dutTestParams, dutConfig, dutQosConfig, + self, ptfhost, duthost, dutTestParams, dutConfig, dutQosConfig, ): """ Test QoS SAI DWRR Args: ptfhost (AnsibleHost): Packet Test Framework (PTF) + duthost (AnsibleHost): The DUT for testing dutTestParams (Fixture, dict): DUT host test params dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, and test ports @@ -594,9 +1126,10 @@ def testQosSaiDwrr( """ portSpeedCableLength = dutQosConfig["portSpeedCableLength"] qosConfig = dutQosConfig["param"] - + qos_remap_enable = is_tunnel_qos_remap_enabled(duthost) testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dst_port_id": dutConfig["testPorts"]["dst_port_id"], "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], @@ -610,10 +1143,12 @@ def testQosSaiDwrr( "q4_num_of_pkts": qosConfig["wrr"]["q4_num_of_pkts"], "q5_num_of_pkts": qosConfig["wrr"]["q5_num_of_pkts"], "q6_num_of_pkts": qosConfig["wrr"]["q6_num_of_pkts"], + "q7_num_of_pkts": qosConfig["wrr"].get("q7_num_of_pkts", 0), "limit": qosConfig["wrr"]["limit"], "pkts_num_leak_out": qosConfig[portSpeedCableLength]["pkts_num_leak_out"], "hwsku":dutTestParams['hwsku'], - "topo": dutTestParams["topo"] 
+ "topo": dutTestParams["topo"], + "qos_remap_enable": qos_remap_enable }) if "lossy_queue_1" in dutQosConfig["param"][portSpeedCableLength].keys(): @@ -665,6 +1200,7 @@ def testQosSaiPgSharedWatermark( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig[pgProfile]["dscp"], "ecn": qosConfig[pgProfile]["ecn"], @@ -723,6 +1259,7 @@ def testQosSaiPgHeadroomWatermark( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig["wm_pg_headroom"]["dscp"], "ecn": qosConfig["wm_pg_headroom"]["ecn"], @@ -750,6 +1287,56 @@ def testQosSaiPgHeadroomWatermark( testParams=testParams ) + def testQosSaiPGDrop( + self, ptfhost, dutTestParams, dutConfig, dutQosConfig + ): + """ + Test QoS SAI PG drop counter + Args: + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration + Returns: + None + Raises: + RunAnsibleModuleFail if ptf test fails + """ + if dutTestParams["basicParams"]["sonic_asic_type"] != "cisco-8000": + pytest.skip("PG drop size test is not supported") + + portSpeedCableLength = dutQosConfig["portSpeedCableLength"] + if "pg_drop" in dutQosConfig["param"][portSpeedCableLength].keys(): + qosConfig = dutQosConfig["param"][portSpeedCableLength] + else: + qosConfig = dutQosConfig["param"] + + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + pgDropKey = "pg_drop" + testParams.update({ + "dscp": qosConfig[pgDropKey]["dscp"], + "ecn": qosConfig[pgDropKey]["ecn"], + "pg": qosConfig[pgDropKey]["pg"], + "queue": 
qosConfig[pgDropKey]["queue"], + "dst_port_id": dutConfig["testPorts"]["dst_port_id"], + "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], + "src_port_id": dutConfig["testPorts"]["src_port_id"], + "src_port_ip": dutConfig["testPorts"]["src_port_ip"], + "src_port_vlan": dutConfig["testPorts"]["src_port_vlan"], + "pkts_num_trig_pfc": qosConfig[pgDropKey]["pkts_num_trig_pfc"], + "pkts_num_trig_ingr_drp": qosConfig[pgDropKey]["pkts_num_trig_ingr_drp"], + "pkts_num_margin": qosConfig[pgDropKey]["pkts_num_margin"], + "iterations": qosConfig[pgDropKey]["iterations"], + "hwsku":dutTestParams['hwsku'] + }) + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.PGDropTest", testParams=testParams + ) + @pytest.mark.parametrize("queueProfile", ["wm_q_shared_lossless", "wm_q_shared_lossy"]) def testQosSaiQSharedWatermark( self, queueProfile, ptfhost, dutTestParams, dutConfig, dutQosConfig, @@ -790,6 +1377,7 @@ def testQosSaiQSharedWatermark( testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dscp": qosConfig[queueProfile]["dscp"], "ecn": qosConfig[queueProfile]["ecn"], @@ -821,17 +1409,18 @@ def testQosSaiQSharedWatermark( ) def testQosSaiDscpToPgMapping( - self, request, ptfhost, dutTestParams, dutConfig, + self, duthost, request, ptfhost, dutTestParams, dutConfig, dut_qos_maps ): """ Test QoS SAI DSCP to PG mapping ptf test Args: + duthost (AnsibleHost): The DUT host ptfhost (AnsibleHost): Packet Test Framework (PTF) dutTestParams (Fixture, dict): DUT host test params dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, and test ports - + dut_qos_maps(Fixture): A fixture, return qos maps on DUT host Returns: None @@ -841,12 +1430,16 @@ def testQosSaiDscpToPgMapping( disableTest = request.config.getoption("--disable_test") if disableTest: pytest.skip("DSCP to PG mapping test disabled") + # Skip the regular dscp to pg 
mapping test. Will run another test case instead. + if separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps): + pytest.skip("Skip this test since separated DSCP_TO_TC_MAP is applied") if "backend" in dutTestParams["topo"]: pytest.skip("Dscp-PG mapping is not supported on {}".format(dutTestParams["topo"])) testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "dst_port_id": dutConfig["testPorts"]["dst_port_id"], "dst_port_ip": dutConfig["testPorts"]["dst_port_ip"], @@ -858,8 +1451,62 @@ def testQosSaiDscpToPgMapping( testParams=testParams ) + @pytest.mark.parametrize("direction", ["downstream", "upstream"]) + def testQosSaiSeparatedDscpToPgMapping(self, duthost, request, ptfhost, dutTestParams, dutConfig, direction, dut_qos_maps): + """ + Test QoS SAI DSCP to PG mapping ptf test. + Since we are using different DSCP_TO_TC_MAP on uplink/downlink port, the test case also need to + run separately + + Args: + duthost (AnsibleHost) + ptfhost (AnsibleHost): Packet Test Framework (PTF) + dutTestParams (Fixture, dict): DUT host test params + dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs, + and test ports + direction (str): downstream or upstream + dut_qos_maps(Fixture): A fixture, return qos maps on DUT host + Returns: + None + + Raises: + RunAnsibleModuleFail if ptf test fails + """ + if not separated_dscp_to_tc_map_on_uplink(duthost, dut_qos_maps): + pytest.skip("Skip this test since separated DSCP_TO_TC_MAP is not applied") + if "dualtor" in dutTestParams['topo']: + pytest.skip("Skip this test case on dualtor testbed") + + testParams = dict() + testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) + if direction == "downstream": + testParams.update({ + "dst_port_id": dutConfig["testPorts"]["downlink_port_ids"][0], + "dst_port_ip": 
dutConfig["testPorts"]["downlink_port_ips"][0], + "src_port_id": dutConfig["testPorts"]["uplink_port_ids"][0], + "src_port_ip": dutConfig["testPorts"]["uplink_port_ips"][0] + }) + src_port_name = dutConfig["testPorts"]["uplink_port_names"][0] + else: + testParams.update({ + "dst_port_id": dutConfig["testPorts"]["uplink_port_ids"][0], + "dst_port_ip": dutConfig["testPorts"]["uplink_port_ips"][0], + "src_port_id": dutConfig["testPorts"]["downlink_port_ids"][0], + "src_port_ip": dutConfig["testPorts"]["downlink_port_ips"][0] + }) + src_port_name = dutConfig["testPorts"]["downlink_port_names"][0] + + testParams['dscp_to_pg_map'] = load_dscp_to_pg_map(duthost, src_port_name, dut_qos_maps) + + self.runPtfTest( + ptfhost, testCase="sai_qos_tests.DscpToPgMapping", + testParams=testParams + ) + + def testQosSaiDwrrWeightChange( - self, ptfhost, dutTestParams, dutConfig, dutQosConfig, + self, ptfhost, duthost, dutTestParams, dutConfig, dutQosConfig, updateSchedProfile ): """ @@ -884,9 +1531,10 @@ def testQosSaiDwrrWeightChange( portSpeedCableLength = dutQosConfig["portSpeedCableLength"] qosConfig = dutQosConfig["param"] - + qos_remap_enable = is_tunnel_qos_remap_enabled(duthost) testParams = dict() testParams.update(dutTestParams["basicParams"]) + testParams.update({"test_port_ids": dutConfig["testPortIds"]}) testParams.update({ "ecn": qosConfig["wrr_chg"]["ecn"], "dst_port_id": dutConfig["testPorts"]["dst_port_id"], @@ -904,71 +1552,9 @@ def testQosSaiDwrrWeightChange( "limit": qosConfig["wrr_chg"]["limit"], "pkts_num_leak_out": qosConfig[portSpeedCableLength]["pkts_num_leak_out"], "hwsku":dutTestParams['hwsku'], - "topo": dutTestParams["topo"] + "topo": dutTestParams["topo"], + "qos_remap_enable": qos_remap_enable }) self.runPtfTest( ptfhost, testCase="sai_qos_tests.WRRtest", testParams=testParams ) - - -class TestQosSaiMasic(QosSaiBaseMasic): - - def test_qos_masic_dscp_queue_mapping( - self, duthosts, rand_one_dut_hostname, enum_backend_asic_index, - ptfhost, 
dutTestParams, get_test_ports - ): - duthost = duthosts[rand_one_dut_hostname] - src_asic = get_test_ports["src_asic"] - - if not duthost.sonichost.is_multi_asic: - pytest.skip("Test applies to only multi ASIC platform") - - if enum_backend_asic_index is None: - pytest.skip("Backend ASIC is None") - - try: - # Bring down port (channel) towards ASICs other than the ASIC - # under test, so that traffic always goes via ASIC under test - self.backend_ip_if_admin_state( - duthost, enum_backend_asic_index, src_asic, "shutdown" - ) - - test_params = dict() - test_params.update(dutTestParams["basicParams"]) - test_params.update(get_test_ports) - logger.debug(test_params) - - # ensure the test destination IP has a path to backend ASIC - pytest_assert( - wait_until( - 30, 1, 0, self.check_v4route_backend_nhop, duthost, - test_params["src_asic"], test_params["dst_port_ip"] - ), - "Route {} doesn't have backend ASIC nexthop on ASIC {}".format( - test_params["dst_port_ip"], test_params["src_asic"] - ) - ) - - duthost.asic_instance( - enum_backend_asic_index - ).create_ssh_tunnel_sai_rpc() - - # find traffic src/dst ports on the ASIC under test - test_params.update( - self.find_asic_traffic_ports(duthost, ptfhost, test_params) - ) - - self.runPtfTest( - ptfhost, testCase="sai_qos_tests.DscpMappingPB", - testParams=test_params - ) - - finally: - # bring up the backed IFs - self.backend_ip_if_admin_state( - duthost, enum_backend_asic_index, src_asic, "startup" - ) - - duthost.asic_instance( - enum_backend_asic_index - ).remove_ssh_tunnel_sai_rpc() diff --git a/tests/qos/test_tunnel_qos_remap.py b/tests/qos/test_tunnel_qos_remap.py new file mode 100644 index 00000000000..60c8389ad85 --- /dev/null +++ b/tests/qos/test_tunnel_qos_remap.py @@ -0,0 +1,530 @@ +import logging +import pytest +import sys +import time +from ptf.mask import Mask +import ptf.packet as scapy +from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] +from 
tests.common.fixtures.ptfhost_utils import copy_saitests_directory # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import change_mac_addresses # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import run_icmp_responder # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import run_garp_service # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import set_ptf_port_mapping_mode # lgtm[py/unused-import] +from tests.common.fixtures.ptfhost_utils import ptf_portmap_file_module # lgtm[py/unused-import] +from tests.common.fixtures.duthost_utils import dut_qos_maps # lgtm[py/unused-import] +from tests.common.fixtures.duthost_utils import separated_dscp_to_tc_map_on_uplink +from tests.common.helpers.assertions import pytest_require, pytest_assert +from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_lower_tor, toggle_all_simulator_ports_to_rand_selected_tor, toggle_all_simulator_ports_to_rand_unselected_tor # lgtm[py/unused-import] +from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host, dualtor_info, get_t1_active_ptf_ports, mux_cable_server_ip, is_tunnel_qos_remap_enabled +from tunnel_qos_remap_base import build_testing_packet, check_queue_counter, dut_config, qos_config, load_tunnel_qos_map, run_ptf_test, toggle_mux_to_host, setup_module, update_docker_services, swap_syncd, counter_poll_config # lgtm[py/unused-import] +from tunnel_qos_remap_base import leaf_fanout_peer_info, start_pfc_storm, stop_pfc_storm, get_queue_counter +from ptf import testutils +from ptf.testutils import simple_tcp_packet +from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts +from tests.common.helpers.pfc_storm import PFCStorm + + +pytestmark = [ + pytest.mark.topology('t0') +] + +logger = logging.getLogger(__name__) + +SERVER_IP = "192.168.0.2" +DUMMY_IP = "1.1.1.1" +DUMMY_MAC = "aa:aa:aa:aa:aa:aa" +VLAN_MAC = "00:aa:bb:cc:dd:ee" + 
+PFC_PKT_COUNT = 10000000 # Cost 32 seconds +PFC_PAUSE_TEST_RETRY_MAX = 5 + + +@pytest.fixture(scope='module', autouse=True) +def check_running_condition(tbinfo, duthost): + """ + The test can only be running on tunnel_qos_remap enabled dualtor testbed + """ + # Check dualtor topo + pytest_require("dualtor" in tbinfo["topo"]["name"], "Only run on dualtor testbed.", True) + + # Check tunnel_qos_remap is enabled + pytest_require(is_tunnel_qos_remap_enabled(duthost), "Only run when tunnel_qos_remap is enabled", True) + + +def _last_port_in_last_lag(lags): + """ + A helper function to get the last LAG member in the last portchannel + """ + last_lag = sorted(list(lags.keys()))[-1] + return lags[last_lag][-1] + + +def test_encap_dscp_rewrite(ptfhost, upper_tor_host, lower_tor_host, toggle_all_simulator_ports_to_lower_tor, tbinfo, ptfadapter): + """ + The test is to verify the dscp rewriting of encapped packets. + Test steps + 1. Toggle mux to lower tor, so all mux ports are standby on upper_tor + 2. Generate packets with certain DSCP value + 3. Send the generated packets via portchannels + 4. 
Verify the packets are encapped with expected DSCP value + """ + DSCP_COMBINATIONS = [ + # DSCP in generated packets, expected DSCP in encapped packets + (8, 8), + (0, 0), + (33, 33), + (3, 2), + (4, 6), + (46, 46), + (48, 48) + ] + dualtor_meta = dualtor_info(ptfhost, upper_tor_host, lower_tor_host, tbinfo) + active_tor_mac = lower_tor_host.facts['router_mac'] + + t1_ports = get_t1_active_ptf_ports(upper_tor_host, tbinfo) + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + dst_ports = [] + for ports in t1_ports.values(): + dst_ports.extend(ports) + + for dscp_combination in DSCP_COMBINATIONS: + pkt, expected_pkt = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=SERVER_IP, + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip=dualtor_meta['active_tor_ip'], + standby_tor_ip=dualtor_meta['standby_tor_ip'], + inner_dscp=dscp_combination[0], + outer_dscp=dscp_combination[1], + ecn=1) + ptfadapter.dataplane.flush() + # Send original packet + testutils.send(ptfadapter, src_port, pkt) + # Verify encaped packet + testutils.verify_packet_any_port(ptfadapter, expected_pkt, dst_ports) + + +def test_bounced_back_traffic_in_expected_queue(ptfhost, upper_tor_host, lower_tor_host, toggle_all_simulator_ports_to_lower_tor, tbinfo, ptfadapter): + """ + The test case is to verify the encapped packet is mapped to the correct queue + Test steps: + 1. Toggle mux to lower tor, so all mux ports are standby on upper_tor + 2. Generate packets with certain DSCP value + 3. Send the generated packets via portchannels + 4. 
Verify the packets are outgoing from expected queue + """ + TEST_DATA = [ + #DSCP QUEUE + (8, 0), + (0, 1), + (33, 1), + (3, 2), + (4, 6), + (46, 5), + (48, 7) + ] + dualtor_meta = dualtor_info(ptfhost, upper_tor_host, lower_tor_host, tbinfo) + active_tor_mac = lower_tor_host.facts['router_mac'] + t1_ports = get_t1_active_ptf_ports(upper_tor_host, tbinfo) + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + mg_facts = upper_tor_host.get_extended_minigraph_facts(tbinfo) + portchannel_info = mg_facts['minigraph_portchannels'] + tor_pc_intfs = list() + for pc in portchannel_info.values(): + for member in pc['members']: + tor_pc_intfs.append(member) + PKT_NUM = 100 + + for dscp, queue in TEST_DATA: + pkt, _ = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=SERVER_IP, + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip=dualtor_meta['active_tor_ip'], + standby_tor_ip=dualtor_meta['standby_tor_ip'], + inner_dscp=dscp, + outer_dscp=0, + ecn=1) + # Clear queuecounters before sending traffic + upper_tor_host.shell('sonic-clear queuecounters') + # Send original packet + testutils.send_packet(ptfadapter, src_port, pkt, PKT_NUM) + # Verify queue counters in all possible interfaces + time.sleep(15) + + pytest_assert(check_queue_counter(upper_tor_host, tor_pc_intfs, queue, PKT_NUM), + "The queue counter for DSCP {} Queue {} is not as expected".format(dscp, queue)) + + +def test_tunnel_decap_dscp_to_queue_mapping(ptfhost, rand_selected_dut, rand_unselected_dut, toggle_all_simulator_ports_to_rand_selected_tor, tbinfo, ptfadapter): + """ + The test case is to verify the decapped packet on active ToR are egressed to server from expected queue. + Test steps: + 1. Toggle mux to the randomly selected ToR + 2. Generate IPinIP packets with different DSCP combination (inner and outer) + 3. Send the generated packets via portchannels + 4. 
Verify the packets are decapped, and outgoing from the expected queue + """ + dualtor_meta = dualtor_info(ptfhost, rand_unselected_dut, rand_selected_dut, tbinfo) + t1_ports = get_t1_active_ptf_ports(rand_selected_dut, tbinfo) + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + active_tor_mac = rand_selected_dut.facts['router_mac'] + # Set queue counter polling interval to 1 second to speed up the test + counter_poll_config(rand_selected_dut, 'queue', 1000) + tunnel_qos_map = load_tunnel_qos_map() + PKT_NUM = 100 + try: + # Walk through all DSCP values + for inner_dscp in range(0, 64): + outer_dscp = tunnel_qos_map['inner_dscp_to_outer_dscp_map'][inner_dscp] + _, exp_packet = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=dualtor_meta['target_server_ip'], + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip=dualtor_meta['active_tor_ip'], + standby_tor_ip=dualtor_meta['standby_tor_ip'], + inner_dscp=inner_dscp, + outer_dscp=outer_dscp, + ecn=1) + tunnel_packet = exp_packet.exp_pkt + # Clear queuecounters before sending traffic + rand_selected_dut.shell('sonic-clear queuecounters') + time.sleep(1) + # Send tunnel packets + testutils.send(ptfadapter, src_port, tunnel_packet, PKT_NUM) + # Wait 2 seconds for queue counter to be refreshed + time.sleep(2) + # Verify counter at expected queue at the server facing port + pytest_assert(check_queue_counter(rand_selected_dut, [dualtor_meta['selected_port']], tunnel_qos_map['inner_dscp_to_queue_map'][inner_dscp], PKT_NUM), + "The queue counter for DSCP {} Queue {} is not as expected".format(inner_dscp, tunnel_qos_map['inner_dscp_to_queue_map'][inner_dscp])) + + finally: + counter_poll_config(rand_selected_dut, 'queue', 10000) + + +def test_separated_qos_map_on_tor(ptfhost, rand_selected_dut, rand_unselected_dut, toggle_all_simulator_ports_to_rand_selected_tor, tbinfo, ptfadapter, dut_qos_maps): + """ + The test case is to 
verify separated DSCP_TO_TC_MAP/TC_TO_QUEUE_MAP on uplink and downlink ports of dualtor + Test steps + 1. Build IPinIP encapsulated packet with dummy src ip and dst ip (must not be the loopback address of dualtor) + 2. Ingress the packet from uplink port, verify the packets egressed from expected queue + 3. Build regular packet with dst_ip = dummy IP (routed by default route) + 4. Ingress the packet from downlink port, verify the packets egressed from expected queue + """ + pytest_require(separated_dscp_to_tc_map_on_uplink(rand_selected_dut, dut_qos_maps), + "Skip test because separated QoS map is not applied") + dualtor_meta = dualtor_info(ptfhost, rand_unselected_dut, rand_selected_dut, tbinfo) + t1_ports = get_t1_active_ptf_ports(rand_selected_dut, tbinfo) + mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) + portchannel_info = mg_facts['minigraph_portchannels'] + tor_pc_intfs = list() + for pc in portchannel_info.values(): + for member in pc['members']: + tor_pc_intfs.append(member) + active_tor_mac = rand_selected_dut.facts['router_mac'] + # Set queue counter polling interval to 1 second to speed up the test + counter_poll_config(rand_selected_dut, 'queue', 1000) + PKT_NUM = 100 + # DSCP 2/6 are mapped to lossless queue 2/6 on uplink ports + UP_LINK_TEST_DATA = { + # Inner DSCP, Outer DSCP, Expected queue + (3, 2, 2), + (4, 6, 6) + } + # DSCP 2/6 are mapped to lossy queue 1 on downlink ports + DOWN_LINK_TEST_DATA = { + # DSCP, Expected queue + (2, 1), + (6, 1) + } + try: + # uplink port test + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + for inner_dscp, outer_dscp, queue in UP_LINK_TEST_DATA: + # We use the IPinIP packet only + _, exp_packet = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=dualtor_meta['target_server_ip'], + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip='20.2.0.22', # The active/standby tor ip must be fake value so 
that the pack is not decaped + standby_tor_ip='20.2.0.21', + inner_dscp=inner_dscp, + outer_dscp=outer_dscp) + ipinip_packet = exp_packet.exp_pkt + # Clear queuecounters before sending traffic + rand_selected_dut.shell('sonic-clear queuecounters') + time.sleep(1) + # Send tunnel packets + testutils.send(ptfadapter, src_port, ipinip_packet, PKT_NUM) + # Wait 2 seconds for queue counter to be refreshed + time.sleep(2) + # Since the packet will not be decaped by active ToR, we expected to see the packet egress from any uplink ports + pytest_assert(check_queue_counter(rand_selected_dut, tor_pc_intfs, queue, PKT_NUM), + "Uplink test: the queue counter for DSCP {} Queue {} is not as expected".format(outer_dscp, queue)) + + # downlink port test + src_port = dualtor_meta['target_server_port'] + for dscp, queue in DOWN_LINK_TEST_DATA: + pkt = simple_tcp_packet(eth_dst=VLAN_MAC, + ip_src=dualtor_meta['target_server_ip'], + ip_dst=DUMMY_IP, # A dummy IP that will hit default route, + ip_dscp=dscp) + + # Clear queuecounters before sending traffic + rand_selected_dut.shell('sonic-clear queuecounters') + time.sleep(1) + # Send tunnel packets + testutils.send(ptfadapter, src_port, pkt, PKT_NUM) + # Wait 2 seconds for queue counter to be refreshed + time.sleep(2) + # We expected to see the packet egress from any uplink ports since the dst IP will hit the default route + pytest_assert(check_queue_counter(rand_selected_dut, tor_pc_intfs, queue, PKT_NUM), + "Downlink test: the queue counter for DSCP {} Queue {} is not as expected".format(dscp, queue)) + finally: + counter_poll_config(rand_selected_dut, 'queue', 10000) + + +def pfc_pause_test(storm_handler, peer_info, prio, ptfadapter, dut, port, queue, pkt, src_port, exp_pkt, dst_ports): + try: + # Start PFC storm from leaf fanout switch + start_pfc_storm(storm_handler, peer_info, prio) + ptfadapter.dataplane.flush() + # Record the queue counter before sending test packet + base_queue_count = get_queue_counter(dut, port, queue, True) 
+ # Send testing packet again + testutils.send_packet(ptfadapter, src_port, pkt, 1) + # The packet should be paused + testutils.verify_no_packet_any(ptfadapter, exp_pkt, dst_ports) + # Check the queue counter didn't increase + queue_count = get_queue_counter(dut, port, queue, False) + assert base_queue_count == queue_count + return True + except AssertionError: + logger.info('assert {}'.format(sys.exc_info())) + return False + except Exception: + logger.info('exception {}'.format(sys.exc_info())) + return False + finally: + stop_pfc_storm(storm_handler) + + +def test_pfc_pause_extra_lossless_standby(ptfhost, fanouthosts, rand_selected_dut, rand_unselected_dut, toggle_all_simulator_ports_to_rand_unselected_tor, tbinfo, ptfadapter, conn_graph_facts, fanout_graph_facts): + """ + The test case is to verify PFC pause frame can pause extra lossless queues in dualtor deployment. + Test steps: + 1. Toggle mux ports to rand_unselected_dut, so all mux ports are standby on the selected ToR + 2. Generate packets with different DSCPs, ingress to standby ToR. The traffic will be bounced back to T1 + 3. Generate PFC pause on fanout switch (T1 ports) + 4. 
Verify lossless traffic are paused + """ + TEST_DATA = { + # Inner DSCP, Outer DSCP, Priority + (3, 2, 2, 2), + (4, 6, 6, 6) + } + dualtor_meta = dualtor_info(ptfhost, rand_selected_dut, rand_unselected_dut, tbinfo) + t1_ports = get_t1_active_ptf_ports(rand_selected_dut, tbinfo) + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + # The encapsulated packets can egress from any uplink port + dst_ports = [] + for ports in t1_ports.values(): + dst_ports.extend(ports) + active_tor_mac = rand_unselected_dut.facts['router_mac'] + mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) + ptfadapter.dataplane.flush() + for inner_dscp, outer_dscp, prio, queue in TEST_DATA: + pkt, exp_pkt = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=SERVER_IP, + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip=dualtor_meta['active_tor_ip'], + standby_tor_ip=dualtor_meta['standby_tor_ip'], + inner_dscp=inner_dscp, + outer_dscp=outer_dscp, + ecn=1) + # Ingress packet from uplink port + testutils.send(ptfadapter, src_port, pkt, 1) + # Get the actual egress port + result = testutils.verify_packet_any_port(ptfadapter, exp_pkt, dst_ports) + actual_port = dst_ports[result[0]] + # Get the port name from mgfacts + for port_name, idx in mg_facts['minigraph_ptf_indices'].items(): + if idx == actual_port: + actual_port_name = port_name + break + pytest_assert(actual_port_name) + peer_info = leaf_fanout_peer_info(rand_selected_dut, conn_graph_facts, mg_facts, actual_port) + storm_handler = PFCStorm(rand_selected_dut, fanout_graph_facts, fanouthosts, + pfc_queue_idx=prio, + pfc_frames_number=PFC_PKT_COUNT, + peer_info=peer_info) + + retry = 0 + while retry < PFC_PAUSE_TEST_RETRY_MAX: + try: + if pfc_pause_test(storm_handler, peer_info, prio, ptfadapter, rand_selected_dut, actual_port_name, + queue, pkt, src_port, exp_pkt, dst_ports): + break + except AssertionError: + retry += 1 + if retry 
== PFC_PAUSE_TEST_RETRY_MAX: + pytest_assert(False, "The queue {} for port {} counter increased unexpectedly".format( + queue, actual_port_name)) + except Exception: + retry += 1 + if retry == PFC_PAUSE_TEST_RETRY_MAX: + pytest_assert(False, "The queue {} for port {} counter increased unexpectedly".format( + queue, actual_port_name)) + time.sleep(5) + + +def test_pfc_pause_extra_lossless_active(ptfhost, fanouthosts, rand_selected_dut, rand_unselected_dut, toggle_all_simulator_ports_to_rand_selected_tor, tbinfo, ptfadapter, conn_graph_facts, fanout_graph_facts): + """ + The test case is to verify PFC pause frame can pause extra lossless queues in dualtor deployment. + Test steps: + 1. Toggle mux ports to rand_selected_dut, so all mux ports are standby on the unselected ToR + 2. Generate IPinIP packets with different DSCP combinations, ingress to active ToR. + 3. Generate PFC pause on fanout switch (Server facing ports) + 4. Verify lossless traffic are paused + """ + TEST_DATA = { + # Inner DSCP, Outer DSCP, Priority, Queue + (3, 2, 3, 3), + (4, 6, 4, 4) + } + dualtor_meta = dualtor_info(ptfhost, rand_unselected_dut, rand_selected_dut, tbinfo) + t1_ports = get_t1_active_ptf_ports(rand_selected_dut, tbinfo) + # Always select the last port in the last LAG as src_port + src_port = _last_port_in_last_lag(t1_ports) + active_tor_mac = rand_selected_dut.facts['router_mac'] + mg_facts = rand_unselected_dut.get_extended_minigraph_facts(tbinfo) + ptfadapter.dataplane.flush() + for inner_dscp, outer_dscp, prio, queue in TEST_DATA: + pkt, tunnel_pkt = build_testing_packet(src_ip=DUMMY_IP, + dst_ip=dualtor_meta['target_server_ip'], + active_tor_mac=active_tor_mac, + standby_tor_mac=dualtor_meta['standby_tor_mac'], + active_tor_ip=dualtor_meta['active_tor_ip'], + standby_tor_ip=dualtor_meta['standby_tor_ip'], + inner_dscp=inner_dscp, + outer_dscp=outer_dscp, + ecn=1) + # Ingress packet from uplink port + testutils.send(ptfadapter, src_port, tunnel_pkt.exp_pkt, 1) + pkt.ttl -= 2 # 
TTL is decreased by 1 at tunnel forward and decap, + exp_pkt = Mask(pkt) + exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") + exp_pkt.set_do_not_care_scapy(scapy.Ether, "src") + exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + # Verify packet is decapsulated and egress to server + testutils.verify_packet(ptfadapter, exp_pkt, dualtor_meta['target_server_port']) + peer_info = leaf_fanout_peer_info(rand_selected_dut, conn_graph_facts, mg_facts, dualtor_meta['target_server_port']) + storm_handler = PFCStorm(rand_selected_dut, fanout_graph_facts, fanouthosts, + pfc_queue_idx=prio, + pfc_frames_number=PFC_PKT_COUNT, + peer_info=peer_info) + + retry = 0 + while retry < PFC_PAUSE_TEST_RETRY_MAX: + try: + if pfc_pause_test(storm_handler, peer_info, prio, ptfadapter, rand_selected_dut, + dualtor_meta['selected_port'], queue, tunnel_pkt.exp_pkt, src_port, exp_pkt, + dualtor_meta['target_server_port']): + break + except AssertionError: + retry += 1 + if retry == PFC_PAUSE_TEST_RETRY_MAX: + pytest_assert(False, "The queue {} for port {} counter increased unexpectedly".format( + queue, dualtor_meta['selected_port'])) + except Exception: + retry += 1 + if retry == PFC_PAUSE_TEST_RETRY_MAX: + pytest_assert(False, "The queue {} for port {} counter increased unexpectedly".format( + queue, dualtor_meta['selected_port'])) + time.sleep(5) + + +@pytest.mark.disable_loganalyzer +def test_tunnel_decap_dscp_to_pg_mapping(rand_selected_dut, ptfhost, dut_config, setup_module): + """ + Test steps: + 1. Toggle all ports to active on randomly selected ToR + 2. Populate ARP table by GARP service + 3. Disable Tx on target port + 4. Send encapsulated packets from T1 to Active ToR + 5. 
Verify the watermark increased as expected + """ + toggle_mux_to_host(rand_selected_dut) + asic = rand_selected_dut.get_asic_name() + # TODO: Get the cell size for other ASIC + if asic == 'th2': + cell_size = 208 + else: + cell_size = 256 + + tunnel_qos_map = load_tunnel_qos_map() + test_params = dict() + test_params.update({ + "src_port_id": dut_config["lag_port_ptf_id"], + "dst_port_id": dut_config["server_port_ptf_id"], + "dst_port_ip": dut_config["server_ip"], + "active_tor_mac": dut_config["selected_tor_mac"], + "active_tor_ip": dut_config["selected_tor_loopback"], + "standby_tor_mac": dut_config["unselected_tor_mac"], + "standby_tor_ip": dut_config["unselected_tor_loopback"], + "server": dut_config["selected_tor_mgmt"], + "inner_dscp_to_pg_map": tunnel_qos_map["inner_dscp_to_pg_map"], + "port_map_file": dut_config["port_map_file"], + "sonic_asic_type": dut_config["asic_type"], + "cell_size": cell_size + }) + + run_ptf_test( + ptfhost, + test_case="sai_qos_tests.TunnelDscpToPgMapping", + test_params=test_params + ) + + +@pytest.mark.disable_loganalyzer +@pytest.mark.parametrize("xoff_profile", ["pcbb_xoff_1", "pcbb_xoff_2", "pcbb_xoff_3", "pcbb_xoff_4"]) +def test_xoff_for_pcbb(rand_selected_dut, ptfhost, dut_config, qos_config, xoff_profile, setup_module): + """ + The test is to verify xoff threshold for PCBB (Priority Control for Bounced Back traffic) + Test steps + 1. Toggle all ports to active on randomly selected ports + 2. Populate ARP table by GARP service + 3. Disable Tx on egress port + 4. Verify bounced back traffic (tunnel traffic, IPinIP) can trigger PFC at expected queue + 5. 
Verify regular traffic can trigger PFC at expected queue + """ + toggle_mux_to_host(rand_selected_dut) + # Delay 5 seconds between each test run + time.sleep(5) + test_params = dict() + test_params.update({ + "src_port_id": dut_config["lag_port_ptf_id"], + "dst_port_id": dut_config["server_port_ptf_id"], + "dst_port_ip": dut_config["server_ip"], + "active_tor_mac": dut_config["selected_tor_mac"], + "active_tor_ip": dut_config["selected_tor_loopback"], + "standby_tor_mac": dut_config["unselected_tor_mac"], + "standby_tor_ip": dut_config["unselected_tor_loopback"], + "server": dut_config["selected_tor_mgmt"], + "port_map_file": dut_config["port_map_file"], + "sonic_asic_type": dut_config["asic_type"], + }) + # Update qos config into test_params + test_params.update(qos_config[xoff_profile]) + # Run test on ptfhost + run_ptf_test( + ptfhost, + test_case="sai_qos_tests.PCBBPFCTest", + test_params=test_params + ) diff --git a/tests/qos/tunnel_qos_remap_base.py b/tests/qos/tunnel_qos_remap_base.py new file mode 100644 index 00000000000..d17adeb9448 --- /dev/null +++ b/tests/qos/tunnel_qos_remap_base.py @@ -0,0 +1,411 @@ + +import copy +import ipaddress +import pytest +import logging +import json +import yaml +import time +import ptf.packet as scapy +from ptf.mask import Mask +from ptf.testutils import simple_tcp_packet, simple_ipv4ip_packet +from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip +from tests.common.helpers.assertions import pytest_assert +from tests.common.system_utils import docker +from tests.common.dualtor.mux_simulator_control import mux_server_url, toggle_all_simulator_ports +from tests.common.fixtures.ptfhost_utils import ptf_portmap_file_module # lgtm[py/unused-import] + +logger = logging.getLogger(__name__) + +def build_testing_packet(src_ip, dst_ip, active_tor_mac, standby_tor_mac, active_tor_ip, standby_tor_ip, inner_dscp, outer_dscp, ecn=1): + pkt = simple_tcp_packet( + eth_dst=standby_tor_mac, + ip_src=src_ip, + ip_dst=dst_ip, + 
def get_queue_counter(duthost, port, queue, clear_before_read=False):
    """
    Return the packet counter for the given egress queue on the given port.

    Args:
        duthost: DUT host object used to run show/clear commands.
        port: interface name, e.g. "Ethernet4".
        queue: queue index; matched against the "UC<queue>" TxQ column.
        clear_before_read: when True, clear the queue counters first and wait
            one default polling interval (10 seconds) so the reading is fresh.

    Returns:
        The Counter/pkts value as an int, or 0 if the queue row is not found.
    """
    if clear_before_read:
        duthost.shell("sonic-clear queuecounters")
        # Wait a default interval (10 seconds)
        time.sleep(10)
    cmd = "show queue counters {}".format(port)
    output = duthost.shell(cmd)['stdout_lines']
    """
    Port    TxQ    Counter/pkts    Counter/bytes    Drop/pkts    Drop/bytes
    ---------  -----  --------------  ---------------  -----------  ------------
    Ethernet4    UC0               0                0            0             0
    """
    txq = "UC{}".format(queue)
    for line in output:
        fields = line.split()
        # Skip blank/short lines (header separators, empty lines); the
        # original indexed fields[1]/fields[2] unconditionally and would
        # raise IndexError on a blank line in the command output.
        if len(fields) < 3:
            continue
        if fields[1] == txq:
            return int(fields[2])

    return 0


def check_queue_counter(duthost, intfs, queue, counter):
    """
    Check whether any interface in ``intfs`` has accumulated at least
    ``counter`` packets on queue ``queue`` (the "UC<queue>" row of
    "show queue counters").

    Returns:
        True on the first interface whose counter is >= ``counter``,
        False if no interface satisfies the threshold.
    """
    output = duthost.shell('show queue counters')['stdout_lines']
    for line in output:
        fields = line.split()
        if len(fields) == 6 and fields[0] in intfs and fields[1] == 'UC{}'.format(queue):
            if int(fields[2]) >= counter:
                return True

    return False
def get_iface_ip(mg_facts, ifacename):
    """
    Look up the IPv4 address of a named loopback interface in minigraph facts.

    Args:
        mg_facts: extended minigraph facts dict; entries under
            'minigraph_lo_interfaces' are dicts with 'name' and 'addr' keys.
        ifacename: loopback interface name to match, e.g. 'Loopback0'.

    Returns:
        The first IPv4 address string whose entry matches ``ifacename``,
        or None when no IPv4 entry with that name exists (IPv6 entries for
        the same name are skipped).
    """
    candidates = (
        entry['addr']
        for entry in mg_facts['minigraph_lo_interfaces']
        if entry['name'] == ifacename
        and ipaddress.ip_address(entry['addr']).version == 4
    )
    return next(candidates, None)
def _lossless_profile_name(dut, port_name, pgs='2-4'):
    """
    Read the lossless buffer profile name for the given port/priority groups
    from APPL_DB.

    Args:
        dut: DUT host object.
        port_name: interface name, e.g. "PortChannel101" member port.
        pgs: priority-group range key in BUFFER_PG_TABLE, default '2-4'.

    Returns:
        The bare profile name, e.g. "pg_lossless_100000_300m_profile".
    """
    cmd = "sonic-db-cli APPL_DB hget \'BUFFER_PG_TABLE:{}:{}\' \'profile\'".format(port_name, pgs)
    profile_name = dut.shell(cmd)['stdout']
    pytest_assert(profile_name != "")
    # The output can be pg_lossless_100000_300m_profile or [BUFFER_PROFILE_TABLE:pg_lossless_100000_300m_profile]
    profile_name = profile_name.split(':')[-1].rstrip(']')
    return profile_name


@pytest.fixture(scope='module')
def qos_config(rand_selected_dut, tbinfo, dut_config):
    """
    Load the qos.yml parameters matching this DUT's ASIC, topology and
    the lossless buffer profile of the first portchannel member port.
    """
    duthost = rand_selected_dut
    SUPPORTED_ASIC_LIST = ["gb", "td2", "th", "th2", "spc1", "spc2", "spc3", "td3", "th3", "j2c+", "jr2"]

    qos_configs = {}
    with open(r"qos/files/qos.yml") as file:
        qos_configs = yaml.load(file, Loader=yaml.FullLoader)

    mg_facts = duthost.get_extended_minigraph_facts(tbinfo)
    vendor = duthost.facts["asic_type"]
    hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname]
    # Identify the ASIC by checking which "<vendor>_<asic>_hwskus" group
    # variable contains this DUT's hwsku
    dut_asic = None
    for asic in SUPPORTED_ASIC_LIST:
        vendor_asic = "{0}_{1}_hwskus".format(vendor, asic)
        if vendor_asic in hostvars.keys() and mg_facts["minigraph_hwsku"] in hostvars[vendor_asic]:
            dut_asic = asic
            break

    pytest_assert(dut_asic, "Cannot identify DUT ASIC type")

    dut_topo = "topo-"
    topo = tbinfo["topo"]["name"]
    if dut_topo + topo in qos_configs['qos_params'].get(dut_asic, {}):
        dut_topo = dut_topo + topo
    else:
        # Default topo is any
        dut_topo = dut_topo + "any"

    # Get profile name for src port
    lag_port_name = dut_config["lag_port_name"]
    profile_name = _lossless_profile_name(duthost, lag_port_name, '2-4')
    # Reduce e.g. "pg_lossless_100000_300m_profile" to the "<speed>_<cable>"
    # key used in qos.yml ("100000_300m").
    # NOTE: str.lstrip/str.rstrip treat their argument as a CHARACTER SET,
    # not a literal prefix/suffix — lstrip('pg_lossless_') only worked here
    # by coincidence of the character inventory. Strip explicitly instead.
    if profile_name.startswith('pg_lossless_'):
        profile_name = profile_name[len('pg_lossless_'):]
    if profile_name.endswith('_profile'):
        profile_name = profile_name[:-len('_profile')]

    return qos_configs['qos_params'][dut_asic][dut_topo][profile_name]
def _update_docker_service(duthost, docker="", action="", service=""):
    """
    A helper function to start/stop a supervisord-managed service inside
    the given container via ``docker exec <docker> supervisorctl <action> <service>``.
    """
    cmd = "docker exec {docker} supervisorctl {action} {service}".format(docker=docker, action=action, service=service)
    duthost.shell(cmd)
    logger.info("{}ed {}".format(action, service))


@pytest.fixture(scope='module')
def update_docker_services(rand_selected_dut, swap_syncd, disable_container_autorestart, enable_container_autorestart):
    """
    Disable/enable lldp and bgp

    Setup: disable container autorestart for lldp/bgp/syncd/swss, then stop
    the lldp and bgp daemons so they do not interfere with the QoS test.
    Teardown: re-enable autorestart and restart the stopped daemons.
    Depends on the swap_syncd fixture so syncd is already the RPC image.
    """
    feature_list = ['lldp', 'bgp', 'syncd', 'swss']
    disable_container_autorestart(rand_selected_dut, testcase="test_tunnel_qos_remap", feature_list=feature_list)

    # (container, service) pairs to stop during the test
    SERVICES = [
        {"docker": "lldp", "service": "lldp-syncd"},
        {"docker": "lldp", "service": "lldpd"},
        {"docker": "bgp", "service": "bgpd"},
        {"docker": "bgp", "service": "bgpmon"}
    ]
    for service in SERVICES:
        _update_docker_service(rand_selected_dut, action="stop", **service)

    yield

    enable_container_autorestart(rand_selected_dut, testcase="test_tunnel_qos_remap", feature_list=feature_list)
    for service in SERVICES:
        _update_docker_service(rand_selected_dut, action="start", **service)


def _update_mux_feature(duthost, state):
    """Enable/disable the mux feature: ``config feature state mux <state>``."""
    cmd = "sudo config feature state mux {}".format(state)
    duthost.shell(cmd)


def _update_muxcable_mode(duthost, mode):
    """Set the muxcable mode (e.g. manual/auto) for all mux ports."""
    cmd = "sudo config muxcable mode {} all".format(mode)
    duthost.shell(cmd)


def _update_counterpoll_state(duthost, counter_name, state):
    """Enable/disable a counterpoll group, e.g. ('watermark', 'disable')."""
    cmd = "sudo counterpoll {} {}".format(counter_name, state)
    duthost.shell(cmd)
@pytest.fixture(scope='module')
def setup_module(rand_selected_dut, rand_unselected_dut, update_docker_services):
    '''
    Module level setup/teardown

    Setup: pin muxcable mode to manual on both ToRs and disable watermark
    counterpoll on both. Teardown: restore auto mode and re-enable the
    watermark counters. Depends on update_docker_services so lldp/bgp are
    already stopped.
    '''
    # Set the muxcable mode to manual so that the mux cable won't be toggled by heartbeat
    _update_muxcable_mode(rand_selected_dut, "manual")
    _update_muxcable_mode(rand_unselected_dut, "manual")
    # Disable the counter for watermark so that the cached counter in SAI is not cleared periodically
    _update_counterpoll_state(rand_selected_dut, 'watermark', 'disable')
    _update_counterpoll_state(rand_unselected_dut, 'watermark', 'disable')

    yield

    # Set the muxcable mode to auto
    _update_muxcable_mode(rand_selected_dut, "auto")
    _update_muxcable_mode(rand_unselected_dut, "auto")
    # Enable the counter for watermark
    _update_counterpoll_state(rand_selected_dut, 'watermark', 'enable')
    _update_counterpoll_state(rand_unselected_dut, 'watermark', 'enable')


def toggle_mux_to_host(duthost):
    '''
    Toggle the muxcable status with write_standby.py script

    Runs "write_standby.py -s active" on the DUT (presumably makes all mux
    ports active on this ToR — TODO confirm against the script), then polls
    "show muxcable status --json" every 10 seconds for up to 90 seconds
    until every port under MUX_CABLE reports STATUS == 'active'.
    Fails the test via pytest_assert if the 90-second budget is exhausted.
    '''
    WRITE_STANDBY = "/usr/local/bin/write_standby.py"
    cmd = "{} -s active".format(WRITE_STANDBY)
    duthost.shell(cmd)
    TIMEOUT = 90
    while TIMEOUT > 0:
        muxcables = json.loads(duthost.shell("show muxcable status --json")['stdout'])
        # Collect ports that have not reached 'active' yet
        inactive_muxcables = [intf for intf, muxcable in muxcables['MUX_CABLE'].items() if muxcable['STATUS'] != 'active']
        if len(inactive_muxcables) > 0:
            logger.info('Found muxcables not active on {}: {}'.format(duthost.hostname, json.dumps(inactive_muxcables)))
            time.sleep(10)
            TIMEOUT -= 10
        else:
            logger.info("Mux cable toggled to {}".format(duthost.hostname))
            break

    # TIMEOUT reaches 0 only when the loop exhausted its polls without success
    pytest_assert(TIMEOUT > 0, "Failed to toggle muxcable to {}".format(duthost.hostname))
"Failed to find port for idx {}".format(port_idx)) + + peer_device = conn_graph_facts['device_conn'][duthost.hostname][dut_intf_paused]['peerdevice'] + peer_port = conn_graph_facts['device_conn'][duthost.hostname][dut_intf_paused]['peerport'] + peer_info = { + 'peerdevice': peer_device, + 'pfc_fanout_interface': peer_port + } + return peer_info + + +def start_pfc_storm(storm_handler, peer_info, prio): + """ + Start sending PFC pause frames from fanout switch + """ + storm_handler.deploy_pfc_gen() + storm_handler.start_storm() + # Wait for PFC pause frame generation + time.sleep(2) + + +def stop_pfc_storm(storm_handler): + """ + Stop sending PFC pause frames from fanout switch + """ + storm_handler.stop_storm() + + +def run_ptf_test(ptfhost, test_case='', test_params={}): + """ + A helper function to run test script on ptf host + """ + logger.info("Start running {} on ptf host".format(test_case)) + pytest_assert(ptfhost.shell( + argv = [ + "ptf", + "--test-dir", + "saitests", + test_case, + "--platform-dir", + "ptftests", + "--platform", + "remote", + "-t", + ";".join(["{}={}".format(k, repr(v)) for k, v in test_params.items()]), + "--disable-ipv6", + "--disable-vxlan", + "--disable-geneve", + "--disable-erspan", + "--disable-mpls", + "--disable-nvgre", + "--log-file", + "/tmp/{0}.log".format(test_case), + "--test-case-timeout", + "600" + ], + chdir = "/root", + )["rc"] == 0, "Failed when running test '{0}'".format(test_case)) diff --git a/tests/radv/test_radv_ipv6_ra.py b/tests/radv/test_radv_ipv6_ra.py index 6faf1db6a1b..e9cd9dccfc7 100644 --- a/tests/radv/test_radv_ipv6_ra.py +++ b/tests/radv/test_radv_ipv6_ra.py @@ -14,7 +14,7 @@ from tests.ptf_runner import ptf_runner pytestmark = [ - pytest.mark.topology('t0'), + pytest.mark.topology('t0', 'm0'), pytest.mark.device_type('vs') ] diff --git a/tests/restapi/conftest.py b/tests/restapi/conftest.py index f13838094a6..395edfaef73 100644 --- a/tests/restapi/conftest.py +++ b/tests/restapi/conftest.py @@ -120,13 
+120,14 @@ def get_endpoint(path): return endpoint return get_endpoint + @pytest.fixture def vlan_members(duthosts, rand_one_dut_hostname, tbinfo): duthost = duthosts[rand_one_dut_hostname] VLAN_INDEX = 0 mg_facts = duthost.get_extended_minigraph_facts(tbinfo) - vlan_interfaces = mg_facts["minigraph_vlans"].values()[VLAN_INDEX]["members"] - if vlan_interfaces is not None: - return vlan_interfaces - else: - return [] + if mg_facts["minigraph_vlans"] != {}: + vlan_interfaces = mg_facts["minigraph_vlans"].values()[VLAN_INDEX]["members"] + if vlan_interfaces is not None: + return vlan_interfaces + return [] diff --git a/tests/restapi/helper.py b/tests/restapi/helper.py index ffc482324f2..f6d216b2f6c 100644 --- a/tests/restapi/helper.py +++ b/tests/restapi/helper.py @@ -1,6 +1,7 @@ import time -RESTAPI_SERVER_START_WAIT_TIME = 15 +# The restapi service requires around 30 seconds to start +RESTAPI_SERVER_START_WAIT_TIME = 40 def apply_cert_config(duthost): # Set client certificate subject name in config DB diff --git a/tests/restapi/restapi_operations.py b/tests/restapi/restapi_operations.py index 8517f127fb7..fb46538bf85 100644 --- a/tests/restapi/restapi_operations.py +++ b/tests/restapi/restapi_operations.py @@ -205,3 +205,12 @@ def heartbeat(self, construct_url): pytest_assert(r.status_code == 200) else: logger.error("Malformed URL for "+path+"!") + + def post_config_tunnel_decap(self, construct_url, params): + path = API_VERSION+'/config/tunnel/decap/vxlan' + url = construct_url(path) + if url: + return self.request(POST, url,params) + else: + logger.error("Malformed URL for "+path+"!") + diff --git a/tests/restapi/test_restapi_vxlan_ecmp.py b/tests/restapi/test_restapi_vxlan_ecmp.py new file mode 100644 index 00000000000..184620d526d --- /dev/null +++ b/tests/restapi/test_restapi_vxlan_ecmp.py @@ -0,0 +1,133 @@ +import pytest +import logging +import json + +from tests.common.helpers.assertions import pytest_assert +from restapi_operations import Restapi + + +logger 
= logging.getLogger(__name__) + +pytestmark = [ + pytest.mark.topology('t0'), + pytest.mark.disable_loganalyzer +] + +CLIENT_CERT = 'restapiclient.crt' +CLIENT_KEY = 'restapiclient.key' + +restapi = Restapi(CLIENT_CERT, CLIENT_KEY) + +''' +This test runs the following sequence to stress the restapi behaviour. +add 2 routes A,B +loop 10 times + verify A,B by reading routes. + add 3 more routes C,D,E + verify 5 routes by reading routes A, B, C, D, E 10 times. + delete the 3 added routes. C, D, E +delete the last 2 routes A, B +verify all routes deleted. +''' + + +def test_vxlan_ecmp_multirequest(construct_url, vlan_members): + # test to emulate common scenario in pilot. + + # Create Generic tunnel + params = '{"ip_addr": "100.78.1.1"}' + logger.info("Creating default vxlan tunnel") + r = restapi.post_config_tunnel_decap(construct_url, params) + pytest_assert(r.status_code == 204) + + # Create VNET + params = '{"vnid": 703}' + logger.info("Creating VNET vnet-guid-3 with vnid: 703") + r = restapi.post_config_vrouter_vrf_id(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 204) + + # Verify VNET has been created + r = restapi.get_config_vrouter_vrf_id(construct_url, 'vnet-default') + pytest_assert(r.status_code == 200) + logger.info(r.json()) + expected = '{"attr": {"vnid": 703}, "vnet_id": "vnet-default"}' + pytest_assert(r.json() == json.loads(expected)) + logger.info("VNET with vnet_id: vnet-guid-4 has been successfully created with vnid: 7039115") + + # Add first 2 routes + params = '[{"cmd": "add", "ip_prefix": "10.1.0.1/32", "nexthop": "100.78.60.37,100.78.61.37"}, \ + {"cmd": "add", "ip_prefix": "10.1.0.5/32", "nexthop": "100.78.60.41,100.78.61.41"}]' + logger.info("Adding routes with vnid: 703 to VNET vnet-default") + r = restapi.patch_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 204) + + for i in range(1, 10): + # Read the 2 routes + params = '{}' + r = 
restapi.get_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 200) + logger.info(r.json()) + expected = [{"nexthop": "100.78.60.37,100.78.61.37", "ip_prefix": "10.1.0.1/32"}, + {"nexthop": "100.78.60.41,100.78.61.41", "ip_prefix": "10.1.0.5/32"}] + for route in expected: + pytest_assert(route in r.json()) + logger.info("Routes with vnid: 703 to VNET vnet-default have been added successfully") + + # Add 3 more routes + params = '[{"cmd": "add", "ip_prefix": "10.1.0.2/32", "nexthop": "100.78.60.38,100.78.61.38"}, \ + {"cmd": "add", "ip_prefix": "10.1.0.3/32", "nexthop": "100.78.60.39,100.78.61.39"}, \ + {"cmd": "add", "ip_prefix": "10.1.0.4/32", "nexthop": "100.78.60.40,100.78.61.40"}]' + logger.info("Adding routes with vnid: 703 to VNET vnet-default") + r = restapi.patch_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 204) + + # Read all the routes 10 times. + params = '{}' + for j in range(1, 10): + r = restapi.get_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 200) + logger.info(r.json()) + expected = [{"nexthop": "100.78.60.37,100.78.61.37", "ip_prefix": "10.1.0.1/32"}, + {"nexthop": "100.78.60.38,100.78.61.38", "ip_prefix": "10.1.0.2/32"}, + {"nexthop": "100.78.60.39,100.78.61.39", "ip_prefix": "10.1.0.3/32"}, + {"nexthop": "100.78.60.40,100.78.61.40", "ip_prefix": "10.1.0.4/32"}, + {"nexthop": "100.78.60.41,100.78.61.41", "ip_prefix": "10.1.0.5/32"}] + for route in expected: + pytest_assert(route in r.json()) + logger.info("Routes with vnid: 703 to VNET vnet-default have been added successfully") + + # Delete the 3 added routes + params = '[{"cmd": "delete", "ip_prefix": "10.1.0.2/32", "nexthop": "100.78.60.38,100.78.61.38"}, \ + {"cmd": "delete", "ip_prefix": "10.1.0.3/32", "nexthop": "100.78.60.39,100.78.61.39"}, \ + {"cmd": "delete", "ip_prefix": "10.1.0.4/32", "nexthop": "100.78.60.40,100.78.61.40"}]' 
+ logger.info("Deleting routes with vnid: 703 from VNET vnet-default") + r = restapi.patch_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 204) + + # Verify first 2 routes + params = '{}' + r = restapi.get_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 200) + logger.info(r.json()) + expected = [{"nexthop": "100.78.60.37,100.78.61.37", "ip_prefix": "10.1.0.1/32"}, + {"nexthop": "100.78.60.41,100.78.61.41", "ip_prefix": "10.1.0.5/32"}] + + for route in expected: + pytest_assert(route in r.json()) + logger.info("Routes with vnid: 703 to VNET vnet-default have been added successfully") + + # Delete routes + params = '[{"cmd": "delete", "ip_prefix": "10.1.0.1/32", "nexthop": "100.78.60.37,100.78.61.37"}, \ + {"cmd": "delete", "ip_prefix": "10.1.0.5/32", "nexthop": "100.78.60.41,100.78.61.41"}]' + logger.info("Deleting routes with vnid: 703 from VNET vnet-default") + r = restapi.patch_config_vrouter_vrf_id_routes(construct_url, 'vnet-default', params) + pytest_assert(r.status_code == 204) + + # Verify route absence. 
def get_upstream_neigh_type(topo):
    """Map a testbed topology name to the device type of its upstream neighbors.

    Returns None for topologies with no defined upstream tier (e.g. ptf).
    """
    # Order matters: 't0'/'dualtor' must be matched before 't1', etc.
    ordered_rules = (
        (('t0', 'dualtor'), 't1'),
        (('t1',), 't2'),
        (('t2',), 't3'),
        (('m0',), 'm1'),
    )
    for keywords, neigh_type in ordered_rules:
        if any(keyword in topo for keyword in keywords):
            return neigh_type
    return None
duthost.get_default_route_from_app_db(af) pytest_assert(default_route, "default route not present in APP_DB") logging.info("default route from app db {}".format(default_route)) - # There is only one default route in app_db - key = default_route.keys()[0] - - nexthop_list = default_route[key].get('value', {}).get('nexthop', None) - pytest_assert(nexthop_list is not None, "Default route has not nexthops") - logging.info("nexthop list in app_db {}".format(nexthop_list) ) - nexthops = set(nexthop_list.split(',')) + + nexthops = set() + if uplink_ns: + # multi-asic case: Now we have all routes on all asics, get the uplink routes only + for ns in uplink_ns: + nexthop_list = default_route[ns].values()[0]['value']['nexthop'].split(',') + nexthops.update(set(nexthop_list)) + else: + key = default_route.keys()[0] + nexthop_list = default_route[key].get('value', {}).get('nexthop', None) + nexthops.update(set(nexthop_list.split(','))) + + pytest_assert(nexthops is not None, "Default route has not nexthops") + logging.info("nexthops in app_db {}".format(nexthops) ) + upstream_neigh = get_upstream_neigh(tbinfo) pytest_assert(upstream_neigh is not None, "No upstream neighbors in the testbed") @@ -148,7 +164,7 @@ def test_default_ipv6_route_next_hop_global_address(duthosts, enum_rand_one_per_ "use link local address {} for nexthop".format(nh[0])) -def test_default_route_with_bgp_flap(duthosts, enum_rand_one_per_hwsku_frontend_hostname, enum_asic_index, tbinfo): +def test_default_route_with_bgp_flap(duthosts, enum_rand_one_per_hwsku_frontend_hostname, tbinfo): """ Check the default route present in app_db has the correct nexthops ip Check the default route is removed when the bgp sessions are shutdown @@ -160,15 +176,21 @@ def test_default_route_with_bgp_flap(duthosts, enum_rand_one_per_hwsku_frontend_ .format(tbinfo['topo']['name'])) duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] - asichost = duthost.asic_instance(enum_asic_index) + config_facts = 
duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {}) + uplink_ns = None + # Get uplink namespaces/asics for multi-asic + if duthost.is_multi_asic: + bgp_name_to_ns_mapping = duthost.get_bgp_name_to_ns_mapping() + uplink_ns = get_uplink_ns(tbinfo, bgp_name_to_ns_mapping) + af_list = ['ipv4', 'ipv6'] # verify the default route is correct in the app db for af in af_list: - verify_default_route_in_app_db(asichost, tbinfo, af) + verify_default_route_in_app_db(duthost, tbinfo, af, uplink_ns) duthost.command("sudo config bgp shutdown all") if not wait_until(120, 2, 0, duthost.is_bgp_state_idle): @@ -176,7 +198,7 @@ def test_default_route_with_bgp_flap(duthosts, enum_rand_one_per_hwsku_frontend_ 'BGP Shutdown Timeout: BGP sessions not shutdown after 120 seconds') # give some more time for default route to be removed - if not wait_until(120, 2, 0, asichost.is_default_route_removed_from_app_db): + if not wait_until(120, 2, 0, duthost.is_default_route_removed_from_app_db, uplink_ns): pytest.fail( 'Default route is not removed from APP_DB') diff --git a/tests/route/test_route_perf.py b/tests/route/test_route_perf.py index 10f4b2c63ab..5d6611c0fe0 100644 --- a/tests/route/test_route_perf.py +++ b/tests/route/test_route_perf.py @@ -23,11 +23,9 @@ def ignore_expected_loganalyzer_exceptions(enum_rand_one_per_hwsku_frontend_hostname, loganalyzer): """ Ignore expected failures logs during test execution. - The route_checker script will compare routes in APP_DB and ASIC_DB, and an ERROR will be recorded if mismatch. The testcase will add 10,000 routes to APP_DB, and route_checker may detect mismatch during this period. So a new pattern is added to ignore possible error logs. 
def prepare_dut(asichost, intf_neighs):
    """Configure interface IPs and static neighbor entries on the ASIC host.

    Each entry in intf_neighs is a dict with 'interface', 'ip', 'neighbor'
    and 'mac' keys; cleanup_dut undoes this setup.
    """
    for neigh in intf_neighs:
        # Set up interface
        asichost.config_ip_intf(neigh['interface'], neigh['ip'], "add")
        # Set up neighbor
        asichost.run_ip_neigh_cmd(
            "replace {} lladdr {} dev {}".format(
                neigh['neighbor'], neigh['mac'], neigh['interface']))


def cleanup_dut(asichost, intf_neighs):
    """Remove the neighbor entries and interface IPs added by prepare_dut."""
    for neigh in intf_neighs:
        # Delete neighbor
        asichost.run_ip_neigh_cmd(
            "del {} dev {}".format(neigh['neighbor'], neigh['interface']))
        # remove interface
        asichost.config_ip_intf(neigh['interface'], neigh['ip'], 'remove')
+ 1), - 'neighbor' : '10.%d.0.2' % (idx_neigh + 1), + # change prefix ip starting with 3 to avoid overlap with any bgp ip + 'ip' : '30.%d.0.1/24' % (idx_neigh + 1), + 'neighbor' : '30.%d.0.2' % (idx_neigh + 1), 'mac' : '54:54:00:ad:48:%0.2x' % idx_neigh } else: @@ -148,21 +147,18 @@ def generate_route_file(duthost, prefixes, str_intf_nexthop, dir, op): # Copy json file to DUT duthost.copy(content=json.dumps(route_data, indent=4), dest=dir, verbose=False) -def count_routes(host): - num = host.shell( - 'sonic-db-cli ASIC_DB eval "return #redis.call(\'keys\', \'{}*\')" 0'.format(ROUTE_TABLE_NAME), - module_ignore_errors=True, verbose=True)['stdout'] - return int(num) -def exec_routes(duthost, prefixes, str_intf_nexthop, op): +def exec_routes(duthost, enum_rand_one_frontend_asic_index, prefixes, str_intf_nexthop, op): # Create a tempfile for routes route_file_dir = duthost.shell('mktemp')['stdout'] # Generate json file for routes generate_route_file(duthost, prefixes, str_intf_nexthop, route_file_dir, op) + logger.info('Route file generated and copied') # Check the number of routes in ASIC_DB - start_num_route = count_routes(duthost) + asichost = duthost.asic_instance(enum_rand_one_frontend_asic_index) + start_num_route = asichost.count_routes(ROUTE_TABLE_NAME) # Calculate timeout as a function of the number of routes route_timeout = max(len(prefixes) / 250, 1) # Allow at least 1 second even when there is a limited number of routes @@ -176,54 +172,64 @@ def exec_routes(duthost, prefixes, str_intf_nexthop, op): pytest.fail('Operation {} not supported'.format(op)) start_time = datetime.now() + logger.info('Before pushing route to swssconfig') # Apply routes with swssconfig - result = duthost.shell('docker exec -i swss swssconfig /dev/stdin < {}'.format(route_file_dir), - module_ignore_errors=True) + json_name = '/dev/stdin < {}'.format(route_file_dir) + result = duthost.docker_exec_swssconfig(json_name, 'swss', enum_rand_one_frontend_asic_index) + if result['rc'] != 0: 
pytest.fail('Failed to apply route configuration file: {}'.format(result['stderr'])) - - # Wait until the routes set/del applys to ASIC_DB - def _check_num_routes(expected_num_routes): - # Check the number of routes in ASIC_DB - return count_routes(duthost) == expected_num_routes - - if not wait_until(route_timeout, 0.5, 0, _check_num_routes, expected_num_routes): - pytest.fail('failed to add routes within time limit') + logger.info('All route entries have been pushed') + + total_delay = 0 + actual_num_routes = asichost.count_routes(ROUTE_TABLE_NAME) + while actual_num_routes != expected_num_routes: + diff = abs(expected_num_routes - actual_num_routes) + delay = max(diff / 5000, 1) + now = datetime.now() + total_delay = (now - start_time).total_seconds() + logger.info('Current {} expected {} delayed {} will delay {}'.format(actual_num_routes, expected_num_routes, total_delay, delay)) + time.sleep(delay) + actual_num_routes = asichost.count_routes(ROUTE_TABLE_NAME) + if total_delay >= route_timeout: + break # Record time when all routes show up in ASIC_DB end_time = datetime.now() + logger.info('All route entries have been installed in ASIC_DB in {} seconds'.format((end_time - start_time).total_seconds())) # Check route entries are correct - asic_route_keys = duthost.shell('sonic-db-cli ASIC_DB eval "return redis.call(\'keys\', \'{}*\')" 0'\ - .format(ROUTE_TABLE_NAME), verbose=False)['stdout_lines'] - asic_prefixes = [] - for key in asic_route_keys: - json_obj = key[len(ROUTE_TABLE_NAME) + 1 : ] - asic_prefixes.append(json.loads(json_obj)['dest']) + asic_route_keys = asichost.get_route_key(ROUTE_TABLE_NAME) + table_name_length = len(ROUTE_TABLE_NAME) + asic_route_keys_set = set([re.search("\"dest\":\"([0-9a-f:/\.]*)\"", x[table_name_length:]).group(1) for x in asic_route_keys]) + prefixes_set = set(prefixes) + diff = prefixes_set - asic_route_keys_set if op == 'SET': - assert all(prefix in asic_prefixes for prefix in prefixes) + if diff: + pytest.fail("The 
following entries have not been installed into ASIC {}".format(diff)) elif op == 'DEL': - assert all(prefix not in asic_prefixes for prefix in prefixes) - else: - pytest.fail('Operation {} not supported'.format(op)) + if diff != prefixes_set: + pytest.fail("The following entries have not been withdrawn from ASIC {}".format(prefixes_set - diff)) # Retuen time used for set/del routes return (end_time - start_time).total_seconds() -def test_perf_add_remove_routes(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request, ip_versions): +def test_perf_add_remove_routes(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request, ip_versions, enum_rand_one_frontend_asic_index): duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + asichost = duthost.asic_instance(enum_rand_one_frontend_asic_index) # Number of routes for test set_num_routes = request.config.getoption("--num_routes") # Generate interfaces and neighbors - NUM_NEIGHS = 8 - intf_neighs, str_intf_nexthop = generate_intf_neigh(duthost, NUM_NEIGHS, ip_versions) - + NUM_NEIGHS = 50 # Update max num neighbors for multi-asic + intf_neighs, str_intf_nexthop = generate_intf_neigh(asichost, NUM_NEIGHS, ip_versions) + route_tag = "ipv{}_route".format(ip_versions) - used_routes_count = duthost.get_crm_resources().get("main_resources").get(route_tag, {}).get("used") - avail_routes_count = duthost.get_crm_resources().get("main_resources").get(route_tag, {}).get("available") + used_routes_count = asichost.count_crm_resources("main_resources", route_tag, "used") + avail_routes_count = asichost.count_crm_resources("main_resources", route_tag, "available") pytest_assert(avail_routes_count, "CRM main_resources data is not ready within adjusted CRM polling time {}s".\ format(CRM_POLL_INTERVAL)) + num_routes = min(avail_routes_count, set_num_routes) logger.info("IP route utilization before test start: Used: {}, Available: {}, Test count: {}"\ .format(used_routes_count, avail_routes_count, num_routes)) @@ -237,14 
+243,14 @@ def test_perf_add_remove_routes(duthosts, enum_rand_one_per_hwsku_frontend_hostn for idx_route in range(num_routes)] try: # Set up interface and interface for routes - prepare_dut(duthost, intf_neighs) + prepare_dut(asichost, intf_neighs) # Add routes - time_set = exec_routes(duthost, prefixes, str_intf_nexthop, 'SET') + time_set = exec_routes(duthost, enum_rand_one_frontend_asic_index, prefixes, str_intf_nexthop, 'SET') logger.info('Time to set %d ipv%d routes is %.2f seconds.' % (num_routes, ip_versions, time_set)) # Remove routes - time_del = exec_routes(duthost, prefixes, str_intf_nexthop, 'DEL') + time_del = exec_routes(duthost, enum_rand_one_frontend_asic_index, prefixes, str_intf_nexthop, 'DEL') logger.info('Time to del %d ipv%d routes is %.2f seconds.' % (num_routes, ip_versions, time_del)) finally: - cleanup_dut(duthost, intf_neighs) + cleanup_dut(asichost, intf_neighs) diff --git a/tests/route/test_static_route.py b/tests/route/test_static_route.py index a3226e9c5c8..12466067ea9 100644 --- a/tests/route/test_static_route.py +++ b/tests/route/test_static_route.py @@ -7,11 +7,16 @@ import re from collections import defaultdict -from tests.common.fixtures.ptfhost_utils import change_mac_addresses, copy_arp_responder_py +from tests.common.fixtures.ptfhost_utils import ( + change_mac_addresses, + copy_arp_responder_py, +) from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip from tests.common.dualtor.dual_tor_utils import get_t1_ptf_ports from tests.common.dualtor.mux_simulator_control import mux_server_url -from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor_m +from tests.common.dualtor.mux_simulator_control import ( + toggle_all_simulator_ports_to_rand_selected_tor_m, +) from tests.common.utilities import wait_until, get_intf_by_sub_intf from tests.common import config_reload import ptf.testutils as testutils @@ -21,16 +26,12 @@ from tests.common import constants -pytestmark = [ - 
def is_dualtor(tbinfo):
    """Return True when the testbed topology name indicates a dual-ToR setup."""
    topo_name = tbinfo["topo"]["name"]
    return "dualtor" in topo_name
def del_ipaddr(ptfhost, nexthop_addrs, prefix_len, nexthop_devs, ipv6=False):
    """Undo add_ipaddr on the PTF host.

    IPv6 nexthops were real addresses, so each one is removed from its eth
    interface; IPv4 nexthops were faked by arp_responder, so stopping that
    service is sufficient.  nexthop_addrs and nexthop_devs are parallel lists.
    """
    if not ipv6:
        ptfhost.shell("supervisorctl stop arp_responder")
        return
    for addr, dev in zip(nexthop_addrs, nexthop_devs):
        # Best-effort: the address may already be gone after a reload.
        ptfhost.shell(
            "ip -6 addr del {}/{} dev eth{}".format(addr, prefix_len, dev),
            module_ignore_errors=True,
        )
def wait_all_bgp_up(duthost):
    """Block until every configured BGP session on the DUT is established.

    Polls every 10s for up to 300s; fails the test on timeout.
    """
    running_cfg = duthost.config_facts(host=duthost.hostname, source="running")
    neighbors = running_cfg["ansible_facts"].get("BGP_NEIGHBOR", {})
    all_up = wait_until(
        300, 10, 0, duthost.check_bgp_session_state, neighbors.keys()
    )
    if not all_up:
        pytest.fail("not all bgp sessions are up after config reload")
def check_static_route(duthost, prefix, nexthop_addrs, ipv6):
    """Assert every expected nexthop appears in the kernel route for *prefix*.

    Runs `ip [-6] route show <prefix>` on the DUT and checks each address in
    nexthop_addrs occurs on some line of the output.
    """
    if ipv6:
        show_cmd = "ip -6 route show {}".format(prefix)
    else:
        show_cmd = "ip route show {}".format(prefix)
    route_lines = duthost.shell(show_cmd, module_ignore_errors=True)[
        "stdout"
    ].split("\n")

    all_present = all(
        any(nh in line for line in route_lines) for nh in nexthop_addrs
    )
    assert all_present, "config static route: {} nexthop {}\nreal:\n{}".format(
        prefix, ",".join(nexthop_addrs), route_lines
    )
run_static_route_test( + duthost, + unselected_duthost, + ptfadapter, + ptfhost, + tbinfo, + prefix, + nexthop_addrs, + prefix_len, + nexthop_devs, + nexthop_interfaces, + ipv6=False, + config_reload_test=False, +): + is_dual_tor = False + if "dualtor" in tbinfo["topo"]["name"] and unselected_duthost is not None: + is_dual_tor = True -def run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, prefix, nexthop_addrs, prefix_len, nexthop_devs, nexthop_interfaces, ipv6=False, config_reload_test=False): # Clean up arp or ndp clear_arp_ndp(duthost, ipv6=ipv6) + if is_dual_tor: + clear_arp_ndp(unselected_duthost, ipv6=ipv6) # Add ipaddresses in ptf - add_ipaddr(ptfadapter, ptfhost, nexthop_addrs, prefix_len, nexthop_interfaces, ipv6=ipv6) + add_ipaddr( + ptfadapter, ptfhost, nexthop_addrs, prefix_len, nexthop_interfaces, ipv6=ipv6 + ) try: # Add static route - duthost.shell("sonic-db-cli CONFIG_DB hmset 'STATIC_ROUTE|{}' nexthop {}".format(prefix, ",".join(nexthop_addrs))) + duthost.shell( + "sonic-db-cli CONFIG_DB hmset 'STATIC_ROUTE|{}' nexthop {}".format( + prefix, ",".join(nexthop_addrs) + ) + ) + if is_dual_tor: + unselected_duthost.shell( + "sonic-db-cli CONFIG_DB hmset 'STATIC_ROUTE|{}' nexthop {}".format( + prefix, ",".join(nexthop_addrs) + ) + ) + time.sleep(5) + # check if the static route in kernel is what we expect + check_static_route(duthost, prefix, nexthop_addrs, ipv6=ipv6) + + # try to refresh arp entry before traffic testing to improve stability + for nexthop_addr in nexthop_addrs: + duthost.shell( + "timeout 1 ping -c 1 -w 1 {}".format(nexthop_addr), + module_ignore_errors=True, + ) + # Check traffic get forwarded to the nexthop ip_dst = str(ipaddress.ip_network(unicode(prefix))[1]) - generate_and_verify_traffic(duthost, ptfadapter, tbinfo, ip_dst, nexthop_devs, ipv6=ipv6) + generate_and_verify_traffic( + duthost, ptfadapter, tbinfo, ip_dst, nexthop_devs, ipv6=ipv6 + ) # Check the route is advertised to the neighbors 
check_route_redistribution(duthost, prefix, ipv6) # Config save and reload if specified if config_reload_test: - duthost.shell('config save -y') - config_reload(duthost, wait=350) - #FIXME: We saw re-establishing BGP sessions can takes around 7 minutes + # config reload on active tor + duthost.shell("config save -y") + if duthost.facts["platform"] == "x86_64-cel_e1031-r0": + config_reload(duthost, wait=500) + else: + config_reload(duthost, wait=450) + # FIXME: We saw re-establishing BGP sessions can takes around 7 minutes # on some devices (like 4600) after config reload, so we need below patch wait_all_bgp_up(duthost) - generate_and_verify_traffic(duthost, ptfadapter, tbinfo, ip_dst, nexthop_devs, ipv6=ipv6) + for nexthop_addr in nexthop_addrs: + duthost.shell( + "timeout 1 ping -c 1 -w 1 {}".format(nexthop_addr), + module_ignore_errors=True, + ) + generate_and_verify_traffic( + duthost, ptfadapter, tbinfo, ip_dst, nexthop_devs, ipv6=ipv6 + ) check_route_redistribution(duthost, prefix, ipv6) finally: # Remove static route - duthost.shell("sonic-db-cli CONFIG_DB del 'STATIC_ROUTE|{}'".format(prefix), module_ignore_errors=True) + duthost.shell( + "sonic-db-cli CONFIG_DB del 'STATIC_ROUTE|{}'".format(prefix), + module_ignore_errors=True, + ) + if is_dual_tor: + unselected_duthost.shell( + "sonic-db-cli CONFIG_DB del 'STATIC_ROUTE|{}'".format(prefix), + module_ignore_errors=True, + ) # Delete ipaddresses in ptf del_ipaddr(ptfhost, nexthop_addrs, prefix_len, nexthop_devs, ipv6=ipv6) @@ -193,16 +328,20 @@ def run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, prefix, nexthop_ # Config save if the saved config_db was updated if config_reload_test: - duthost.shell('config save -y') + duthost.shell("config save -y") + if is_dual_tor: + unselected_duthost.shell("config save -y") # Clean up arp or ndp clear_arp_ndp(duthost, ipv6=ipv6) + if is_dual_tor: + clear_arp_ndp(unselected_duthost, ipv6=ipv6) def get_nexthops(duthost, tbinfo, ipv6=False, count=1): mg_facts = 
duthost.get_extended_minigraph_facts(tbinfo) - vlan_intf = mg_facts['minigraph_vlan_interfaces'][1 if ipv6 else 0] - prefix_len = vlan_intf['prefixlen'] + vlan_intf = mg_facts["minigraph_vlan_interfaces"][1 if ipv6 else 0] + prefix_len = vlan_intf["prefixlen"] is_backend_topology = mg_facts.get(constants.IS_BACKEND_TOPOLOGY_KEY, False) if is_dualtor(tbinfo): @@ -213,51 +352,149 @@ def get_nexthops(duthost, tbinfo, ipv6=False, count=1): nexthop_addrs = [server_ips[_][server_ip_key].split("/")[0] for _ in vlan_intfs] nexthop_interfaces = nexthop_devs else: - vlan_subnet = ipaddress.ip_network(vlan_intf['subnet']) - vlan = mg_facts['minigraph_vlans'][mg_facts['minigraph_vlan_interfaces'][1 if ipv6 else 0]['attachto']] - vlan_ports = vlan['members'] - vlan_id = vlan['vlanid'] - vlan_ptf_ports = [mg_facts['minigraph_ptf_indices'][port] for port in vlan_ports] + vlan_subnet = ipaddress.ip_network(vlan_intf["subnet"]) + vlan = mg_facts["minigraph_vlans"][ + mg_facts["minigraph_vlan_interfaces"][1 if ipv6 else 0]["attachto"] + ] + vlan_ports = vlan["members"] + vlan_id = vlan["vlanid"] + vlan_ptf_ports = [ + mg_facts["minigraph_ptf_indices"][port] for port in vlan_ports + ] nexthop_devs = vlan_ptf_ports # backend topology use ethx.x(e.g. eth30.1000) during servers and T0 in ptf # in other topology use ethx(e.g. 
eth30) if is_backend_topology: - nexthop_interfaces = [str(dev) + constants.VLAN_SUB_INTERFACE_SEPARATOR + str(vlan_id) for dev in nexthop_devs] + nexthop_interfaces = [ + str(dev) + constants.VLAN_SUB_INTERFACE_SEPARATOR + str(vlan_id) + for dev in nexthop_devs + ] else: nexthop_interfaces = nexthop_devs nexthop_addrs = [str(vlan_subnet[i + 2]) for i in range(len(nexthop_devs))] count = min(count, len(nexthop_devs)) indices = random.sample(list(range(len(nexthop_devs))), k=count) - return prefix_len, [nexthop_addrs[_] for _ in indices], [nexthop_devs[_] for _ in indices], [nexthop_interfaces[_] for _ in indices] - - -def test_static_route(rand_selected_dut, ptfadapter, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): + return ( + prefix_len, + [nexthop_addrs[_] for _ in indices], + [nexthop_devs[_] for _ in indices], + [nexthop_interfaces[_] for _ in indices], + ) + + +def test_static_route( + rand_selected_dut, + rand_unselected_dut, + ptfadapter, + ptfhost, + tbinfo, + toggle_all_simulator_ports_to_rand_selected_tor_m, +): duthost = rand_selected_dut + unselected_duthost = rand_unselected_dut skip_201911_and_older(duthost) - prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops(duthost, tbinfo) - run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, "1.1.1.0/24", - nexthop_addrs, prefix_len, nexthop_devs, nexthop_interfaces) - - -def test_static_route_ecmp(rand_selected_dut, ptfadapter, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): + prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops( + duthost, tbinfo + ) + run_static_route_test( + duthost, + unselected_duthost, + ptfadapter, + ptfhost, + tbinfo, + "1.1.1.0/24", + nexthop_addrs, + prefix_len, + nexthop_devs, + nexthop_interfaces, + ) + + +@pytest.mark.disable_loganalyzer +def test_static_route_ecmp( + rand_selected_dut, + rand_unselected_dut, + ptfadapter, + ptfhost, + tbinfo, + 
toggle_all_simulator_ports_to_rand_selected_tor_m, +): duthost = rand_selected_dut + unselected_duthost = rand_unselected_dut skip_201911_and_older(duthost) - prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops(duthost, tbinfo, count=3) - run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, "2.2.2.0/24", - nexthop_addrs, prefix_len, nexthop_devs, nexthop_interfaces, config_reload_test=True) - - -def test_static_route_ipv6(rand_selected_dut, ptfadapter, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): + prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops( + duthost, tbinfo, count=3 + ) + run_static_route_test( + duthost, + unselected_duthost, + ptfadapter, + ptfhost, + tbinfo, + "2.2.2.0/24", + nexthop_addrs, + prefix_len, + nexthop_devs, + nexthop_interfaces, + config_reload_test=True, + ) + + +def test_static_route_ipv6( + rand_selected_dut, + rand_unselected_dut, + ptfadapter, + ptfhost, + tbinfo, + toggle_all_simulator_ports_to_rand_selected_tor_m, +): duthost = rand_selected_dut + unselected_duthost = rand_unselected_dut skip_201911_and_older(duthost) - prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops(duthost, tbinfo, ipv6=True) - run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, "2000:1::/64", - nexthop_addrs, prefix_len, nexthop_devs, nexthop_interfaces, ipv6=True) - - -def test_static_route_ecmp_ipv6(rand_selected_dut, ptfadapter, ptfhost, tbinfo, toggle_all_simulator_ports_to_rand_selected_tor_m): + prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops( + duthost, tbinfo, ipv6=True + ) + run_static_route_test( + duthost, + unselected_duthost, + ptfadapter, + ptfhost, + tbinfo, + "2000:1::/64", + nexthop_addrs, + prefix_len, + nexthop_devs, + nexthop_interfaces, + ipv6=True, + ) + + +@pytest.mark.disable_loganalyzer +def test_static_route_ecmp_ipv6( + rand_selected_dut, + rand_unselected_dut, + ptfadapter, + ptfhost, + tbinfo, + 
toggle_all_simulator_ports_to_rand_selected_tor_m, +): duthost = rand_selected_dut + unselected_duthost = rand_unselected_dut skip_201911_and_older(duthost) - prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops(duthost, tbinfo, ipv6=True, count=3) - run_static_route_test(duthost, ptfadapter, ptfhost, tbinfo, "2000:2::/64", - nexthop_addrs, prefix_len, nexthop_devs, nexthop_interfaces, ipv6=True, config_reload_test=True) + prefix_len, nexthop_addrs, nexthop_devs, nexthop_interfaces = get_nexthops( + duthost, tbinfo, ipv6=True, count=3 + ) + run_static_route_test( + duthost, + unselected_duthost, + ptfadapter, + ptfhost, + tbinfo, + "2000:2::/64", + nexthop_addrs, + prefix_len, + nexthop_devs, + nexthop_interfaces, + ipv6=True, + config_reload_test=True, + ) diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 569670e04ad..ebe7ced1543 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -45,14 +45,14 @@ function get_dut_from_testbed_file() { DUT_NAME=${ARRAY[9]//[\[\] ]/} elif [[ $TESTBED_FILE == *.yaml ]]; then - content=$(python -c "from __future__ import print_function; import yaml; print('+'.join(str(tb) for tb in yaml.safe_load(open('$TESTBED_FILE')) if '$TESTBED_NAME'==tb['conf-name']))") + content=$(python2 -c "from __future__ import print_function; import yaml; print('+'.join(str(tb) for tb in yaml.safe_load(open('$TESTBED_FILE')) if '$TESTBED_NAME'==tb['conf-name']))") if [[ -z ${content} ]]; then echo "Unable to find testbed '$TESTBED_NAME' in testbed file '$TESTBED_FILE'" show_help_and_exit 4 fi IFS=$'+' read -r -a tb_lines <<< $content tb_line=${tb_lines[0]} - DUT_NAME=$(python -c "from __future__ import print_function; tb=eval(\"$tb_line\"); print(\",\".join(tb[\"dut\"]))") + DUT_NAME=$(python2 -c "from __future__ import print_function; tb=eval(\"$tb_line\"); print(\",\".join(tb[\"dut\"]))") fi fi } @@ -110,7 +110,7 @@ function setup_environment() export ANSIBLE_CONFIG=${BASE_PATH}/ansible export 
ANSIBLE_LIBRARY=${BASE_PATH}/ansible/library/ export ANSIBLE_CONNECTION_PLUGINS=${BASE_PATH}/ansible/plugins/connection - export ANSIBLE_CLICONF_PLUGINS=${BASE_PATH}/ansible/cliconf_plugins + export ANSIBLE_CLICONF_PLUGINS=${BASE_PATH}/ansible/cliconf_plugins export ANSIBLE_TERMINAL_PLUGINS=${BASE_PATH}/ansible/terminal_plugins # Kill pytest and ansible-playbook process @@ -130,7 +130,7 @@ function setup_test_options() # for the scenario of specifying test scripts using pattern like `subfolder/test_*.py`. The pattern will be # expanded to matched test scripts by bash. Among the expanded scripts, we may want to skip a few. Then we can # explicitly specify the script to be skipped. - ignores=$(python -c "print '|'.join('''$SKIP_FOLDERS'''.split())") + ignores=$(python2 -c "print '|'.join('''$SKIP_FOLDERS'''.split())") if [[ -z ${TEST_CASES} ]]; then # When TEST_CASES is not specified, find all the possible scripts, ignore the scripts under $SKIP_FOLDERS all_scripts=$(find ./ -name 'test_*.py' | sed s:^./:: | grep -vE "^(${ignores})") @@ -143,18 +143,18 @@ function setup_test_options() fi # Ignore the scripts specified in $SKIP_SCRIPTS if [[ x"${TEST_INPUT_ORDER}" == x"True" ]]; then - TEST_CASES=$(python -c "print '\n'.join([testcase for testcase in list('''$all_scripts'''.split()) if testcase not in set('''$SKIP_SCRIPTS'''.split())])") + TEST_CASES=$(python2 -c "print '\n'.join([testcase for testcase in list('''$all_scripts'''.split()) if testcase not in set('''$SKIP_SCRIPTS'''.split())])") else - TEST_CASES=$(python -c "print '\n'.join(set('''$all_scripts'''.split()) - set('''$SKIP_SCRIPTS'''.split()))" | sort) + TEST_CASES=$(python2 -c "print '\n'.join(set('''$all_scripts'''.split()) - set('''$SKIP_SCRIPTS'''.split()))" | sort) fi # Check against $INCLUDE_FOLDERS, filter out test cases not in the specified folders FINAL_CASES="" - includes=$(python -c "print '|'.join('''$INCLUDE_FOLDERS'''.split())") + includes=$(python2 -c "print 
'|'.join('''$INCLUDE_FOLDERS'''.split())") for test_case in ${TEST_CASES}; do FINAL_CASES="${FINAL_CASES} $(echo ${test_case} | grep -E "^(${includes})")" done - TEST_CASES=$(python -c "print '\n'.join('''${FINAL_CASES}'''.split())") + TEST_CASES=$(python2 -c "print '\n'.join('''${FINAL_CASES}'''.split())") if [[ -z $TEST_CASES ]]; then echo "No test case to run based on conditions of '-c', '-I' and '-S'. Please check..." @@ -253,25 +253,30 @@ function run_debug_tests() function prepare_dut() { echo "=== Preparing DUT for subsequent tests ===" - pytest ${PYTEST_UTIL_OPTS} ${PRET_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m pretest + echo Running: python2 -m pytest ${PYTEST_UTIL_OPTS} ${PRET_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m pretest + python2 -m pytest ${PYTEST_UTIL_OPTS} ${PRET_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m pretest } function cleanup_dut() { echo "=== Cleaning up DUT after tests ===" - pytest ${PYTEST_UTIL_OPTS} ${POST_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m posttest + echo Running python2 -m pytest ${PYTEST_UTIL_OPTS} ${POST_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m posttest + python2 -m pytest ${PYTEST_UTIL_OPTS} ${POST_LOGGING_OPTIONS} ${UTIL_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} -m posttest } function run_group_tests() { echo "=== Running tests in groups ===" - pytest ${TEST_CASES} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} + echo Running python2 -m pytest ${TEST_CASES} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} --cache-clear + python2 -m pytest ${TEST_CASES} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} --cache-clear } function run_individual_tests() { EXIT_CODE=0 + CACHE_CLEAR="--cache-clear" + echo "=== Running tests individually ===" for test_script in ${TEST_CASES}; do if [[ 
x"${OMIT_FILE_LOG}" != x"True" ]]; then @@ -284,15 +289,27 @@ function run_individual_tests() TEST_LOGGING_OPTIONS="--log-file ${LOG_PATH}/${test_dir}/${test_name}.log --junitxml=${LOG_PATH}/${test_dir}/${test_name}.xml" fi - pytest ${test_script} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} + echo Running python2 -m pytest ${test_script} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} ${CACHE_CLEAR} + python2 -m pytest ${test_script} ${PYTEST_COMMON_OPTS} ${TEST_LOGGING_OPTIONS} ${TEST_TOPOLOGY_OPTIONS} ${EXTRA_PARAMETERS} ${CACHE_CLEAR} ret_code=$? + # Clear pytest cache for the first run + if [[ -n ${CACHE_CLEAR} ]]; then + CACHE_CLEAR="" + fi + # If test passed, no need to keep its log. if [ ${ret_code} -eq 0 ]; then if [[ x"${OMIT_FILE_LOG}" != x"True" && x"${RETAIN_SUCCESS_LOG}" == x"False" ]]; then rm -f ${LOG_PATH}/${test_dir}/${test_name}.log fi else + # rc 10 means pre-test sanity check failed, rc 12 means boths pre-test and post-test sanity check failed + if [ ${ret_code} -eq 10 ] || [ ${ret_code} -eq 12 ]; then + echo "=== Sanity check failed for $test_script. Skip rest of the scripts if there is any. ===" + return ${ret_code} + fi + EXIT_CODE=1 if [[ ${TEST_MAX_FAIL} != 0 ]]; then return ${EXIT_CODE} @@ -399,7 +416,10 @@ if [[ x"${TEST_METHOD}" != x"debug" && x"${BYPASS_UTIL}" == x"False" ]]; then echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" echo "!!!!! Prepare DUT failed, skip testing !!!!!" echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" - exit ${RESULT} + # exit with specific code 65 for pretest failed. + # user-defined exit codes is the range 64 - 113. + # nightly test pipeline can check this code to decide if fails pipeline. 
+ exit 65 fi fi diff --git a/tests/saitests/sai_base_test.py b/tests/saitests/sai_base_test.py index 3c0fb1daa5a..13d7d42e9e9 100644 --- a/tests/saitests/sai_base_test.py +++ b/tests/saitests/sai_base_test.py @@ -15,6 +15,7 @@ from ptf import config import ptf.dataplane as dataplane import ptf.testutils as testutils +import socket ################################################################ # @@ -26,6 +27,10 @@ from thrift.transport import TSocket from thrift.transport import TTransport from thrift.protocol import TBinaryProtocol +import socket +import sys +import paramiko +from paramiko.ssh_exception import BadHostKeyException, AuthenticationException, SSHException interface_to_front_mapping = {} @@ -41,6 +46,7 @@ def setUp(self): server = self.test_params['server'] else: server = 'localhost' + self.server = server if self.test_params.has_key("port_map"): user_input = self.test_params['port_map'] @@ -73,6 +79,39 @@ def tearDown(self): BaseTest.tearDown(self) self.transport.close() + def exec_cmd_on_dut(self, hostname, username, password, cmd): + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + if isinstance(cmd, list): + cmd = ' '.join(cmd) + + stdOut = stdErr = [] + retValue = 1 + try: + client.connect(hostname, username=username, password=password, allow_agent=False) + si, so, se = client.exec_command(cmd, timeout=20) + stdOut = so.readlines() + stdErr = se.readlines() + retValue = 0 + except AuthenticationException as authenticationException: + sys.stderr.write('SSH Authentication failure with message: %s' % authenticationException) + except SSHException as sshException: + sys.stderr.write('SSH Command failed with message: %s' % sshException) + except BadHostKeyException as badHostKeyException: + sys.stderr.write('SSH Authentication failure with message: %s' % badHostKeyException) + except socket.timeout as e: + # The ssh session will timeout in case of a successful reboot + sys.stderr.write('Caught exception 
socket.timeout: {}, {}, {}'.format(repr(e), str(e), type(e))) + retValue = 255 + except Exception as e: + sys.stderr.write('Exception caught: {}, {}, type: {}'.format(repr(e), str(e), type(e))) + sys.stderr.write(sys.exc_info()) + finally: + client.close() + + return stdOut, stdErr, retValue + class ThriftInterfaceDataPlane(ThriftInterface): """ Root class that sets up the thrift interface and dataplane diff --git a/tests/saitests/sai_qos_tests.py b/tests/saitests/sai_qos_tests.py index 215bb936968..43666d6b374 100644 --- a/tests/saitests/sai_qos_tests.py +++ b/tests/saitests/sai_qos_tests.py @@ -9,13 +9,17 @@ import sai_base_test import operator import sys +import texttable import math from ptf.testutils import (ptf_ports, simple_arp_packet, send_packet, simple_tcp_packet, + simple_udp_packet, simple_qinq_tcp_packet, - simple_ip_packet) + simple_ip_packet, + simple_ipv4ip_packet, + port_to_tuple) from ptf.mask import Mask from switch import (switch_init, sai_thrift_create_scheduler_profile, @@ -29,6 +33,7 @@ sai_thrift_read_pg_shared_watermark, sai_thrift_read_buffer_pool_watermark, sai_thrift_read_headroom_pool_watermark, + sai_thrift_read_queue_occupancy, sai_thrift_port_tx_disable, sai_thrift_port_tx_enable) from switch_sai_thrift.ttypes import (sai_thrift_attribute_value_t, @@ -40,12 +45,55 @@ # The index number comes from the append order in sai_thrift_read_port_counters EGRESS_DROP = 0 INGRESS_DROP = 1 +PFC_PRIO_0 = 2 +PFC_PRIO_1 = 3 +PFC_PRIO_2 = 4 PFC_PRIO_3 = 5 PFC_PRIO_4 = 6 +PFC_PRIO_5 = 7 +PFC_PRIO_6 = 8 +PFC_PRIO_7 = 9 TRANSMITTED_OCTETS = 10 TRANSMITTED_PKTS = 11 INGRESS_PORT_BUFFER_DROP = 12 EGRESS_PORT_BUFFER_DROP = 13 +RECEIVED_PKTS = 14 +RECEIVED_NON_UC_PKTS = 15 +TRANSMITTED_NON_UC_PKTS = 16 +EGRESS_PORT_QLEN = 17 +port_counter_fields = ['OutDiscard', # SAI_PORT_STAT_IF_OUT_DISCARDS + 'InDiscard', # SAI_PORT_STAT_IF_IN_DISCARDS + 'Pfc0TxPkt', # SAI_PORT_STAT_PFC_0_TX_PKTS + 'Pfc1TxPkt', # SAI_PORT_STAT_PFC_1_TX_PKTS + 'Pfc2TxPkt', # 
SAI_PORT_STAT_PFC_2_TX_PKTS + 'Pfc3TxPkt', # SAI_PORT_STAT_PFC_3_TX_PKTS + 'Pfc4TxPkt', # SAI_PORT_STAT_PFC_4_TX_PKTS + 'Pfc5TxPkt', # SAI_PORT_STAT_PFC_5_TX_PKTS + 'Pfc6TxPkt', # SAI_PORT_STAT_PFC_6_TX_PKTS + 'Pfc7TxPkt', # SAI_PORT_STAT_PFC_7_TX_PKTS + 'OutOct', # SAI_PORT_STAT_IF_OUT_OCTETS + 'OutUcPkt', # SAI_PORT_STAT_IF_OUT_UCAST_PKTS + 'InDropPkt', # SAI_PORT_STAT_IN_DROPPED_PKTS + 'OutDropPkt', # SAI_PORT_STAT_OUT_DROPPED_PKTS + 'InUcPkt', # SAI_PORT_STAT_IF_IN_UCAST_PKTS + 'InNonUcPkt', # SAI_PORT_STAT_IF_IN_NON_UCAST_PKTS + 'OutNonUcPkt', # SAI_PORT_STAT_IF_OUT_NON_UCAST_PKTS + 'OutQlen'] # SAI_PORT_STAT_IF_OUT_QLEN + +queue_counter_field_template = 'Que{}Cnt' # SAI_QUEUE_STAT_PACKETS + +# sai_thrift_read_port_watermarks +queue_share_wm_field_template = 'Que{}ShareWm' # SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES +pg_share_wm_field_template = 'Pg{}ShareWm' # SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES +pg_headroom_wm_field_template = 'pg{}headroomWm' # SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES + +# sai_thrift_read_pg_counters +pg_counter_field_template = 'Pg{}Cnt' # SAI_INGRESS_PRIORITY_GROUP_STAT_PACKETS + +# sai_thrift_read_pg_drop_counters +pg_drop_field_template = 'Pg{}Drop' # SAI_INGRESS_PRIORITY_GROUP_STAT_DROPPED_PACKETS + + QUEUE_0 = 0 QUEUE_1 = 1 QUEUE_2 = 2 @@ -53,6 +101,7 @@ QUEUE_4 = 4 QUEUE_5 = 5 QUEUE_6 = 6 +QUEUE_7 = 7 PG_NUM = 8 QUEUE_NUM = 8 @@ -61,6 +110,113 @@ RELEASE_PORT_MAX_RATE = 0 ECN_INDEX_IN_HEADER = 53 # Fits the ptf hex_dump_buffer() parse function DSCP_INDEX_IN_HEADER = 52 # Fits the ptf hex_dump_buffer() parse function +COUNTER_MARGIN = 2 # Margin for counter CHECK + + +def read_ptf_counters(dataplane, port): + ptfdev, ptfport = port_to_tuple(port) + rx, tx = dataplane.get_counters(ptfdev, ptfport) + return [rx, tx] + + +def show_counter(counter_name, ptftest, asic_type, ports, current=None, base=None, indexes=None, banner=None, silent=False): + # counter_name counter_fields counter_query offset 
sai_thrift + counter_info = {'PortCnt' : [port_counter_fields, sai_thrift_read_port_counters, 0, True], + 'QueCnt' : [[queue_counter_field_template.format(i) for i in range(QUEUE_NUM)], sai_thrift_read_port_counters, 1, True], + 'QueShareWm' : [[queue_share_wm_field_template.format(i) for i in range(QUEUE_NUM)], sai_thrift_read_port_watermarks, 0, True], + 'PgShareWm' : [[pg_share_wm_field_template.format(i) for i in range(PG_NUM)], sai_thrift_read_port_watermarks, 1, True], + 'PgHeadroomWm' : [[pg_headroom_wm_field_template.format(i) for i in range(PG_NUM)], sai_thrift_read_port_watermarks, 2, True], + 'PgCnt' : [[pg_counter_field_template.format(i) for i in range(PG_NUM)], sai_thrift_read_pg_counters, None, True], + 'PgDrop' : [[pg_drop_field_template.format(i) for i in range(PG_NUM)], sai_thrift_read_pg_drop_counters, None, True], + 'PtfCnt' : [['rx', 'tx'], read_ptf_counters, None, False]} + if counter_name not in counter_info or ports == None: + return (None, None) + + counter_fields = counter_info[counter_name][0] + counter_query = counter_info[counter_name][1] + data_offset = counter_info[counter_name][2] + sai_thrift = counter_info[counter_name][3] + + num = len(counter_fields) + fields = counter_fields + if indexes != None: + fields = [counter_fields[fidx] for fidx in indexes] + + table = texttable.TextTable(['port'] + fields) + query_data = [] + for pidx, port in enumerate(ports): + if base != None: + data_base = base[pidx] if pidx < len(base) else [None] * num + table.add_row([str(port) + ' base'] + data_base if indexes == None else [str(port) + ' base'] + [data_base[fidx] for fidx in indexes]) + + data = None + if current != None: + data = current[pidx] if pidx < len(current) else [None] * num + else: + if sai_thrift: + data = counter_query(ptftest.client, port_list[port]) + else: + data = counter_query(ptftest.dataplane, port) + if data_offset != None: + data = data[data_offset] + query_data.append(data) + table.add_row([port] + data if indexes == None 
else [port] + [data[fidx] for fidx in indexes]) + if not silent: + sys.stderr.write('show counter {}{}\n{}\n'.format(counter_name, '' if banner == None else ' [' + banner + ']', table)) + return (query_data, table) + + +def show_stats(banner, ptftest, asic_type, ports, bases=None, silent=False): + results = [] + i = 0 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PtfCnt', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PortCnt', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('QueCnt', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('QueShareWm', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PgShareWm', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PgHeadroomWm', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PgCnt', ptftest, asic_type, ports, current=None, base=base, indexes=None, banner=banner, silent=silent)[0]) + i += 1 + base = None if bases == None or i >= len(bases) else bases[i] + results.append(show_counter('PgDrop', ptftest, asic_type, ports, current=None, base=base, indexes=None, 
banner=banner, silent=silent)[0]) + return results + + +def check_leackout_compensation_support(asic, hwsku): + if 'broadcom' in asic.lower(): + return True + return False + + +def dynamically_compensate_leakout(thrift_client, counter_checker, check_port, check_field, base, ptf_test, compensate_port, compensate_pkt, max_retry): + prev = base + curr, _ = counter_checker(thrift_client, check_port) + leakout_num = curr[check_field] - prev[check_field] + retry = 0 + num = 0 + while leakout_num > 0 and retry < max_retry: + send_packet(ptf_test, compensate_port, compensate_pkt, leakout_num) + num += leakout_num + prev = curr + curr, _ = counter_checker(thrift_client, check_port) + leakout_num = curr[check_field] - prev[check_field] + retry += 1 + sys.stderr.write('Compensate {} packets to port {}, and retry {} times\n'.format(num, compensate_port, retry)) + return num + def construct_ip_pkt(pkt_len, dst_mac, src_mac, src_ip, dst_ip, dscp, src_vlan, **kwargs): ecn = kwargs.get('ecn', 1) @@ -97,7 +253,6 @@ def construct_ip_pkt(pkt_len, dst_mac, src_mac, src_ip, dst_ip, dscp, src_vlan, masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "ttl") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "len") - masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "len") if src_vlan is not None: masked_exp_pkt.set_do_not_care_scapy(scapy.Dot1Q, "vlan") return masked_exp_pkt @@ -146,6 +301,26 @@ def get_counter_names(sonic_version): return ingress_counters, egress_counters +def fill_leakout_plus_one(test_case, src_port_id, dst_port_id, pkt, queue, asic_type): + # Attempts to queue 1 packet while compensating for a varying packet leakout. + # Returns whether 1 packet was successfully enqueued. 
+ if asic_type in ['cisco-8000']: + queue_counters_base = sai_thrift_read_queue_occupancy(test_case.client, dst_port_id) + max_packets = 500 + for packet_i in range(max_packets): + send_packet(test_case, src_port_id, pkt, 1) + queue_counters = sai_thrift_read_queue_occupancy(test_case.client, dst_port_id) + if queue_counters[queue] > queue_counters_base[queue]: + print >> sys.stderr, "fill_leakout_plus_one: Success, sent %d packets, queue occupancy bytes rose from %d to %d" % (packet_i + 1, queue_counters_base[queue], queue_counters[queue]) + return True + raise RuntimeError( + "fill_leakout_plus_one: Couldn't raise queue occupancy:" + "src_port:{}, dst_port_id:{}, pkt:{}, queue:{}".format( + src_port_id, dst_port_id, pkt.__repr__()[0:180], queue)) + else: + return False + + class ARPpopulate(sai_base_test.ThriftInterfaceDataPlane): def setUp(self): sai_base_test.ThriftInterfaceDataPlane.setUp(self) @@ -170,6 +345,8 @@ def setUp(self): self.dst_port_3_ip = self.test_params['dst_port_3_ip'] self.dst_port_3_mac = self.dataplane.get_mac(0, self.dst_port_3_id) self.dst_vlan_3 = self.test_params['dst_port_3_vlan'] + self.test_port_ids = self.test_params.get("testPortIds", None) + self.test_port_ips = self.test_params.get("testPortIps", None) def tearDown(self): sai_base_test.ThriftInterfaceDataPlane.tearDown(self) @@ -185,6 +362,13 @@ def runTest(self): send_packet(self, self.dst_port_2_id, arpreq_pkt) arpreq_pkt = construct_arp_pkt('ff:ff:ff:ff:ff:ff', self.dst_port_3_mac, 1, self.dst_port_3_ip, '192.168.0.1', '00:00:00:00:00:00', self.dst_vlan_3) send_packet(self, self.dst_port_3_id, arpreq_pkt) + + # ptf don't know the address of neighbor, use ping to learn relevant arp entries instead of send arp request + if self.test_port_ids and self.test_port_ips: + for portid in self.test_port_ids: + self.exec_cmd_on_dut(self.server, self.test_params['dut_username'], self.test_params['dut_password'], + 'ping -q -c 3 {}'.format(self.test_port_ips[portid]['peer_addr'])) + 
time.sleep(8) @@ -227,6 +411,7 @@ def get_port_id(self, port_name): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) router_mac = self.test_params['router_mac'] dst_port_id = int(self.test_params['dst_port_id']) @@ -235,16 +420,23 @@ def runTest(self): src_port_id = int(self.test_params['src_port_id']) src_port_ip = self.test_params['src_port_ip'] src_port_mac = self.dataplane.get_mac(0, src_port_id) + dual_tor_scenario = self.test_params.get('dual_tor_scenario', None) + dual_tor = self.test_params.get('dual_tor', None) + leaf_downstream = self.test_params.get('leaf_downstream', None) exp_ip_id = 101 exp_ttl = 63 pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id) print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) print >> sys.stderr, "port list {}".format(port_list) @@ -289,6 +481,7 @@ def runTest(self): result = self.dataplane.poll(device_number=0, port_number=dst_port_id, timeout=3) if isinstance(result, self.dataplane.PollFailure): self.fail("Expected packet was not received on port %d. 
Total received: %d.\n%s" % (dst_port_id, cnt, result.format())) + recv_pkt = scapy.Ether(result.packet) cnt += 1 @@ -310,32 +503,57 @@ def runTest(self): port_results, queue_results = sai_thrift_read_port_counters(self.client, sai_dst_port_id) print >> sys.stderr, map(operator.sub, queue_results, queue_results_base) + # dual_tor_scenario: represents whether the device is deployed into a dual ToR scenario + # dual_tor: represents whether the source and destination ports are configured with additional lossless queues # According to SONiC configuration all dscp are classified to queue 1 except: - # dscp 8 -> queue 0 - # dscp 5 -> queue 2 - # dscp 3 -> queue 3 - # dscp 4 -> queue 4 - # dscp 46 -> queue 5 - # dscp 48 -> queue 6 - # So for the 64 pkts sent the mapping should be -> 58 queue 1, - # and 1 for queue0, queue2, queue3, queue4, queue5, and queue6 - # Check results - # LAG ports can have LACP packets on queue 0, hence using >= comparison - assert(queue_results[QUEUE_0] >= 1 + queue_results_base[QUEUE_0]) - assert(queue_results[QUEUE_1] == 58 + queue_results_base[QUEUE_1]) - assert(queue_results[QUEUE_2] == 1 + queue_results_base[QUEUE_2]) + # Normal scenario Dual ToR scenario Leaf router with separated DSCP_TO_TC_MAP + # All ports Normal ports Ports with additional lossless queues downstream (source is T2) upstream (source is T0) + # dscp 8 -> queue 0 queue 0 queue 0 queue 0 queue 0 + # dscp 5 -> queue 2 queue 1 queue 1 queue 1 queue 1 + # dscp 3 -> queue 3 queue 3 queue 3 queue 3 queue 3 + # dscp 4 -> queue 4 queue 4 queue 4 queue 4 queue 4 + # dscp 46 -> queue 5 queue 5 queue 5 queue 5 queue 5 + # dscp 48 -> queue 6 queue 7 queue 7 queue 7 queue 7 + # dscp 2 -> queue 1 queue 1 queue 2 queue 1 queue 2 + # dscp 6 -> queue 1 queue 1 queue 6 queue 1 queue 6 + # rest 56 dscps -> queue 1 + # So for the 64 pkts sent the mapping should be the following: + # queue 1 56 + 2 = 58 56 + 3 = 59 56 + 1 = 57 59 57 + # queue 2/6 1 0 1 0 0 + # queue 3/4 1 1 1 1 1 + # queue 5 1 
1 1 1 1 + # queue 7 0 1 1 1 1 + assert(queue_results[QUEUE_0] == 1 + queue_results_base[QUEUE_0]) assert(queue_results[QUEUE_3] == 1 + queue_results_base[QUEUE_3]) assert(queue_results[QUEUE_4] == 1 + queue_results_base[QUEUE_4]) assert(queue_results[QUEUE_5] == 1 + queue_results_base[QUEUE_5]) - assert(queue_results[QUEUE_6] == 1 + queue_results_base[QUEUE_6]) + if dual_tor or (dual_tor_scenario == False) or (leaf_downstream == False): + assert(queue_results[QUEUE_2] == 1 + queue_results_base[QUEUE_2]) + assert(queue_results[QUEUE_6] == 1 + queue_results_base[QUEUE_6]) + else: + assert(queue_results[QUEUE_2] == queue_results_base[QUEUE_2]) + assert(queue_results[QUEUE_6] == queue_results_base[QUEUE_6]) + if dual_tor_scenario: + if (dual_tor == False) or leaf_downstream: + assert(queue_results[QUEUE_1] == 59 + queue_results_base[QUEUE_1]) + else: + assert(queue_results[QUEUE_1] == 57 + queue_results_base[QUEUE_1]) + # LAG ports can have LACP packets on queue 7, hence using >= comparison + assert(queue_results[QUEUE_7] >= 1 + queue_results_base[QUEUE_7]) + else: + assert(queue_results[QUEUE_1] == 58 + queue_results_base[QUEUE_1]) + # LAG ports can have LACP packets on queue 7, hence using >= comparison + assert(queue_results[QUEUE_7] >= queue_results_base[QUEUE_7]) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) print >> sys.stderr, "END OF TEST" # DOT1P to queue mapping class Dot1pToQueueMapping(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters router_mac = self.test_params['router_mac'] @@ -442,12 +660,14 @@ def runTest(self): continue finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), 
self.test_params.get('test_port_ids', None), bases=stats) print >> sys.stderr, "END OF TEST" # DSCP to pg mapping class DscpToPgMapping(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters router_mac = self.test_params['router_mac'] @@ -459,24 +679,35 @@ def runTest(self): src_port_id = int(self.test_params['src_port_id']) src_port_ip = self.test_params['src_port_ip'] src_port_mac = self.dataplane.get_mac(0, src_port_id) + dscp_to_pg_map = self.test_params.get('dscp_to_pg_map', None) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) exp_ip_id = 100 exp_ttl = 63 - # According to SONiC configuration all dscps are classified to pg 0 except: - # dscp 3 -> pg 3 - # dscp 4 -> pg 4 - # So for the 64 pkts sent the mapping should be -> 62 pg 0, 1 for pg 3, and 1 for pg 4 - lossy_dscps = range(0, 64) - lossy_dscps.remove(3) - lossy_dscps.remove(4) - pg_dscp_map = { - 3 : [3], - 4 : [4], - 0 : lossy_dscps - } + if not dscp_to_pg_map: + # According to SONiC configuration all dscps are classified to pg 0 except: + # dscp 3 -> pg 3 + # dscp 4 -> pg 4 + # So for the 64 pkts sent the mapping should be -> 62 pg 0, 1 for pg 3, and 1 for pg 4 + lossy_dscps = list(range(0, 64)) + lossy_dscps.remove(3) + lossy_dscps.remove(4) + pg_dscp_map = { + 3: [3], + 4: [4], + 0: lossy_dscps + } + else: + pg_dscp_map = {} + for dscp, pg in dscp_to_pg_map.items(): + if pg in pg_dscp_map: + pg_dscp_map[int(pg)].append(int(dscp)) + else: + pg_dscp_map[int(pg)] = [int(dscp)] + print >> sys.stderr, pg_dscp_map try: @@ -535,12 +766,117 @@ def runTest(self): continue finally: + 
show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) print >> sys.stderr, "END OF TEST" + +# Tunnel DSCP to PG mapping test +class TunnelDscpToPgMapping(sai_base_test.ThriftInterfaceDataPlane): + + def _build_testing_pkt(self, active_tor_mac, standby_tor_mac, active_tor_ip, standby_tor_ip, inner_dscp, outer_dscp, dst_ip, ecn=1): + pkt = simple_tcp_packet( + eth_dst=standby_tor_mac, + ip_src='1.1.1.1', + ip_dst=dst_ip, + ip_dscp=inner_dscp, + ip_ecn=ecn, + ip_ttl=64 + ) + + ipinip_packet = simple_ipv4ip_packet( + eth_dst=active_tor_mac, + eth_src=standby_tor_mac, + ip_src=standby_tor_ip, + ip_dst=active_tor_ip, + ip_dscp=outer_dscp, + ip_ecn=ecn, + inner_frame=pkt[scapy.IP] + ) + return ipinip_packet + + def runTest(self): + """ + This test case is to tx some ip_in_ip packet from Mux tunnel, and check if the traffic is + mapped to expected PGs. + """ + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + + # Parse input parameters + active_tor_mac = self.test_params['active_tor_mac'] + active_tor_ip = self.test_params['active_tor_ip'] + standby_tor_mac = self.test_params['standby_tor_mac'] + standby_tor_ip = self.test_params['standby_tor_ip'] + src_port_id = self.test_params['src_port_id'] + dst_port_id = self.test_params['dst_port_id'] + dst_port_ip = self.test_params['dst_port_ip'] + + dscp_to_pg_map = self.test_params['inner_dscp_to_pg_map'] + asic_type = self.test_params['sonic_asic_type'] + cell_size = self.test_params['cell_size'] + PKT_NUM = 100 + # There is background traffic during test, so we need to add error tolerance to ignore such pakcets + ERROR_TOLERANCE = { + 0: 10, + 1: 0, + 2: 0, + 3: 0, + 4: 0, + 5: 0, + 6: 0, + 7: 0 + } + + try: + # Disable tx on EGRESS port so that headroom buffer cannot be free + 
sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + + # There are packet leak even port tx is disabled (18 packets leak on TD3 found) + # Hence we send some packet to fill the leak before testing + for dscp, _ in dscp_to_pg_map.items(): + pkt = self._build_testing_pkt( + active_tor_mac=active_tor_mac, + standby_tor_mac=standby_tor_mac, + active_tor_ip=active_tor_ip, + standby_tor_ip=standby_tor_ip, + inner_dscp=dscp, + outer_dscp=0, + dst_ip=dst_port_ip + ) + send_packet(self, src_port_id, pkt, 20) + time.sleep(10) + + for dscp, pg in dscp_to_pg_map.items(): + # Build and send packet to active tor. + # The inner DSCP is set to testing value, and the outer DSCP is set to 0 as it has no impact on remapping + pkt = self._build_testing_pkt( + active_tor_mac=active_tor_mac, + standby_tor_mac=standby_tor_mac, + active_tor_ip=active_tor_ip, + standby_tor_ip=standby_tor_ip, + inner_dscp=dscp, + outer_dscp=0, + dst_ip=dst_port_ip + ) + pg_shared_wm_res_base = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + send_packet(self, src_port_id, pkt, PKT_NUM) + # validate pg counters increment by the correct pkt num + time.sleep(8) + pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + + assert(pg_shared_wm_res[pg] - pg_shared_wm_res_base[pg] <= (PKT_NUM + ERROR_TOLERANCE[pg]) * cell_size) + assert(pg_shared_wm_res[pg] - pg_shared_wm_res_base[pg] >= (PKT_NUM - ERROR_TOLERANCE[pg]) * cell_size) + finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + # Enable tx on dest port + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + + # DOT1P to pg mapping class Dot1pToPgMapping(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), 
self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters router_mac = self.test_params['router_mac'] @@ -643,6 +979,7 @@ def runTest(self): continue finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) print >> sys.stderr, "END OF TEST" # This test is to measure the Xoff threshold, and buffer limit @@ -650,6 +987,7 @@ class PFCtest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): time.sleep(5) switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters dscp = int(self.test_params['dscp']) @@ -685,8 +1023,17 @@ def runTest(self): # Prepare IP packet data ttl = 64 - default_packet_length = 64 - pkt = construct_ip_pkt(default_packet_length, + if 'packet_size' in self.test_params.keys(): + packet_length = int(self.test_params['packet_size']) + else: + packet_length = 64 + + is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + pkt_dst_mac = def_vlan_mac + + pkt = construct_ip_pkt(packet_length, pkt_dst_mac, src_port_mac, src_port_ip, @@ -699,17 +1046,21 @@ def runTest(self): print >> sys.stderr, "test dst_port_id: {}, src_port_id: {}, src_vlan: {}".format( dst_port_id, src_port_id, src_port_vlan ) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, 
self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: {}".format(dst_port_id) # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here - recv_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) # Add slight tolerance in threshold characterization to consider # the case that cpu puts packets in the egress queue after we pause the egress # or the leak out is simply less than expected as we have occasionally observed @@ -725,40 +1076,45 @@ def runTest(self): sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) try: - # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and - # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find - # actual leakout by sending packets and reading actual leakout from HW - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - pkts_num_leak_out = 0 + # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and + # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find + # actual leakout by sending packets and reading actual leakout from HW. + # And apply dynamically compensation to all device using Broadcom ASIC. 
+ if check_leackout_compensation_support(asic_type, hwsku): + pkts_num_leak_out = 0 # send packets short of triggering pfc if hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': # send packets short of triggering pfc send_packet(self, src_port_id, pkt, pkts_num_egr_mem + pkts_num_leak_out + pkts_num_trig_pfc - 1 - margin) + elif 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, int(self.test_params['pg']), asic_type) + # Send 1 less packet due to leakout filling + send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - 2 - margin) else: # send packets short of triggering pfc send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - 1 - margin) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - actual_pkts_num_leak_out = xmit_counters[TRANSMITTED_PKTS] - xmit_counters_base[TRANSMITTED_PKTS] - send_packet(self, src_port_id, pkt, actual_pkts_num_leak_out) + + if check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_base, self, src_port_id, pkt, 10) # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + test_stage = 'after send 
packets short of triggering PFC' + sys.stderr.write('{}:\n\trecv_counters {}\n\trecv_counters_base {}\n\txmit_counters {}\n\txmit_counters_base {}\n'.format(test_stage, recv_counters, recv_counters_base, xmit_counters, xmit_counters_base)) # recv port no pfc - assert(recv_counters[pg] == recv_counters_base[pg]) + assert(recv_counters[pg] == recv_counters_base[pg]), 'unexpectedly PFC counter increase, {}'.format(test_stage) # recv port no ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] == recv_counters_base[cntr]), 'unexpectedly RX drop counter increase, {}'.format(test_stage) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly TX drop counter increase, {}'.format(test_stage) # send 1 packet to trigger pfc send_packet(self, src_port_id, pkt, 1 + 2 * margin) @@ -767,16 +1123,18 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here recv_counters_base = recv_counters - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + test_stage = 'after send a few packets to trigger PFC' + sys.stderr.write('{}:\n\trecv_counters {}\n\trecv_counters_base {}\n\txmit_counters {}\n\txmit_counters_base {}\n'.format(test_stage, recv_counters, recv_counters_base, xmit_counters, xmit_counters_base)) # recv port pfc - assert(recv_counters[pg] > recv_counters_base[pg]) + assert(recv_counters[pg] > recv_counters_base[pg]), 'unexpectedly PFC counter not increase, 
{}'.format(test_stage) # recv port no ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] == recv_counters_base[cntr]), 'unexpectedly RX drop counter increase, {}'.format(test_stage) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly TX drop counter increase, {}'.format(test_stage) # send packets short of ingress drop send_packet(self, src_port_id, pkt, pkts_num_trig_ingr_drp - pkts_num_trig_pfc - 1 - 2 * margin) @@ -785,16 +1143,18 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here recv_counters_base = recv_counters - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + test_stage = 'after send packets short of ingress drop' + sys.stderr.write('{}:\n\trecv_counters {}\n\trecv_counters_base {}\n\txmit_counters {}\n\txmit_counters_base {}\n'.format(test_stage, recv_counters, recv_counters_base, xmit_counters, xmit_counters_base)) # recv port pfc - assert(recv_counters[pg] > recv_counters_base[pg]) + assert(recv_counters[pg] > recv_counters_base[pg]), 'unexpectedly PFC counter not increase, {}'.format(test_stage) # recv port no ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] == recv_counters_base[cntr]), 'unexpectedly RX drop counter increase, {}'.format(test_stage) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == 
xmit_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly TX drop counter increase, {}'.format(test_stage) # send 1 packet to trigger ingress drop send_packet(self, src_port_id, pkt, 1 + 2 * margin) @@ -803,16 +1163,18 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here recv_counters_base = recv_counters - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + test_stage = 'after send a few packets to trigger drop' + sys.stderr.write('{}:\n\trecv_counters {}\n\trecv_counters_base {}\n\txmit_counters {}\n\txmit_counters_base {}\n'.format(test_stage, recv_counters, recv_counters_base, xmit_counters, xmit_counters_base)) # recv port pfc - assert(recv_counters[pg] > recv_counters_base[pg]) + assert(recv_counters[pg] > recv_counters_base[pg]), 'unexpectedly PFC counter not increase, {}'.format(test_stage) # recv port ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] > recv_counters_base[cntr]) + assert(recv_counters[cntr] > recv_counters_base[cntr]), 'unexpectedly RX drop counter not increase, {}'.format(test_stage) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly TX drop counter increase, {}'.format(test_stage) if '201811' not in sonic_version and 'mellanox' in asic_type: pg_dropped_cntrs = sai_thrift_read_pg_drop_counters(self.client, port_list[src_port_id]) @@ -821,8 +1183,348 @@ def runTest(self): assert pg_dropped_cntrs[dscp] > pg_dropped_cntrs_old[dscp] finally: + 
show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + + +class LosslessVoq(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + sonic_version = self.test_params['sonic_version'] + # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + pg = int(self.test_params['pg']) + 2 + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_1_id = int(self.test_params['src_port_1_id']) + src_port_1_ip = self.test_params['src_port_1_ip'] + src_port_1_mac = self.dataplane.get_mac(0, src_port_1_id) + src_port_2_id = int(self.test_params['src_port_2_id']) + src_port_2_ip = self.test_params['src_port_2_ip'] + src_port_2_mac = self.dataplane.get_mac(0, src_port_2_id) + num_of_flows = self.test_params['num_of_flows'] + asic_type = self.test_params['sonic_asic_type'] + pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + + pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac + # get counter names to query + ingress_counters, egress_counters = get_counter_names(sonic_version) + + # Prepare IP packet data + ttl = 64 + if 'packet_size' in self.test_params.keys(): + packet_length = int(self.test_params['packet_size']) + else: + packet_length = 64 + pkt = simple_udp_packet(pktlen=packet_length, + eth_dst=pkt_dst_mac, + eth_src=src_port_1_mac, + 
ip_src=src_port_1_ip, + ip_dst=dst_port_ip, + ip_tos=((dscp << 2) | ecn), + udp_sport=1024, + udp_dport=2048, + ip_ecn=ecn, + ip_ttl=ttl) + + pkt3 = simple_udp_packet(pktlen=packet_length, + eth_dst=pkt_dst_mac, + eth_src=src_port_2_mac, + ip_src=src_port_2_ip, + ip_dst=dst_port_ip, + ip_tos=((dscp << 2) | ecn), + udp_sport=1024, + udp_dport=2050, + ip_ecn=ecn, + ip_ttl=ttl) + + if num_of_flows == "multiple": + pkt2 = simple_udp_packet(pktlen=packet_length, + eth_dst=pkt_dst_mac, + eth_src=src_port_1_mac, + ip_src=src_port_1_ip, + ip_dst=dst_port_ip, + ip_tos=((dscp << 2) | ecn), + udp_sport=1024, + udp_dport=2049, + ip_ecn=ecn, + ip_ttl=ttl) + + pkt4 = simple_udp_packet(pktlen=packet_length, + eth_dst=pkt_dst_mac, + eth_src=src_port_2_mac, + ip_src=src_port_2_ip, + ip_dst=dst_port_ip, + ip_tos=((dscp << 2) | ecn), + udp_sport=1024, + udp_dport=2051, + ip_ecn=ecn, + ip_ttl=ttl) + + print >> sys.stderr, "test dst_port_id: {}, src_port_1_id: {}".format( + dst_port_id, src_port_1_id + ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_1_id, pkt_dst_mac, dst_port_ip, src_port_1_ip + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise + print >> sys.stderr, "actual dst_port_id: {}".format(dst_port_id) + + # get a snapshot of counter values at recv and transmit ports + recv_counters_base1, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_1_id]) + recv_counters_base2, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_2_id]) + xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # Add slight tolerance in threshold characterization to consider + # the case that cpu puts packets in the egress queue after we pause the egress + # or 
the leak out is simply less than expected as we have occasionally observed + if 'pkts_num_margin' in self.test_params.keys(): + margin = int(self.test_params['pkts_num_margin']) + else: + margin = 2 + + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + + try: + fill_leakout_plus_one(self, src_port_1_id, dst_port_id, pkt, int(self.test_params['pg']), asic_type) + # send packets short of triggering pfc + # Send 1 less packet due to leakout filling + if num_of_flows == 'multiple': + send_packet(self, src_port_1_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc/2 - 2 - margin) + send_packet(self, src_port_1_id, pkt2, pkts_num_leak_out + pkts_num_trig_pfc/2 - 2 - margin) + send_packet(self, src_port_2_id, pkt3, pkts_num_leak_out + pkts_num_trig_pfc/2 - 2 - margin) + send_packet(self, src_port_2_id, pkt4, pkts_num_leak_out + pkts_num_trig_pfc/2 - 2 - margin) + else: + send_packet(self, src_port_1_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - 2 - margin) + send_packet(self, src_port_2_id, pkt3, pkts_num_leak_out + pkts_num_trig_pfc - 2 - margin) + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters1, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_1_id]) + recv_counters2, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_2_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port no pfc + assert(recv_counters1[pg] == recv_counters_base1[pg]) + assert(recv_counters2[pg] == recv_counters_base2[pg]) + # recv port no ingress drop + for cntr in ingress_counters: + assert(recv_counters1[cntr] == recv_counters_base1[cntr]) + assert(recv_counters2[cntr] == recv_counters_base2[cntr]) + # xmit port no egress drop + for cntr in egress_counters: + assert(xmit_counters[cntr] 
== xmit_counters_base[cntr]) + + # send 1 packet to trigger pfc + if num_of_flows == "multiple": + send_packet(self, src_port_1_id, pkt, 1 + 2 * margin) + send_packet(self, src_port_1_id, pkt2, 1 + 2 * margin) + send_packet(self, src_port_2_id, pkt3, 1 + 2 * margin) + send_packet(self, src_port_2_id, pkt4, 1 + 2 * margin) + else: + send_packet(self, src_port_1_id, pkt, 1 + 2 * margin) + send_packet(self, src_port_2_id, pkt3, 1 + 2 * margin) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters_base1 = recv_counters1 + recv_counters_base2 = recv_counters2 + recv_counters1, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_1_id]) + recv_counters2, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_2_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # recv port pfc + assert(recv_counters1[pg] > recv_counters_base1[pg]) + assert(recv_counters2[pg] > recv_counters_base2[pg]) + # recv port no ingress drop + for cntr in ingress_counters: + assert(recv_counters1[cntr] == recv_counters_base1[cntr]) + assert(recv_counters2[cntr] == recv_counters_base2[cntr]) + # xmit port no egress drop + for cntr in egress_counters: + assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + + finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) +# Base class used for individual PTF runs used in the following: testPfcStormWithSharedHeadroomOccupancy +class PfcStormTestWithSharedHeadroom(sai_base_test.ThriftInterfaceDataPlane): + + def parse_test_params(self): + # Parse pkt construction related input parameters + self.dscp = 
int(self.test_params['dscp']) + self.ecn = int(self.test_params['ecn']) + self.sonic_version = self.test_params['sonic_version'] + self.router_mac = self.test_params['router_mac'] + self.asic_type = self.test_params['sonic_asic_type'] + + self.pg_id = int(self.test_params['pg']) + # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + self.pg = self.pg_id + 2 + + self.src_port_id = int(self.test_params['src_port_id']) + self.src_port_ip = self.test_params['src_port_ip'] + self.src_port_vlan = self.test_params['src_port_vlan'] + self.src_port_mac = self.dataplane.get_mac(0, self.src_port_id) + + self.dst_port_id = int(self.test_params['dst_port_id']) + self.dst_port_ip = self.test_params['dst_port_ip'] + self.dst_port_mac = self.dataplane.get_mac(0, self.dst_port_id) + + self.ttl = 64 + self.default_packet_length = 64 + + # Margin used to while crossing the shared headrooom boundary + self.margin = 2 + + # get counter names to query + self.ingress_counters, self.egress_counters = get_counter_names(self.sonic_version) + + +class PtfFillBuffer(PfcStormTestWithSharedHeadroom): + + def runTest(self): + + time.sleep(5) + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + + self.parse_test_params() + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + pkts_num_private_headrooom = int(self.test_params['pkts_num_private_headrooom']) + + # Draft packets + pkt_dst_mac = self.router_mac if self.router_mac != '' else self.dst_port_mac + pkt = construct_ip_pkt(self.default_packet_length, + pkt_dst_mac, + self.src_port_mac, + self.src_port_ip, + self.dst_port_ip, + self.dscp, + self.src_port_vlan, + ecn=self.ecn, + ttl=self.ttl) + + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_base, queue_counters = 
sai_thrift_read_port_counters( + self.client, port_list[self.src_port_id] + ) + + logging.info("Disabling xmit ports: {}".format(self.dst_port_id)) + sai_thrift_port_tx_disable(self.client, self.asic_type, [self.dst_port_id]) + + xmit_counters_base, queue_counters = sai_thrift_read_port_counters( + self.client, port_list[self.dst_port_id] + ) + + num_pkts = pkts_num_trig_pfc + pkts_num_private_headrooom + logging.info("Send {} pkts to egress out of {}".format(num_pkts, self.dst_port_id)) + # send packets to dst port 1, to cross into shared headrooom + send_packet( + self, self.src_port_id, pkt, num_pkts + ) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + # get a snapshot of counter values at recv and transmit ports + # queue counters value is not of our interest here + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) + + logging.debug("Recv Counters: {}, Base: {}".format(recv_counters, recv_counters_base)) + logging.debug("Xmit Counters: {}, Base: {}".format(xmit_counters, xmit_counters_base)) + + # recv port pfc + assert(recv_counters[self.pg] > recv_counters_base[self.pg]) + # recv port no ingress drop + for cntr in self.ingress_counters: + assert(recv_counters[cntr] == recv_counters_base[cntr]) + # xmit port no egress drop + for cntr in self.egress_counters: + assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + + +class PtfReleaseBuffer(PfcStormTestWithSharedHeadroom): + + def runTest(self): + time.sleep(1) + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + + 
self.parse_test_params() + + # get a snapshot of counter values at recv and transmit ports + # queue_counters value is not of our interest here + recv_counters_base, queue_counters = sai_thrift_read_port_counters( + self.client, port_list[self.src_port_id] + ) + + xmit_counters_base, queue_counters = sai_thrift_read_port_counters( + self.client, port_list[self.dst_port_id] + ) + + logging.info("Enable xmit ports: {}".format(self.dst_port_id)) + sai_thrift_port_tx_enable(self.client, self.asic_type, [self.dst_port_id]) + + # allow enough time for the dut to sync up the counter values in counters_db + time.sleep(8) + + # get new base counter values at recv ports + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_id]) + # no ingress drop + for cntr in self.ingress_counters: + assert(recv_counters[cntr] == recv_counters_base[cntr]) + recv_counters_base = recv_counters + + # allow enough time for the test to check if no PFC frame was sent from Recv port + time.sleep(30) + + # get the current snapshot of counter values at recv and transmit ports + recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_id]) + xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) + + logging.debug("Recv Counters: {}, Base: {}".format(recv_counters, recv_counters_base)) + logging.debug("Xmit Counters: {}, Base: {}".format(xmit_counters, xmit_counters_base)) + + # recv port pfc should not be incremented + assert(recv_counters[self.pg] == recv_counters_base[self.pg]) + # recv port no ingress drop + for cntr in self.ingress_counters: + assert(recv_counters[cntr] == recv_counters_base[cntr]) + # xmit port no egress drop + for cntr in self.egress_counters: + assert(xmit_counters[cntr] == xmit_counters_base[cntr]) + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), 
bases=stats) + + +class PtfEnableDstPorts(PfcStormTestWithSharedHeadroom): + + def runTest(self): + time.sleep(1) + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + self.parse_test_params() + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + sai_thrift_port_tx_enable(self.client, self.asic_type, [self.dst_port_id]) + + # This test looks to measure xon threshold (pg_reset_floor) class PFCXonTest(sai_base_test.ThriftInterfaceDataPlane): @@ -839,6 +1541,7 @@ def get_rx_port(self, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, dst_po def runTest(self): time.sleep(5) switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) last_pfc_counter = 0 recv_port_counters = [] transmit_port_counters = [] @@ -878,12 +1581,11 @@ def runTest(self): hysteresis = int(self.test_params['pkts_num_hysteresis']) else: hysteresis = 0 - default_packet_length = 64 hwsku = self.test_params['hwsku'] # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here - recv_counters_base, queue_counters = sai_thrift_read_port_counters( + recv_counters_base, _ = sai_thrift_read_port_counters( self.client, port_list[src_port_id] ) @@ -898,134 +1600,215 @@ def runTest(self): # get counter names to query ingress_counters, egress_counters = get_counter_names(sonic_version) + port_counter_indexes = [pg] + port_counter_indexes += ingress_counters + port_counter_indexes += egress_counters + port_counter_indexes += [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN] + # create packet pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac - pkt = 
construct_ip_pkt(default_packet_length, - pkt_dst_mac, - src_port_mac, - src_port_ip, - dst_port_ip, - dscp, - src_port_vlan, - ecn=ecn, - ttl=ttl) - dst_port_id = self.get_rx_port( - src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, dst_port_id, src_port_vlan - ) + if 'packet_size' in self.test_params: + packet_length = self.test_params['packet_size'] + else: + packet_length = 64 - # create packet pkt_dst_mac2 = router_mac if router_mac != '' else dst_port_2_mac - pkt2 = construct_ip_pkt(default_packet_length, - pkt_dst_mac2, - src_port_mac, - src_port_ip, - dst_port_2_ip, - dscp, - src_port_vlan, - ecn=ecn, - ttl=ttl) - dst_port_2_id = self.get_rx_port( - src_port_id, pkt_dst_mac2, dst_port_2_ip, src_port_ip, dst_port_2_id, src_port_vlan - ) - - # create packet pkt_dst_mac3 = router_mac if router_mac != '' else dst_port_3_mac - pkt3 = construct_ip_pkt(default_packet_length, - pkt_dst_mac3, + + is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + pkt_dst_mac = def_vlan_mac + pkt_dst_mac2 = def_vlan_mac + pkt_dst_mac3 = def_vlan_mac + + try: + pkt = construct_ip_pkt(packet_length, + pkt_dst_mac, src_port_mac, src_port_ip, - dst_port_3_ip, + dst_port_ip, dscp, src_port_vlan, ecn=ecn, ttl=ttl) - dst_port_3_id = self.get_rx_port( - src_port_id, pkt_dst_mac3, dst_port_3_ip, src_port_ip, dst_port_3_id, src_port_vlan - ) + dst_port_id = self.get_rx_port( + src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, dst_port_id, src_port_vlan + ) + + # create packet + pkt2 = construct_ip_pkt(packet_length, + pkt_dst_mac2, + src_port_mac, + src_port_ip, + dst_port_2_ip, + dscp, + src_port_vlan, + ecn=ecn, + ttl=ttl) + dst_port_2_id = self.get_rx_port( + src_port_id, pkt_dst_mac2, dst_port_2_ip, src_port_ip, dst_port_2_id, src_port_vlan + ) + + # create packet + pkt3 = construct_ip_pkt(packet_length, + pkt_dst_mac3, + src_port_mac, + src_port_ip, + dst_port_3_ip, + dscp, + 
src_port_vlan, + ecn=ecn, + ttl=ttl) + dst_port_3_id = self.get_rx_port( + src_port_id, pkt_dst_mac3, dst_port_3_ip, src_port_ip, dst_port_3_id, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise # For TH3, some packets stay in egress memory and doesn't show up in shared buffer or leakout if 'pkts_num_egr_mem' in self.test_params.keys(): pkts_num_egr_mem = int(self.test_params['pkts_num_egr_mem']) + step_id = 1 + step_desc = 'disable TX for dst_port_id, dst_port_2_id, dst_port_3_id' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id, dst_port_2_id, dst_port_3_id]) try: + ''' + Send various numbers of pkts to each dst port to occupy PG buffer, as below: + shared buffer theshold + xon offset | + | | + PG config: + + + -----------------------------------------------------------------*---------------------------------*---------------------- + pkts in each port: + + + | | + |<--- pkts_num_trig_pfc - pkts_num_dismiss_pfc - margin --->| | + in dst port 1 | | + |<--- pkts_num_dismiss_pfc + margin*2 --->| + in dst port 2 | + |<--- X pkts --->| + in dst port 3 + ''' # send packets to dst port 1, occupying the "xon" - xmit_counters_base, queue_counters = sai_thrift_read_port_counters( + step_id += 1 + step_desc = 'send packets to dst port 1, occupying the xon' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) + + xmit_counters_base, _ = sai_thrift_read_port_counters( self.client, port_list[dst_port_id] ) - # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and - # Arista-7050CX3-32S-C32. 
Starting with zero pkts_num_leak_out and trying to find - # actual leakout by sending packets and reading actual leakout from HW - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - pkts_num_leak_out = 0 + # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and + # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find + # actual leakout by sending packets and reading actual leakout from HW. + # And apply dynamically compensation to all device using Broadcom ASIC. + if check_leackout_compensation_support(asic_type, hwsku): + pkts_num_leak_out = 0 if hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': send_packet( - self, src_port_id, pkt, + self, src_port_id, pkt, pkts_num_egr_mem + pkts_num_leak_out + pkts_num_trig_pfc - pkts_num_dismiss_pfc - hysteresis ) + elif 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, int(self.test_params['pg']), asic_type) + send_packet( + self, src_port_id, pkt, + pkts_num_leak_out + pkts_num_trig_pfc - pkts_num_dismiss_pfc - hysteresis - 1 + ) else: send_packet( - self, src_port_id, pkt, - pkts_num_leak_out + pkts_num_trig_pfc - pkts_num_dismiss_pfc - hysteresis + self, src_port_id, pkt, + pkts_num_leak_out + pkts_num_trig_pfc - pkts_num_dismiss_pfc - hysteresis - margin ) + sys.stderr.write('send_packet(src_port_id, pkt, {} + {} - {} - {})\n'.format(pkts_num_leak_out, pkts_num_trig_pfc, pkts_num_dismiss_pfc, hysteresis)) - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - actual_port_leak_out = xmit_counters[TRANSMITTED_PKTS] - xmit_counters_base[TRANSMITTED_PKTS] - send_packet(self, src_port_id, pkt, actual_port_leak_out) + if 
check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_base, self, src_port_id, pkt, 40) # send packets to dst port 2, occupying the shared buffer - xmit_2_counters_base, queue_counters = sai_thrift_read_port_counters( + step_id += 1 + step_desc = 'send packets to dst port 2, occupying the shared buffer' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) + + xmit_2_counters_base, _ = sai_thrift_read_port_counters( self.client, port_list[dst_port_2_id] ) if hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': send_packet( - self, src_port_id, pkt2, + self, src_port_id, pkt2, pkts_num_egr_mem + pkts_num_leak_out + margin + pkts_num_dismiss_pfc - 1 + hysteresis ) + elif 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_2_id, pkt2, int(self.test_params['pg']), asic_type) + send_packet( + self, src_port_id, pkt2, + pkts_num_leak_out + margin + pkts_num_dismiss_pfc - 2 + hysteresis + ) else: send_packet( - self, src_port_id, pkt2, - pkts_num_leak_out + margin + pkts_num_dismiss_pfc - 1 + hysteresis + self, src_port_id, pkt2, + pkts_num_leak_out + margin * 2 + pkts_num_dismiss_pfc - 1 + hysteresis ) + sys.stderr.write('send_packet(src_port_id, pkt2, {} + {} + {} - 1 + {})\n'.format(pkts_num_leak_out, margin, pkts_num_dismiss_pfc, hysteresis)) - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - send_packet(self, src_port_id, pkt2, actual_port_leak_out) + if check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_2_id], TRANSMITTED_PKTS, xmit_2_counters_base, self, src_port_id, pkt2, 40) # send 1 packet to dst port 3, triggering PFC - xmit_3_counters_base, queue_counters = 
sai_thrift_read_port_counters( + step_id += 1 + step_desc = 'send 1 packet to dst port 3, triggering PFC' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) + + xmit_3_counters_base, _ = sai_thrift_read_port_counters( self.client, port_list[dst_port_3_id] ) if hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': send_packet(self, src_port_id, pkt3, pkts_num_egr_mem + pkts_num_leak_out + 1) + elif 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_3_id, pkt3, int(self.test_params['pg']), asic_type) + send_packet(self, src_port_id, pkt3, pkts_num_leak_out) else: send_packet(self, src_port_id, pkt3, pkts_num_leak_out + 1) + sys.stderr.write('send_packet(src_port_id, pkt3, {} + 1)\n'.format(pkts_num_leak_out)) - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-M-O16C64' or hwsku == 'DellEMC-Z9332f-O32': - send_packet(self, src_port_id, pkt3, actual_port_leak_out) + if check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_3_id], TRANSMITTED_PKTS, xmit_3_counters_base, self, src_port_id, pkt3, 40) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) - xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, 
port_list[dst_port_id]) + xmit_2_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id], + [recv_counters, xmit_counters, xmit_2_counters, xmit_3_counters], + [recv_counters_base, xmit_counters_base, xmit_2_counters_base, xmit_3_counters_base], + port_counter_indexes, + 'srcport {}, dstport {}, dstport2 {}, dstport3 {}, base is previous step'.format( src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id)) + # recv port pfc - assert(recv_counters[pg] > recv_counters_base[pg]) + assert(recv_counters[pg] > recv_counters_base[pg]), 'unexpectedly not trigger PFC for PG {} (counter: {}), at step {} {}'.format(pg, port_counter_fields[pg], step_id, step_desc) # recv port no ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] == recv_counters_base[cntr]), 'unexpectedly ingress drop on recv port (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) - assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]) - assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly egress drop on xmit port 1 (counter: {}, at step {} {})'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]), 'unexpectedly egress drop on xmit port 2 (counter: {}, at step {} {})'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]), 'unexpectedly egress drop on xmit port 3 (counter: {}, at step {} {})'.format(port_counter_fields[cntr], step_id, step_desc) + step_id += 1 + 
step_desc = 'enable TX for dst_port_2_id, to drain off buffer in dst_port_2' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_2_id]) # allow enough time for the dut to sync up the counter values in counters_db @@ -1033,51 +1816,79 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here recv_counters_base = recv_counters - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) - xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id], + [recv_counters, xmit_counters, xmit_2_counters, xmit_3_counters], + [recv_counters_base, xmit_counters_base, xmit_2_counters_base, xmit_3_counters_base], + port_counter_indexes, + 'srcport {}, dstport {}, dstport2 {}, dstport3 {}, base is previous step'.format( src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id)) + # recv port pfc - assert(recv_counters[pg] > recv_counters_base[pg]) + assert(recv_counters[pg] > recv_counters_base[pg]), 'unexpectedly not trigger PFC for PG {} (counter: {}), at step {} {}'.format(pg, port_counter_fields[pg], step_id, step_desc) # recv port no ingress drop for cntr in ingress_counters: - 
assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] <= recv_counters_base[cntr] + COUNTER_MARGIN), 'unexpectedly ingress drop on recv port (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) - assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]) - assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly egress drop on xmit port 1 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]), 'unexpectedly egress drop on xmit port 2 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]), 'unexpectedly egress drop on xmit port 3 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) + step_id += 1 + step_desc = 'enable TX for dst_port_3_id, to drain off buffer in dst_port_3' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_3_id]) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) # get new base counter values at recv ports # queue counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + + show_counter('PortCnt', self, asic_type, [src_port_id], [recv_counters], [recv_counters_base], + port_counter_indexes, 'srcport {}, base is previous step'.format( src_port_id)) + for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + assert(recv_counters[cntr] <= recv_counters_base[cntr] + 
COUNTER_MARGIN), 'unexpectedly ingress drop on recv port (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) recv_counters_base = recv_counters + step_id += 1 + step_desc = 'sleep 30 seconds' + sys.stderr.write('step {}: {}\n'.format(step_id, step_desc)) + time.sleep(30) # get a snapshot of counter values at recv and transmit ports # queue counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - xmit_2_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) - xmit_3_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + xmit_2_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_2_id]) + xmit_3_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_3_id]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id], + [recv_counters, xmit_counters, xmit_2_counters, xmit_3_counters], + [recv_counters_base, xmit_counters_base, xmit_2_counters_base, xmit_3_counters_base], + port_counter_indexes, + 'srcport {}, dstport {}, dstport2 {}, dstport3 {}, base is previous step'.format( src_port_id, dst_port_id, dst_port_2_id, dst_port_3_id)) + # recv port no pfc - assert(recv_counters[pg] == recv_counters_base[pg]) + assert(recv_counters[pg] == recv_counters_base[pg]), 'unexpectedly trigger PFC for PG {} (counter: {}), at step {} {}'.format(pg, port_counter_fields[pg], step_id, step_desc) # recv port no ingress drop for cntr in ingress_counters: - assert(recv_counters[cntr] == recv_counters_base[cntr]) + 
assert(recv_counters[cntr] <= recv_counters_base[cntr] + COUNTER_MARGIN), 'unexpectedly ingress drop on recv port (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) # xmit port no egress drop for cntr in egress_counters: - assert(xmit_counters[cntr] == xmit_counters_base[cntr]) - assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]) - assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]) + assert(xmit_counters[cntr] == xmit_counters_base[cntr]), 'unexpectedly egress drop on xmit port 1 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_2_counters[cntr] == xmit_2_counters_base[cntr]), 'unexpectedly egress drop on xmit port 2 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) + assert(xmit_3_counters[cntr] == xmit_3_counters_base[cntr]), 'unexpectedly egress drop on xmit port 3 (counter: {}), at step {} {}'.format(port_counter_fields[cntr], step_id, step_desc) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id, dst_port_2_id, dst_port_3_id]) class HdrmPoolSizeTest(sai_base_test.ThriftInterfaceDataPlane): @@ -1085,6 +1896,7 @@ def setUp(self): sai_base_test.ThriftInterfaceDataPlane.setUp(self) time.sleep(5) switch_init(self.client) + self.stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters self.testbed_type = self.test_params['testbed_type'] @@ -1141,8 +1953,11 @@ def setUp(self): self.dst_port_mac = self.dataplane.get_mac(0, self.dst_port_id) self.src_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in self.src_port_ids] - if self.testbed_type in ['t0', 't0-64', 't0-116']: + if self.testbed_type in ['dualtor', 'dualtor-56', 't0', 't0-64', 
't0-116']: # populate ARP + # sender's MAC address is corresponding PTF port's MAC address + # sender's IP address is caculated in tests/qos/qos_sai_base.py::QosSaiBase::__assignTestPortIps() + # for dualtor: sender_IP_address = DUT_default_VLAN_interface_IP_address + portIndex + 1 for idx, ptid in enumerate(self.src_port_ids): arpreq_pkt = simple_arp_packet( @@ -1165,6 +1980,14 @@ def setUp(self): send_packet(self, self.dst_port_id, arpreq_pkt) time.sleep(8) + # for dualtor, need to change test traffic's dest MAC address to point DUT's default VLAN interface + # and then DUT is able to correctly forward test traffic to dest PORT on PTF + # Reminder: need to change this dest MAC address after above ARP population to avoid corrupt ARP packet + is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + self.dst_port_mac = def_vlan_mac + def tearDown(self): sai_base_test.ThriftInterfaceDataPlane.tearDown(self) @@ -1180,7 +2003,7 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here recv_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.src_port_ids] - xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) + xmit_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) # For TH3, some packets stay in egress memory and doesn't show up in shared buffer or leakout if 'pkts_num_egr_mem' in self.test_params.keys(): @@ -1225,7 +2048,17 @@ def runTest(self): else: pkts_num_trig_pfc = self.pkts_num_trig_pfc_shp[i] - send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkts_num_trig_pfc / self.pkt_size_factor) + pkt_cnt = pkts_num_trig_pfc // self.pkt_size_factor + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkt_cnt) + + 
time.sleep(8) # wait pfc counter refresh + + show_counter('PortCnt', self, self.asic_type, ports=self.src_port_ids + [self.dst_port_id], + base=recv_counters_bases + [xmit_counters_base], + indexes=[pg for pg in self.pgs] + self.ingress_counters + self.egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='To fill service pool, send {} pkt with DSCP {} PG {} from srcport {} to dstport {}, base is first step'.format( + pkt_cnt, sidx_dscp_pg_tuples[i][1], sidx_dscp_pg_tuples[i][2], self.src_port_ids, self.dst_port_id)) print >> sys.stderr, "Service pool almost filled" sys.stderr.flush() @@ -1247,7 +2080,7 @@ def runTest(self): ip_ttl=ttl) pkt_cnt = 0 - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) while (recv_counters[sidx_dscp_pg_tuples[i][2]] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][sidx_dscp_pg_tuples[i][2]]) and (pkt_cnt < 10): send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1) pkt_cnt += 1 @@ -1256,7 +2089,16 @@ def runTest(self): # get a snapshot of counter values at recv and transmit ports # queue_counters value is not of our interest here - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + + time.sleep(8) # wait pfc counter refresh + + show_counter('PortCnt', self, self.asic_type, ports=self.src_port_ids + [self.dst_port_id], + base=recv_counters_bases + [xmit_counters_base], + indexes=[pg for pg in self.pgs] + self.ingress_counters + self.egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, 
EGRESS_PORT_QLEN], + banner='To trigger PFC, send {} pkt with DSCP {} PG {} from srcport {} to dstport {}, base is first step'.format( + pkt_cnt, sidx_dscp_pg_tuples[i][1], sidx_dscp_pg_tuples[i][2], self.src_port_ids, self.dst_port_id)) if pkt_cnt == 10: sys.exit("Too many pkts needed to trigger pfc: %d" % (pkt_cnt)) @@ -1267,7 +2109,7 @@ def runTest(self): print >> sys.stderr, "PFC triggered" sys.stderr.flush() - upper_bound = 2 + upper_bound = 2 * margin + 1 if self.wm_multiplier: hdrm_pool_wm = sai_thrift_read_headroom_pool_watermark(self.client, self.buf_pool_roid) print >> sys.stderr, "Actual headroom pool watermark value to start: %d" % hdrm_pool_wm @@ -1291,14 +2133,26 @@ def runTest(self): ip_tos=tos, ip_ttl=ttl) - send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, self.pkts_num_hdrm_full / self.pkt_size_factor if i != self.pgs_num - 1 else self.pkts_num_hdrm_partial / self.pkt_size_factor) + pkt_cnt = self.pkts_num_hdrm_full // self.pkt_size_factor if i != self.pgs_num - 1 else self.pkts_num_hdrm_partial // self.pkt_size_factor + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkt_cnt) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + show_counter('PortCnt', self, self.asic_type, ports=self.src_port_ids + [self.dst_port_id], + base=recv_counters_bases + [xmit_counters_base], + indexes=[pg for pg in self.pgs] + self.ingress_counters + self.egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='To fill headroom pool, send {} pkt with DSCP {} PG {} from srcport {} to dstport {}, base is first step'.format( + pkt_cnt, sidx_dscp_pg_tuples[i][1], sidx_dscp_pg_tuples[i][2], self.src_port_ids, self.dst_port_id)) + + recv_counters, _ = sai_thrift_read_port_counters(self.client, 
port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) # assert no ingress drop for cntr in self.ingress_counters: - assert(recv_counters[cntr] == recv_counters_bases[sidx_dscp_pg_tuples[i][0]][cntr]) + # corner case: in previous step in which trigger PFC, a few packets were dropped, and dropping don't keep increasing constantaly. + # workaround: tolerates a few packet drop here, and output relevant information for offline analysis, to know if it's an issue + if recv_counters[cntr] != recv_counters_bases[sidx_dscp_pg_tuples[i][0]][cntr]: + sys.stderr.write('There are some unexpected {} packet drop\n'.format(recv_counters[cntr] - recv_counters_bases[sidx_dscp_pg_tuples[i][0]][cntr])) + assert(recv_counters[cntr] - recv_counters_bases[sidx_dscp_pg_tuples[i][0]][cntr] <= margin) if self.wm_multiplier: wm_pkt_num += (self.pkts_num_hdrm_full if i != self.pgs_num - 1 else self.pkts_num_hdrm_partial) @@ -1308,8 +2162,10 @@ def runTest(self): if upper_bound_wm > self.max_headroom: upper_bound_wm = self.max_headroom - print >> sys.stderr, "pkts sent: %d, lower bound: %d, actual headroom pool watermark: %d, upper_bound: %d" %(wm_pkt_num, expected_wm, hdrm_pool_wm, upper_bound_wm) - assert(expected_wm <= hdrm_pool_wm) + print >> sys.stderr, "pkts sent: %d, lower bound: %d, actual headroom pool watermark: %d, upper_bound: %d" % ( + wm_pkt_num, expected_wm, hdrm_pool_wm, upper_bound_wm) + if 'innovium' not in self.asic_type: + assert(expected_wm <= hdrm_pool_wm) assert(hdrm_pool_wm <= upper_bound_wm) print >> sys.stderr, "all but the last pg hdrms filled" @@ -1318,39 +2174,199 @@ def runTest(self): # last pg i = self.pgs_num - 1 # send 1 packet on last pg to trigger ingress drop - send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, 1 + 2 * margin) + pkt_cnt = 1 + 2 * margin + send_packet(self, self.src_port_ids[sidx_dscp_pg_tuples[i][0]], pkt, pkt_cnt) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - recv_counters, 
queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) + + show_counter('PortCnt', self, self.asic_type, ports=self.src_port_ids + [self.dst_port_id], + base=recv_counters_bases + [xmit_counters_base], + indexes=[pg for pg in self.pgs] + self.ingress_counters + self.egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='To fill last PG and trigger ingress drop, send {} pkt with DSCP {} PG {} from srcport {} to dstport {}, base is first step'.format( + pkt_cnt, sidx_dscp_pg_tuples[i][1], sidx_dscp_pg_tuples[i][2], self.src_port_ids, self.dst_port_id)) + + recv_counters, _ = sai_thrift_read_port_counters(self.client, port_list[self.src_port_ids[sidx_dscp_pg_tuples[i][0]]]) # assert ingress drop for cntr in self.ingress_counters: assert(recv_counters[cntr] > recv_counters_bases[sidx_dscp_pg_tuples[i][0]][cntr]) # assert no egress drop at the dut xmit port - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) + xmit_counters, _ = sai_thrift_read_port_counters(self.client, port_list[self.dst_port_id]) for cntr in self.egress_counters: assert(xmit_counters[cntr] == xmit_counters_base[cntr]) print >> sys.stderr, "pg hdrm filled" if self.wm_multiplier: - # assert hdrm pool wm still remains the same - hdrm_pool_wm = sai_thrift_read_headroom_pool_watermark(self.client, self.buf_pool_roid) - assert(expected_wm <= hdrm_pool_wm) - assert(hdrm_pool_wm <= upper_bound_wm) - # at this point headroom pool should be full. 
class SharedResSizeTest(sai_base_test.ThriftInterfaceDataPlane):
    """Verify the shared buffer pool limit is sized as configured.

    With every destination port TX-disabled, per-(src, dst) packet bursts
    sized by 'pkt_counts' are sent so traffic accumulates in the shared
    buffer pool.  All bursts except the last must fit without triggering
    XOFF on any source port; the final burst (a single packet, widened by
    the margin) must cross the shared limit and trigger XOFF on the last
    source port.  No ingress or egress drops may occur on any port.
    """

    def setUp(self):
        """Initialize the dataplane, snapshot baseline stats, and parse test params."""
        sai_base_test.ThriftInterfaceDataPlane.setUp(self)
        time.sleep(5)
        switch_init(self.client)
        # Baseline counter snapshot, collected silently; diffed against the
        # final state in runTest's finally block.
        self.stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True)

        # Parse input parameters
        self.testbed_type = self.test_params['testbed_type']
        self.dscps = self.test_params['dscps']
        self.ecn = self.test_params['ecn']
        self.router_mac = self.test_params['router_mac']
        self.sonic_version = self.test_params['sonic_version']
        self.pgs = self.test_params['pgs']
        # PG/PFC counters appear offset by 2 in the port counter array
        # returned by sai_thrift_read_port_counters — TODO confirm against
        # the counter layout used elsewhere in this file.
        self.pg_cntr_indices = [pg + 2 for pg in self.pgs]
        self.queues = self.test_params['queues']
        self.src_port_ids = self.test_params['src_port_ids']
        self.src_port_ips = self.test_params['src_port_ips']
        print >> sys.stderr, self.src_port_ips
        sys.stderr.flush()
        # get counter names to query
        self.ingress_counters, self.egress_counters = get_counter_names(self.sonic_version)

        self.dst_port_ids = self.test_params['dst_port_ids']
        self.dst_port_ips = self.test_params['dst_port_ips']
        self.asic_type = self.test_params['sonic_asic_type']
        self.pkt_counts = self.test_params['pkt_counts']
        self.shared_limit_bytes = self.test_params['shared_limit_bytes']

        # LACP causes slow increase in memory consumption over duration of the test, thus
        # a margin may be needed.
        if 'pkts_num_margin' in self.test_params:
            self.margin = int(self.test_params['pkts_num_margin'])
        else:
            self.margin = 0

        # Packet/cell sizing: both must be supplied together; otherwise a
        # 64-byte packet and 350-byte cell are assumed.
        if 'packet_size' in self.test_params:
            self.packet_size = self.test_params['packet_size']
            self.cell_size = self.test_params['cell_size']
        else:
            self.packet_size = 64
            self.cell_size = 350

        self.dst_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in self.dst_port_ids]
        self.src_port_macs = [self.dataplane.get_mac(0, ptid) for ptid in self.src_port_ids]

        # Allow counters/config to settle before the test body runs.
        time.sleep(8)

    def tearDown(self):
        """Delegate teardown to the base dataplane test."""
        sai_base_test.ThriftInterfaceDataPlane.tearDown(self)

    def runTest(self):
        """Fill the shared pool port-by-port; expect XOFF only on the final packet."""
        # All per-port parameter lists must be parallel.
        assert len(self.dscps) == len(self.pgs) == len(self.src_port_ids) == len(self.dst_port_ids) == len(self.pkt_counts)

        # Need at least 2 packet send instructions
        assert len(self.pkt_counts) >= 2

        # Reservation limit should be indicated by single packet, which is then modified
        # by the given margin
        assert self.pkt_counts[-1] == 1
        self.pkt_counts[-1] += 2 * self.margin

        # Second to last pkt count instruction needs to be reduced by margin to avoid
        # triggering XOFF early.
        assert self.pkt_counts[-2] >= self.margin
        self.pkt_counts[-2] -= self.margin

        # Test configuration packet counts and sizing should accurately trigger shared limit:
        # everything but the last burst stays under the limit; adding the last crosses it.
        cell_occupancy = (self.packet_size + self.cell_size - 1) / self.cell_size
        assert sum(self.pkt_counts[:-1]) * cell_occupancy * self.cell_size < self.shared_limit_bytes
        assert sum(self.pkt_counts) * cell_occupancy * self.cell_size >= self.shared_limit_bytes

        # get a snapshot of counter values at recv and transmit ports
        recv_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.src_port_ids]
        xmit_counters_bases = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.dst_port_ids]

        # Disable all dst ports
        uniq_dst_ports = list(set(self.dst_port_ids))
        sai_thrift_port_tx_disable(self.client, self.asic_type, uniq_dst_ports)

        try:
            for i in range(len(self.src_port_ids)):
                dscp = self.dscps[i]
                pg = self.pgs[i]
                queue = self.queues[i]
                src_port_id = self.src_port_ids[i]
                dst_port_id = self.dst_port_ids[i]
                src_port_mac = self.src_port_macs[i]
                dst_port_mac = self.dst_port_macs[i]
                src_port_ip = self.src_port_ips[i]
                dst_port_ip = self.dst_port_ips[i]
                pkt_count = self.pkt_counts[i]

                tos = (dscp << 2) | self.ecn

                ttl = 64
                pkt = simple_tcp_packet(pktlen=self.packet_size,
                                        eth_dst=self.router_mac if self.router_mac != '' else dst_port_mac,
                                        eth_src=src_port_mac,
                                        ip_src=src_port_ip,
                                        ip_dst=dst_port_ip,
                                        ip_tos=tos,
                                        ip_ttl=ttl)

                if i == len(self.src_port_ids) - 1:
                    # Verify XOFF has not been triggered on final port before sending traffic
                    print >> sys.stderr, "Verifying XOFF hasn't been triggered yet on final iteration"
                    sys.stderr.flush()
                    time.sleep(8)
                    recv_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id])[0]
                    xoff_txd = recv_counters[self.pg_cntr_indices[i]] - recv_counters_bases[i][self.pg_cntr_indices[i]]
                    assert xoff_txd == 0, "XOFF triggered too early on final iteration, XOFF count is %d" % xoff_txd

                # Send requested number of packets
                print >> sys.stderr, "Sending %d packets for dscp=%d, pg=%d, src_port_id=%d, dst_port_id=%d" % (pkt_count, dscp, pg, src_port_id, dst_port_id)
                sys.stderr.flush()
                if 'cisco-8000' in self.asic_type:
                    fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, queue, self.asic_type)
                    pkt_count -= 1  # leakout adds 1 packet, subtract from current iteration

                send_packet(self, src_port_id, pkt, pkt_count)

                if i == len(self.src_port_ids) - 1:
                    # Verify XOFF has now been triggered on final port
                    print >> sys.stderr, "Verifying XOFF has now been triggered on final iteration"
                    sys.stderr.flush()
                    time.sleep(8)
                    recv_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id])[0]
                    xoff_txd = recv_counters[self.pg_cntr_indices[i]] - recv_counters_bases[i][self.pg_cntr_indices[i]]
                    assert xoff_txd > 0, "Failed to trigger XOFF on final iteration"

            # Verify no ingress/egress drops for all ports
            recv_counters_list = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.src_port_ids]
            xmit_counters_list = [sai_thrift_read_port_counters(self.client, port_list[sid])[0] for sid in self.dst_port_ids]
            for i in range(len(self.src_port_ids)):
                for cntr in self.ingress_counters:
                    drops = recv_counters_list[i][cntr] - recv_counters_bases[i][cntr]
                    assert drops == 0, "Detected %d ingress drops" % drops
                for cntr in self.egress_counters:
                    drops = xmit_counters_list[i][cntr] - xmit_counters_bases[i][cntr]
                    assert drops == 0, "Detected %d egress drops" % drops

        finally:
            # Always dump final stats against the baseline and re-enable TX so
            # the DUT is left in a usable state even on assertion failure.
            show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=self.stats)
            sai_thrift_port_tx_enable(self.client, self.asic_type, uniq_dst_ports)
DscpEcnSend(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters dscp = int(self.test_params['dscp']) @@ -1473,6 +2489,7 @@ def runTest(self): assert (port_counters[cntr] == 0) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) # RELEASE PORT sched_prof_id=sai_thrift_create_scheduler_profile(self.client,RELEASE_PORT_MAX_RATE) attr_value = sai_thrift_attribute_value_t(oid=sched_prof_id) @@ -1483,6 +2500,7 @@ def runTest(self): class WRRtest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters ecn = int(self.test_params['ecn']) @@ -1494,30 +2512,62 @@ def runTest(self): src_port_ip = self.test_params['src_port_ip'] src_port_vlan = self.test_params['src_port_vlan'] src_port_mac = self.dataplane.get_mac(0, src_port_id) - print >> sys.stderr, "dst_port_id: %d, src_port_id: %d" % (dst_port_id, src_port_id) + qos_remap_enable = bool(self.test_params.get('qos_remap_enable', False)) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d qos_remap_enable: %d" % (dst_port_id, src_port_id, qos_remap_enable) print >> sys.stderr, "dst_port_mac: %s, src_port_mac: %s, src_port_ip: %s, dst_port_ip: %s" % (dst_port_mac, src_port_mac, src_port_ip, dst_port_ip) + asic_type = self.test_params['sonic_asic_type'] default_packet_length = 1500 exp_ip_id = 110 - queue_0_num_of_pkts = int(self.test_params['q0_num_of_pkts']) - queue_1_num_of_pkts = int(self.test_params['q1_num_of_pkts']) - queue_2_num_of_pkts = int(self.test_params['q2_num_of_pkts']) - 
queue_3_num_of_pkts = int(self.test_params['q3_num_of_pkts']) - queue_4_num_of_pkts = int(self.test_params['q4_num_of_pkts']) - queue_5_num_of_pkts = int(self.test_params['q5_num_of_pkts']) - queue_6_num_of_pkts = int(self.test_params['q6_num_of_pkts']) + queue_0_num_of_pkts = int(self.test_params.get('q0_num_of_pkts', 0)) + queue_1_num_of_pkts = int(self.test_params.get('q1_num_of_pkts', 0)) + queue_2_num_of_pkts = int(self.test_params.get('q2_num_of_pkts', 0)) + queue_3_num_of_pkts = int(self.test_params.get('q3_num_of_pkts', 0)) + queue_4_num_of_pkts = int(self.test_params.get('q4_num_of_pkts', 0)) + queue_5_num_of_pkts = int(self.test_params.get('q5_num_of_pkts', 0)) + queue_6_num_of_pkts = int(self.test_params.get('q6_num_of_pkts', 0)) + queue_7_num_of_pkts = int(self.test_params.get('q7_num_of_pkts', 0)) limit = int(self.test_params['limit']) pkts_num_leak_out = int(self.test_params['pkts_num_leak_out']) topo = self.test_params['topo'] if 'backend' not in topo: - prio_list = [3, 4, 8, 0, 5, 46, 48] + if not qos_remap_enable: + # When qos_remap is disabled, the map is as below + # DSCP TC QUEUE + # 3 3 3 + # 4 4 4 + # 8 0 0 + # 0 1 1 + # 5 2 2 + # 46 5 5 + # 48 6 6 + prio_list = [3, 4, 8, 0, 5, 46, 48] + q_pkt_cnt = [queue_3_num_of_pkts, queue_4_num_of_pkts, queue_0_num_of_pkts, queue_1_num_of_pkts, queue_2_num_of_pkts, queue_5_num_of_pkts, queue_6_num_of_pkts] + else: + # When qos_remap is enabled, the map is as below + # DSCP TC QUEUE + # 3 3 3 + # 4 4 4 + # 8 0 0 + # 0 1 1 + # 46 5 5 + # 48 7 7 + prio_list = [3, 4, 8, 0, 46, 48] + q_pkt_cnt = [queue_3_num_of_pkts, queue_4_num_of_pkts, queue_0_num_of_pkts, queue_1_num_of_pkts, queue_5_num_of_pkts, queue_7_num_of_pkts] else: prio_list = [3, 4, 1, 0, 2, 5, 6] - q_pkt_cnt = [queue_3_num_of_pkts, queue_4_num_of_pkts, queue_1_num_of_pkts, queue_0_num_of_pkts, queue_2_num_of_pkts, queue_5_num_of_pkts, queue_6_num_of_pkts] - + q_pkt_cnt = [queue_3_num_of_pkts, queue_4_num_of_pkts, queue_1_num_of_pkts, 
queue_0_num_of_pkts, queue_2_num_of_pkts, queue_5_num_of_pkts, queue_6_num_of_pkts] + q_cnt_sum = sum(q_pkt_cnt) # Send packets to leak out pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac + + is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + sys.stderr.write("Since it's dual-TOR testbed, modify pkt_dst_mac from {} to {}\n".format(pkt_dst_mac, def_vlan_mac)) + pkt_dst_mac = def_vlan_mac + pkt = construct_ip_pkt(64, pkt_dst_mac, src_port_mac, @@ -1528,15 +2578,18 @@ def runTest(self): ttl=64) print >> sys.stderr, "dst_port_id: %d, src_port_id: %d, src_port_vlan: %s" % (dst_port_id, src_port_id, src_port_vlan) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: {}".format(dst_port_id) sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) - send_packet(self, src_port_id, pkt, pkts_num_leak_out) # Get a snapshot of counter values @@ -1604,7 +2657,7 @@ def runTest(self): queue_pkt_counters[dscp_of_pkt] += 1 if queue_pkt_counters[dscp_of_pkt] == queue_num_of_pkts[dscp_of_pkt]: - diff_list.append((dscp_of_pkt, (queue_0_num_of_pkts + queue_1_num_of_pkts + queue_2_num_of_pkts + queue_3_num_of_pkts + queue_4_num_of_pkts + queue_5_num_of_pkts + queue_6_num_of_pkts) - total_pkts)) + diff_list.append((dscp_of_pkt, q_cnt_sum - total_pkts)) print >> sys.stderr, 
queue_pkt_counters @@ -1620,11 +2673,14 @@ def runTest(self): print >> sys.stderr, map(operator.sub, queue_counters, queue_counters_base) # All packets sent should be received intact - assert(queue_0_num_of_pkts + queue_1_num_of_pkts + queue_2_num_of_pkts + queue_3_num_of_pkts + queue_4_num_of_pkts + queue_5_num_of_pkts + queue_6_num_of_pkts == total_pkts) + assert q_cnt_sum >= total_pkts, "Did not receive all packets that were sent." + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + class LossyQueueTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters dscp = int(self.test_params['dscp']) @@ -1678,12 +2734,17 @@ def runTest(self): src_port_vlan, ecn=ecn, ttl=ttl) + print >> sys.stderr, "dst_port_id: %d, src_port_id: %d src_port_vlan: %s" % (dst_port_id, src_port_id, src_port_vlan) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id) # get a snapshot of counter values at recv and transmit ports @@ -1705,11 +2766,14 @@ def runTest(self): sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) try: - # Since there is variability in packet 
class LossyQueueVoqTest(sai_base_test.ThriftInterfaceDataPlane):
    """Lossy-queue egress-drop test using two UDP flows (VOQ variant).

    Two UDP flows (differing only in destination port) are sent toward a
    TX-disabled egress port.  Sending one packet short of the egress drop
    threshold must produce no PFC and no drops; sending past the threshold
    must increment the egress drop counters without any PFC or ingress
    drops (lossy traffic is dropped, not paused).
    """

    def runTest(self):
        switch_init(self.client)
        # Silent baseline snapshot; diffed in the finally blocks below.
        stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True)

        # Parse input parameters
        dscp = int(self.test_params['dscp'])
        ecn = int(self.test_params['ecn'])
        # The pfc counter index starts from index 2 in sai_thrift_read_port_counters
        pg = int(self.test_params['pg']) + 2
        sonic_version = self.test_params['sonic_version']
        router_mac = self.test_params['router_mac']
        dst_port_id = int(self.test_params['dst_port_id'])
        dst_port_ip = self.test_params['dst_port_ip']
        dst_port_mac = self.dataplane.get_mac(0, dst_port_id)
        src_port_id = int(self.test_params['src_port_id'])
        src_port_ip = self.test_params['src_port_ip']
        src_port_mac = self.dataplane.get_mac(0, src_port_id)
        asic_type = self.test_params['sonic_asic_type']

        # get counter names to query
        ingress_counters, egress_counters = get_counter_names(sonic_version)

        # prepare tcp packet data
        ttl = 64

        pkts_num_leak_out = int(self.test_params['pkts_num_leak_out'])
        pkts_num_trig_egr_drp = int(self.test_params['pkts_num_trig_egr_drp'])
        if 'packet_size' in self.test_params.keys():
            packet_length = int(self.test_params['packet_size'])
            cell_size = int(self.test_params['cell_size'])
            if packet_length != 64:
                # Larger packets occupy multiple cells, so fewer packets are
                # needed to reach the drop threshold (Python 2 integer division).
                cell_occupancy = (packet_length + cell_size - 1) / cell_size
                pkts_num_trig_egr_drp /= cell_occupancy
        else:
            packet_length = 64

        pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac
        # crafting 2 udp packets with different udp_dport in order for traffic to go through different flows
        pkt = simple_udp_packet(pktlen=packet_length,
                                eth_dst=pkt_dst_mac,
                                eth_src=src_port_mac,
                                ip_src=src_port_ip,
                                ip_dst=dst_port_ip,
                                ip_tos=((dscp << 2) | ecn),
                                udp_sport=1024,
                                udp_dport=2048,
                                ip_ecn=ecn,
                                ip_ttl=ttl)

        pkt2 = simple_udp_packet(pktlen=packet_length,
                                 eth_dst=pkt_dst_mac,
                                 eth_src=src_port_mac,
                                 ip_src=src_port_ip,
                                 ip_dst=dst_port_ip,
                                 ip_tos=((dscp << 2) | ecn),
                                 udp_sport=1024,
                                 udp_dport=2049,
                                 ip_ecn=ecn,
                                 ip_ttl=ttl)

        print >> sys.stderr, "dst_port_id: %d, src_port_id: %d " % (dst_port_id, src_port_id)
        try:
            # in case dst_port_id is part of LAG, find out the actual dst port
            # for given IP parameters
            dst_port_id = get_rx_port(
                self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip
            )
        except:
            # Dump diagnostics before re-raising so the failed rx lookup is debuggable.
            show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats)
            raise
        print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id)

        # get a snapshot of counter values at recv and transmit ports
        # queue_counters value is not of our interest here
        recv_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id])
        xmit_counters_base, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id])
        # add slight tolerance in threshold characterization to consider
        # the case that npu puts packets in the egress queue after we pause the egress
        # or the leak out is simply less than expected as we have occasionally observed
        if 'pkts_num_margin' in self.test_params.keys():
            margin = int(self.test_params['pkts_num_margin'])
        else:
            margin = 2

        sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id])

        try:
            if asic_type == 'cisco-8000':
                fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, int(self.test_params['pg']),
                                      asic_type)
            # send packets short of triggering egress drop on flow1 and flow2
            send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_egr_drp - 1 - margin)
            send_packet(self, src_port_id, pkt2, pkts_num_leak_out + pkts_num_trig_egr_drp - 1 - margin)

            # allow enough time for the dut to sync up the counter values in counters_db
            time.sleep(8)
            # get a snapshot of counter values at recv and transmit ports
            # queue counters value is not of our interest here
            recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id])
            xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id])
            # recv port no pfc
            assert(recv_counters[pg] == recv_counters_base[pg])
            # recv port no ingress drop
            for cntr in ingress_counters:
                assert(recv_counters[cntr] == recv_counters_base[cntr])
            # xmit port no egress drop
            for cntr in egress_counters:
                assert(xmit_counters[cntr] == xmit_counters_base[cntr])

            # send 1 packet to trigger egress drop
            send_packet(self, src_port_id, pkt, 1 + 2 * margin)
            send_packet(self, src_port_id, pkt2, 1 + 2 * margin)
            # allow enough time for the dut to sync up the counter values in counters_db
            time.sleep(8)
            # get a snapshot of counter values at recv and transmit ports
            # queue counters value is not of our interest here
            recv_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[src_port_id])
            xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id])
            # recv port no pfc
            assert(recv_counters[pg] == recv_counters_base[pg])
            # recv port no ingress drop
            for cntr in ingress_counters:
                assert(recv_counters[cntr] == recv_counters_base[cntr])
            # xmit port egress drop
            for cntr in egress_counters:
                assert(xmit_counters[cntr] > xmit_counters_base[cntr])

        finally:
            # Always dump final stats and re-enable TX so the DUT is left usable.
            show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats)
            sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id])
self.test_params['router_mac'] print >> sys.stderr, "router_mac: %s" % (router_mac) pg = int(self.test_params['pg']) + ingress_counters, egress_counters = get_counter_names(self.test_params['sonic_version']) + dst_port_id = int(self.test_params['dst_port_id']) dst_port_ip = self.test_params['dst_port_ip'] dst_port_mac = self.dataplane.get_mac(0, dst_port_id) @@ -1807,11 +3010,15 @@ def runTest(self): ttl=ttl) print >> sys.stderr, "dst_port_id: %d, src_port_id: %d src_port_vlan: %s" % (dst_port_id, src_port_id, src_port_vlan) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id) # Add slight tolerance in threshold characterization to consider @@ -1823,41 +3030,68 @@ def runTest(self): margin = int(self.test_params['pkts_num_margin']) if self.test_params.get("pkts_num_margin") else 2 # Get a snapshot of counter values - xmit_counters_base, queue_counters_base = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) # For TH3, some packets stay in egress memory and doesn't show up in shared buffer or leakout if 'pkts_num_egr_mem' in self.test_params.keys(): pkts_num_egr_mem = int(self.test_params['pkts_num_egr_mem']) sai_thrift_port_tx_disable(self.client, asic_type, 
[dst_port_id]) + pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + dst_pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[dst_port_id]) + pg_shared_wm_res_base = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + dst_pg_shared_wm_res_base = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[dst_port_id]) # send packets try: - # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and - # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find + # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and + # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find # actual leakout by sending packets and reading actual leakout from HW - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32': - pkts_num_leak_out = pkts_num_leak_out - margin + if check_leackout_compensation_support(asic_type, hwsku): + pkts_num_leak_out = 0 + + xmit_counters_history, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + pg_min_pkts_num = 0 # send packets to fill pg min but not trek into shared pool # so if pg min is zero, it directly treks into shared pool by 1 # this is the case for lossy traffic if hwsku == 'DellEMC-Z9332f-O32' or hwsku == 'DellEMC-Z9332f-M-O16C64': - send_packet(self, src_port_id, pkt, pkts_num_egr_mem + pkts_num_leak_out + pkts_num_fill_min + margin) + pg_min_pkts_num = pkts_num_egr_mem + pkts_num_leak_out + pkts_num_fill_min + margin + send_packet(self, src_port_id, pkt, pg_min_pkts_num) + elif 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, pg, asic_type) else: - send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + pg_min_pkts_num = pkts_num_leak_out + pkts_num_fill_min + send_packet(self, src_port_id, pkt, pg_min_pkts_num) # allow enough time for the dut to sync 
up the counter values in counters_db time.sleep(8) - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32': - xmit_counters, queue_counters = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - actual_pkts_num_leak_out = xmit_counters[TRANSMITTED_PKTS] - xmit_counters_base[TRANSMITTED_PKTS] - if actual_pkts_num_leak_out > pkts_num_leak_out: - send_packet(self, src_port_id, pkt, actual_pkts_num_leak_out - pkts_num_leak_out) + if pg_min_pkts_num > 0 and check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_history, self, src_port_id, pkt, 40) - pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, port_list[src_port_id]) - print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, pg_shared_wm_res[pg]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + print >> sys.stderr, "Received packets: %d" % (pg_cntrs[pg] - pg_cntrs_base[pg]) + print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % (pg_min_pkts_num, pkts_num_fill_min, pg_shared_wm_res[pg]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], + base=[recv_counters_base, xmit_counters_base], + indexes=[pg + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='Filled PG min, srcport {}, dstport {}, base is first step'.format(src_port_id, dst_port_id)) + + show_counter('PgCnt', self, asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=[pg_cntrs_base, 
dst_pg_cntrs_base], indexes=[pg], + banner='Filled PG min, srcport {}, dstport {}, base is first step'.format(src_port_id, dst_port_id)) + + show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[pg_shared_wm_res, sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[dst_port_id])], + base=[pg_shared_wm_res_base, dst_pg_shared_wm_res_base], indexes=[pg], + banner='Filled PG min, srcport {}, dstport {}, base is first step'.format(src_port_id, dst_port_id)) if pkts_num_fill_min: assert(pg_shared_wm_res[pg] == 0) @@ -1874,7 +3108,12 @@ def runTest(self): expected_wm = 0 total_shared = pkts_num_fill_shared - pkts_num_fill_min pkts_inc = (total_shared / cell_occupancy) >> 2 - pkts_num = 1 + margin + if 'cisco-8000' in asic_type: + # No additional packet margin needed while sending, + # but small margin still needed during boundary checks below + pkts_num = 1 + else: + pkts_num = 1 + margin fragment = 0 while (expected_wm < total_shared - fragment): expected_wm += pkts_num * cell_occupancy @@ -1887,25 +3126,72 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) + + if pg_min_pkts_num == 0 and pkts_num <= 1 + margin and check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_history, self, src_port_id, pkt, 40) + # these counters are clear on read, ensure counter polling # is disabled before the test - pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, port_list[src_port_id]) + pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, "Received packets: %d" % (pg_cntrs[pg] - pg_cntrs_base[pg]) print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound (+%d): %d" % (expected_wm * cell_size, 
pg_shared_wm_res[pg], margin, (expected_wm + margin) * cell_size) - assert(pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) - assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], + base=[recv_counters_base, xmit_counters_base], + indexes=[pg + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='To fill PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + + show_counter('PgCnt', self, asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=[pg_cntrs_base, dst_pg_cntrs_base], indexes=[pg], + banner='To fill PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + + show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[pg_shared_wm_res, sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[dst_port_id])], + base=[pg_shared_wm_res_base, dst_pg_shared_wm_res_base], indexes=[pg], + banner='To fill PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + + assert(expected_wm * cell_size <= pg_shared_wm_res[pg] <= (expected_wm + margin) * cell_size) pkts_num = pkts_inc # overflow the shared pool send_packet(self, src_port_id, pkt, pkts_num) time.sleep(8) - pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, port_list[src_port_id]) + pg_shared_wm_res = sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[src_port_id]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, "Received packets: %d" % (pg_cntrs[pg] - pg_cntrs_base[pg]) print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: 
%d, actual value: %d" % (pkts_num, ((expected_wm + cell_occupancy) * cell_size), pg_shared_wm_res[pg]) + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], + base=[recv_counters_base, xmit_counters_base], + indexes=[pg + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='To overflow PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + + show_counter('PgCnt', self, asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=[pg_cntrs_base, dst_pg_cntrs_base], indexes=[pg], + banner='To overflow PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + + show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[pg_shared_wm_res, sai_thrift_read_pg_shared_watermark(self.client, asic_type, port_list[dst_port_id])], + base=[pg_shared_wm_res_base, dst_pg_shared_wm_res_base], indexes=[pg], + banner='To overflow PG share pool, send {} pkt, srcport {}, dstport {}, base is first step'.format( + pkts_num, src_port_id, dst_port_id)) + assert(fragment < cell_occupancy) - assert(expected_wm * cell_size <= pg_shared_wm_res[pg]) - assert(pg_shared_wm_res[pg] <= (expected_wm + margin + cell_occupancy) * cell_size) + assert(expected_wm * cell_size <= pg_shared_wm_res[pg] <= (expected_wm + margin + cell_occupancy) * cell_size) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # pg headroom is a notion for lossless traffic only @@ -1913,6 +3199,7 @@ class PGHeadroomWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): time.sleep(5) 
switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters dscp = int(self.test_params['dscp']) @@ -1939,6 +3226,10 @@ def runTest(self): ttl = 64 default_packet_length = 64 pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac + is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + pkt_dst_mac = def_vlan_mac pkt = construct_ip_pkt(default_packet_length, pkt_dst_mac, src_port_mac, @@ -1950,11 +3241,15 @@ def runTest(self): ttl=ttl) print >> sys.stderr, "dst_port_id: %d, src_port_id: %d, src_port_vlan: %s" % (dst_port_id, src_port_id, src_port_vlan) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id) @@ -1972,8 +3267,15 @@ def runTest(self): sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + xmit_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # send packets try: + # Starting with zero pkts_num_leak_out and trying to find + # actual leakout by sending packets and reading actual leakout from HW. 
+ if check_leackout_compensation_support(asic_type, hwsku): + pkts_num_leak_out = 0 + # send packets to trigger pfc but not trek into headroom if hwsku == 'DellEMC-Z9332f-O32' or hwsku == 'DellEMC-Z9332f-M-O16C64': send_packet(self, src_port_id, pkt, pkts_num_egr_mem + pkts_num_leak_out + pkts_num_trig_pfc - margin) @@ -1981,6 +3283,10 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_trig_pfc - margin) time.sleep(8) + + if check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_base, self, src_port_id, pkt, 30) + q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) assert(pg_headroom_wm_res[pg] == 0) @@ -2021,14 +3327,112 @@ def runTest(self): assert((expected_wm - margin) * cell_size <= pg_headroom_wm_res[pg]) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + +class PGDropTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): + time.sleep(5) + switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) + + # Parse input parameters + dscp = int(self.test_params['dscp']) + ecn = int(self.test_params['ecn']) + router_mac = self.test_params['router_mac'] + pg = int(self.test_params['pg']) + queue = int(self.test_params['queue']) + dst_port_id = int(self.test_params['dst_port_id']) + dst_port_ip = self.test_params['dst_port_ip'] + dst_port_mac = self.dataplane.get_mac(0, dst_port_id) + src_port_id = int(self.test_params['src_port_id']) + src_port_ip = self.test_params['src_port_ip'] + src_port_vlan = 
self.test_params['src_port_vlan'] + src_port_mac = self.dataplane.get_mac(0, src_port_id) + asic_type = self.test_params['sonic_asic_type'] + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + # Should be set to cause at least 1 drop at ingress + pkts_num_trig_ingr_drp = int(self.test_params['pkts_num_trig_ingr_drp']) + iterations = int(self.test_params['iterations']) + margin = int(self.test_params['pkts_num_margin']) + + pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac + try: + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise + # Prepare IP packet data + ttl = 64 + packet_length = 64 + pkt = construct_ip_pkt(packet_length, + pkt_dst_mac, + src_port_mac, + src_port_ip, + dst_port_ip, + dscp, + src_port_vlan, + ecn=ecn, + ttl=ttl) + + print >> sys.stderr, "test dst_port_id: {}, src_port_id: {}, src_vlan: {}".format( + dst_port_id, src_port_id, src_port_vlan + ) + + try: + pass_iterations = 0 + assert iterations > 0, "Need at least 1 iteration" + for test_i in range(iterations): + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + + pg_dropped_cntrs_base = sai_thrift_read_pg_drop_counters(self.client, port_list[src_port_id]) + + # Send packets to trigger PFC + print >> sys.stderr, "Iteration {}/{}, sending {} packets to trigger PFC".format(test_i + 1, iterations, pkts_num_trig_pfc) + send_packet(self, src_port_id, pkt, pkts_num_trig_pfc) + + # Account for leakout + if 'cisco-8000' in asic_type: + queue_counters = sai_thrift_read_queue_occupancy(self.client, dst_port_id) + occ_pkts = queue_counters[queue] / (packet_length + 24) + leaked_pkts = pkts_num_trig_pfc - occ_pkts + print >> sys.stderr, "resending leaked packets {}".format(leaked_pkts) + send_packet(self, src_port_id, pkt, leaked_pkts) + + # 
Trigger drop + pkt_inc = pkts_num_trig_ingr_drp + margin - pkts_num_trig_pfc + print >> sys.stderr, "sending {} additional packets to trigger ingress drop".format(pkt_inc) + send_packet(self, src_port_id, pkt, pkt_inc) + + pg_dropped_cntrs = sai_thrift_read_pg_drop_counters(self.client, port_list[src_port_id]) + pg_drops = pg_dropped_cntrs[pg] - pg_dropped_cntrs_base[pg] + + actual_num_trig_ingr_drp = pkts_num_trig_ingr_drp + margin - (pg_drops - 1) + ingr_drop_diff = actual_num_trig_ingr_drp - pkts_num_trig_ingr_drp + if abs(ingr_drop_diff) < margin: + pass_iterations += 1 + print >> sys.stderr, "expected trig drop: {}, actual trig drop: {}, diff: {}".format(pkts_num_trig_ingr_drp, actual_num_trig_ingr_drp, ingr_drop_diff) + + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + + print >> sys.stderr, "pass iterations: {}, total iterations: {}, margin: {}".format(pass_iterations, iterations, margin) + assert pass_iterations >= int(0.75 * iterations), "Passed iterations {} insufficient to meet minimum required iterations {}".format(pass_iterations, int(0.75 * iterations)) + + finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) class QSharedWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): + def runTest(self): time.sleep(5) switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters + ingress_counters, egress_counters = get_counter_names(self.test_params['sonic_version']) dscp = int(self.test_params['dscp']) ecn = int(self.test_params['ecn']) router_mac = self.test_params['router_mac'] @@ -2059,6 +3463,10 @@ def runTest(self): # Prepare TCP packet data ttl = 64 pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac + 
is_dualtor = self.test_params.get('is_dualtor', False) + def_vlan_mac = self.test_params.get('def_vlan_mac', None) + if is_dualtor and def_vlan_mac != None: + pkt_dst_mac = def_vlan_mac pkt = construct_ip_pkt(packet_length, pkt_dst_mac, src_port_mac, @@ -2070,11 +3478,15 @@ def runTest(self): ttl=ttl) print >> sys.stderr, "dst_port_id: %d, src_port_id: %d, src_port_vlan: %s" % (dst_port_id, src_port_id, src_port_vlan) - # in case dst_port_id is part of LAG, find out the actual dst port - # for given IP parameters - dst_port_id = get_rx_port( - self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan - ) + try: + # in case dst_port_id is part of LAG, find out the actual dst port + # for given IP parameters + dst_port_id = get_rx_port( + self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan + ) + except: + show_stats(self.__class__.__name__ + ' no rx pkt', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) + raise print >> sys.stderr, "actual dst_port_id: %d" % (dst_port_id) # Add slight tolerance in threshold characterization to consider @@ -2090,39 +3502,79 @@ def runTest(self): if 'pkts_num_egr_mem' in self.test_params.keys(): pkts_num_egr_mem = int(self.test_params['pkts_num_egr_mem']) - xmit_counters_base, queue_counters_base = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + recv_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + xmit_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + dst_pg_cntrs_base = sai_thrift_read_pg_counters(self.client, port_list[dst_port_id]) + q_wm_res_base, pg_shared_wm_res_base, pg_headroom_wm_res_base = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + dst_q_wm_res_base, 
dst_pg_shared_wm_res_base, dst_pg_headroom_wm_res_base = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) # send packets try: - # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and - # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find + # Since there is variability in packet leakout in hwsku Arista-7050CX3-32S-D48C8 and + # Arista-7050CX3-32S-C32. Starting with zero pkts_num_leak_out and trying to find # actual leakout by sending packets and reading actual leakout from HW - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-O32' or hwsku == 'DellEMC-Z9332f-M-O16C64': - pkts_num_leak_out = pkts_num_leak_out - margin + if check_leackout_compensation_support(asic_type, hwsku): + pkts_num_leak_out = 0 + + xmit_counters_history, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + que_min_pkts_num = 0 # send packets to fill queue min but not trek into shared pool # so if queue min is zero, it will directly trek into shared pool by 1 # TH2 uses scheduler-based TX enable, this does not require sending packets # to leak out if hwsku == 'DellEMC-Z9332f-O32' or hwsku == 'DellEMC-Z9332f-M-O16C64': - send_packet(self, src_port_id, pkt, pkts_num_egr_mem + pkts_num_leak_out + pkts_num_fill_min) + que_min_pkts_num = pkts_num_egr_mem + pkts_num_leak_out + pkts_num_fill_min + send_packet(self, src_port_id, pkt, que_min_pkts_num) else: - send_packet(self, src_port_id, pkt, pkts_num_leak_out + pkts_num_fill_min) + que_min_pkts_num = pkts_num_leak_out + pkts_num_fill_min + send_packet(self, src_port_id, pkt, que_min_pkts_num) # allow enough time for the dut to sync up the counter values in counters_db time.sleep(8) - if hwsku == 'Arista-7050CX3-32S-D48C8' or hwsku == 'Arista-7050CX3-32S-C32' or hwsku == 'DellEMC-Z9332f-O32' or hwsku == 'DellEMC-Z9332f-M-O16C64': - xmit_counters, queue_counters = 
sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) - actual_pkts_num_leak_out = xmit_counters[TRANSMITTED_PKTS] - xmit_counters_base[TRANSMITTED_PKTS] - if actual_pkts_num_leak_out > pkts_num_leak_out: - send_packet(self, src_port_id, pkt, actual_pkts_num_leak_out - pkts_num_leak_out) + if que_min_pkts_num > 0 and check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_history, self, src_port_id, pkt, 40) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) - print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, q_wm_res[queue]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((que_min_pkts_num), pkts_num_fill_min, q_wm_res[queue]) + print >> sys.stderr, "Received packets: %d" % (pg_cntrs[queue] - pg_cntrs_base[queue]) + + ptf_cnt_prev, _ = show_counter('PtfCnt', self, asic_type, self.test_params.get('test_port_ids', None), + base=stats[0], banner='Filled queue min, base is previous step') + + port_cnt_prev, _ = show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], + base=[recv_counters_base, xmit_counters_base], + indexes=[queue + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='Filled queue min, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + pg_cnt_prev, _ = show_counter('PgCnt', self, asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=[pg_cntrs_base, dst_pg_cntrs_base], indexes=[queue], + 
banner='Filled queue min, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + src_port_wm = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + + pg_share_wm_prev, _ = show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[1], pg_shared_wm_res], base=[pg_shared_wm_res_base, dst_pg_shared_wm_res_base], indexes=[queue], + banner='Filled queue min, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + pg_headroom_wm_prev, _ = show_counter('PgHeadroomWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[2], pg_headroom_wm_res], base=[pg_headroom_wm_res_base, dst_pg_headroom_wm_res_base], indexes=[queue], + banner='Filled queue min, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + que_share_wm_prev, _ = show_counter('QueShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[0], q_wm_res], base=[q_wm_res_base, dst_q_wm_res_base], indexes=[queue], + banner='Filled queue min, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + if pkts_num_fill_min: assert(q_wm_res[queue] == 0) + elif 'cisco-8000' in asic_type: + assert(q_wm_res[queue] <= (margin + 1) * cell_size) else: assert(q_wm_res[queue] <= 1 * cell_size) @@ -2131,7 +3583,11 @@ def runTest(self): expected_wm = 0 total_shared = pkts_num_trig_drp - pkts_num_fill_min - 1 pkts_inc = (total_shared / cell_occupancy) >> 2 - pkts_num = 1 + margin + if 'cisco-8000' in asic_type: + pkts_total = 0 # track total desired queue fill level + pkts_num = 1 + else: + pkts_num = 1 + margin fragment = 0 while (expected_wm < total_shared - fragment): expected_wm += pkts_num * cell_occupancy @@ -2140,29 +3596,114 @@ def runTest(self): pkts_num -= diff expected_wm -= diff * cell_occupancy fragment = total_shared - expected_wm + + if 'cisco-8000' in asic_type: + sai_thrift_port_tx_disable(self.client, asic_type, 
[dst_port_id]) + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, queue, asic_type) + pkts_total += pkts_num + pkts_num = pkts_total - 1 + print >> sys.stderr, "pkts num to send: %d, total pkts: %d, queue shared: %d" % (pkts_num, expected_wm, total_shared) send_packet(self, src_port_id, pkt, pkts_num) + + if 'cisco-8000' in asic_type: + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + time.sleep(8) + + if que_min_pkts_num == 0 and pkts_num <= 1 + margin and check_leackout_compensation_support(asic_type, hwsku): + dynamically_compensate_leakout(self.client, sai_thrift_read_port_counters, port_list[dst_port_id], TRANSMITTED_PKTS, xmit_counters_history, self, src_port_id, pkt, 40) + # these counters are clear on read, ensure counter polling # is disabled before the test q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print >> sys.stderr, "Received packets: %d" % (pg_cntrs[queue] - pg_cntrs_base[queue]) print >> sys.stderr, "lower bound: %d, actual value: %d, upper bound: %d" % ((expected_wm - margin) * cell_size, q_wm_res[queue], (expected_wm + margin) * cell_size) - assert(q_wm_res[queue] <= (expected_wm + margin) * cell_size) - assert((expected_wm - margin) * cell_size <= q_wm_res[queue]) + + ptf_cnt_prev, _ = show_counter('PtfCnt', self, asic_type, self.test_params.get('test_port_ids', None), + base=ptf_cnt_prev, banner='Fill queue shared, base is previous step') + + port_cnt_prev, _ = show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], base=port_cnt_prev, + indexes=[queue + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='Fill queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + pg_cnt_prev, _ = show_counter('PgCnt', self, 
asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=pg_cnt_prev, indexes=[queue], + banner='Fill queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + src_port_wm = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + + pg_share_wm_prev, _ = show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[1], pg_shared_wm_res], base=pg_share_wm_prev, indexes=[queue], + banner='Fill queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + pg_headroom_wm_prev, _ = show_counter('PgHeadroomWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[2], pg_headroom_wm_res], base=pg_headroom_wm_prev, indexes=[queue], + banner='Fill queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + que_share_wm_prev, _ = show_counter('QueShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[0], q_wm_res], base=que_share_wm_prev, indexes=[queue], + banner='Fill queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + assert((expected_wm - margin) * cell_size <= q_wm_res[queue] <= (expected_wm + margin) * cell_size) pkts_num = pkts_inc + if 'cisco-8000' in asic_type: + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, queue, asic_type) + pkts_total += pkts_num + pkts_num = pkts_total - 1 + # overflow the shared pool send_packet(self, src_port_id, pkt, pkts_num) + + if 'cisco-8000' in asic_type: + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + time.sleep(8) q_wm_res, pg_shared_wm_res, pg_headroom_wm_res = sai_thrift_read_port_watermarks(self.client, port_list[dst_port_id]) + pg_cntrs = sai_thrift_read_pg_counters(self.client, port_list[src_port_id]) + print 
>> sys.stderr, "Received packets: %d" % (pg_cntrs[queue] - pg_cntrs_base[queue]) print >> sys.stderr, "exceeded pkts num sent: %d, actual value: %d, lower bound: %d, upper bound: %d" % (pkts_num, q_wm_res[queue], expected_wm * cell_size, (expected_wm + margin) * cell_size) + + ptf_cnt_prev, _ = show_counter('PtfCnt', self, asic_type, self.test_params.get('test_port_ids', None), + base=ptf_cnt_prev, banner='Overflow queue shared, base is previous step') + + show_counter('PortCnt', self, asic_type, [src_port_id, dst_port_id], + base=port_cnt_prev, + indexes=[queue + 2] + ingress_counters + egress_counters + + [TRANSMITTED_PKTS, RECEIVED_PKTS, RECEIVED_NON_UC_PKTS, TRANSMITTED_NON_UC_PKTS, EGRESS_PORT_QLEN], + banner='Overflow queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + show_counter('PgCnt', self, asic_type, [src_port_id, dst_port_id], + current=[pg_cntrs, sai_thrift_read_pg_counters(self.client, port_list[dst_port_id])], + base=pg_cnt_prev, indexes=[queue], + banner='Overflow queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + src_port_wm = sai_thrift_read_port_watermarks(self.client, port_list[src_port_id]) + + show_counter('PgShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[1], pg_shared_wm_res], base=pg_share_wm_prev, indexes=[queue], + banner='Overflow queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + show_counter('PgHeadroomWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[2], pg_headroom_wm_res], base=pg_headroom_wm_prev, indexes=[queue], + banner='Overflow queue shared, srcport {}, dstport {}, base is previous step'.format(src_port_id, dst_port_id)) + + show_counter('QueShareWm', self, asic_type, [src_port_id, dst_port_id], + current=[src_port_wm[0], q_wm_res], base=que_share_wm_prev, indexes=[queue], + banner='Overflow queue shared, srcport {}, dstport {}, 
base is previous step'.format(src_port_id, dst_port_id)) + assert(fragment < cell_occupancy) - assert(expected_wm * cell_size <= q_wm_res[queue]) - assert(q_wm_res[queue] <= (expected_wm + margin) * cell_size) + assert(expected_wm * cell_size <= q_wm_res[queue] <= (expected_wm + margin) * cell_size) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) # TODO: buffer pool roid should be obtained via rpc calls @@ -2172,6 +3713,7 @@ class BufferPoolWatermarkTest(sai_base_test.ThriftInterfaceDataPlane): def runTest(self): time.sleep(5) switch_init(self.client) + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), silent=True) # Parse input parameters dscp = int(self.test_params['dscp']) @@ -2198,30 +3740,46 @@ def runTest(self): buf_pool_roid=int(self.test_params['buf_pool_roid'], 0) print >> sys.stderr, "buf_pool_roid: 0x%lx" % (buf_pool_roid) + buffer_pool_wm_base = 0 + if 'cisco-8000' in asic_type: + # Some small amount of memory is always occupied + buffer_pool_wm_base = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + # Prepare TCP packet data tos = dscp << 2 tos |= ecn ttl = 64 - default_packet_length = 64 - pkt = simple_tcp_packet(pktlen=default_packet_length, + + if 'packet_size' in self.test_params.keys(): + packet_length = int(self.test_params['packet_size']) + else: + packet_length = 64 + + cell_occupancy = (packet_length + cell_size - 1) / cell_size + pkt = simple_tcp_packet(pktlen=packet_length, eth_dst=router_mac if router_mac != '' else dst_port_mac, eth_src=src_port_mac, ip_src=src_port_ip, ip_dst=dst_port_ip, ip_tos=tos, ip_ttl=ttl) + # Add slight tolerance in threshold characterization to consider # the case that cpu puts packets in the egress queue after we pause the egress # 
or the leak out is simply less than expected as we have occasionally observed - upper_bound_margin = 2 - # On TD2, we found the watermark value is always short of the expected - # value by 1 - lower_bound_margin = 1 + upper_bound_margin = 2 * cell_occupancy + if 'cisco-8000' in asic_type: + lower_bound_margin = 2 * cell_occupancy + else: + # On TD2, we found the watermark value is always short of the expected + # value by 1 + lower_bound_margin = 1 + # On TH2 using scheduler-based TX enable, we find the Q min being inflated # to have 0x10 = 16 cells. This effect is captured in lossy traffic ingress # buffer pool test and lossy traffic egress buffer pool test to illusively # have extra capacity in the buffer pool space - extra_cap_margin = 8 + extra_cap_margin = 8 * cell_occupancy # Adjust the methodology to enable TX for each incremental watermark value test # To this end, send the total # of packets instead of the incremental amount @@ -2242,7 +3800,7 @@ def runTest(self): send_packet(self, src_port_id, pkt, pkts_num_to_send) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) - buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) - buffer_pool_wm_base print >> sys.stderr, "Init pkts num sent: %d, min: %d, actual watermark value to start: %d" % ((pkts_num_leak_out + pkts_num_fill_min), pkts_num_fill_min, buffer_pool_wm) if pkts_num_fill_min: assert(buffer_pool_wm <= upper_bound_margin * cell_size) @@ -2257,22 +3815,31 @@ def runTest(self): # send packet batch of fixed packet numbers to fill shared # first round sends only 1 packet expected_wm = 0 - total_shared = pkts_num_fill_shared - pkts_num_fill_min - pkts_inc = total_shared >> 2 - pkts_num = 1 + upper_bound_margin + total_shared = (pkts_num_fill_shared - pkts_num_fill_min) * cell_occupancy + pkts_inc = (total_shared >> 2) // cell_occupancy + if 'cisco-8000' in 
asic_type: + # No additional packet margin needed while sending, + # but small margin still needed during boundary checks below + pkts_num = 1 + else: + pkts_num = (1 + upper_bound_margin) // cell_occupancy while (expected_wm < total_shared): - expected_wm += pkts_num + expected_wm += pkts_num * cell_occupancy if (expected_wm > total_shared): - pkts_num -= (expected_wm - total_shared) + pkts_num -= (expected_wm - total_shared + cell_occupancy - 1) // cell_occupancy expected_wm = total_shared print >> sys.stderr, "pkts num to send: %d, total pkts: %d, shared: %d" % (pkts_num, expected_wm, total_shared) sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) pkts_num_to_send += pkts_num - send_packet(self, src_port_id, pkt, pkts_num_to_send) + if 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, queue, asic_type) + send_packet(self, src_port_id, pkt, pkts_num_to_send - 1) + else: + send_packet(self, src_port_id, pkt, pkts_num_to_send) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) - buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) - buffer_pool_wm_base print >> sys.stderr, "lower bound (-%d): %d, actual value: %d, upper bound (+%d): %d" % (lower_bound_margin, (expected_wm - lower_bound_margin)* cell_size, buffer_pool_wm, upper_bound_margin, (expected_wm + upper_bound_margin) * cell_size) assert(buffer_pool_wm <= (expected_wm + upper_bound_margin) * cell_size) assert((expected_wm - lower_bound_margin)* cell_size <= buffer_pool_wm) @@ -2282,16 +3849,22 @@ def runTest(self): # overflow the shared pool sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) pkts_num_to_send += pkts_num - send_packet(self, src_port_id, pkt, pkts_num_to_send) + if 'cisco-8000' in asic_type: + fill_leakout_plus_one(self, src_port_id, dst_port_id, pkt, queue, asic_type) + 
send_packet(self, src_port_id, pkt, pkts_num_to_send - 1) + else: + send_packet(self, src_port_id, pkt, pkts_num_to_send) + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) time.sleep(8) - buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) + buffer_pool_wm = sai_thrift_read_buffer_pool_watermark(self.client, buf_pool_roid) - buffer_pool_wm_base print >> sys.stderr, "exceeded pkts num sent: %d, expected watermark: %d, actual value: %d" % (pkts_num, (expected_wm * cell_size), buffer_pool_wm) assert(expected_wm == total_shared) assert((expected_wm - lower_bound_margin)* cell_size <= buffer_pool_wm) assert(buffer_pool_wm <= (expected_wm + extra_cap_margin) * cell_size) finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', None), bases=stats) sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) @@ -2335,3 +3908,133 @@ def runTest(self): packet_count, src_port_id ) send_packet(self, src_port_id, pkt, packet_count) + + +# PFC test on tunnel traffic (dualtor specific test case) +class PCBBPFCTest(sai_base_test.ThriftInterfaceDataPlane): + + def _build_testing_ipinip_pkt(self, active_tor_mac, standby_tor_mac, active_tor_ip, standby_tor_ip, inner_dscp, outer_dscp, dst_ip, ecn=1, packet_size=64): + pkt = simple_tcp_packet( + pktlen=packet_size, + eth_dst=standby_tor_mac, + ip_src='1.1.1.1', + ip_dst=dst_ip, + ip_dscp=inner_dscp, + ip_ecn=ecn, + ip_ttl=64 + ) + # The pktlen is ignored if inner_frame is not None + ipinip_packet = simple_ipv4ip_packet( + eth_dst=active_tor_mac, + eth_src=standby_tor_mac, + ip_src=standby_tor_ip, + ip_dst=active_tor_ip, + ip_dscp=outer_dscp, + ip_ecn=ecn, + inner_frame=pkt[scapy.IP] + ) + return ipinip_packet + + def _build_testing_pkt(self, active_tor_mac, dscp, dst_ip, ecn=1, packet_size=64): + pkt = simple_tcp_packet( + pktlen=packet_size, + eth_dst=active_tor_mac, + ip_src='1.1.1.1', + ip_dst=dst_ip, 
+ ip_dscp=dscp, + ip_ecn=ecn, + ip_ttl=64 + ) + return pkt + + def runTest(self): + """ + This test case is to verify PFC for tunnel traffic. + Traffic is ingressed from IPinIP tunnel(LAG port), and then being decaped at active tor, and then egress to server. + Tx is disabled on the egress port to trigger PFC pause. + """ + switch_init(self.client) + + # Parse input parameters + active_tor_mac = self.test_params['active_tor_mac'] + active_tor_ip = self.test_params['active_tor_ip'] + standby_tor_mac = self.test_params['standby_tor_mac'] + standby_tor_ip = self.test_params['standby_tor_ip'] + src_port_id = self.test_params['src_port_id'] + dst_port_id = self.test_params['dst_port_id'] + dst_port_ip = self.test_params['dst_port_ip'] + + stats = show_stats('just collect base data', self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', [src_port_id, dst_port_id]), silent=True) + + inner_dscp = int(self.test_params['dscp']) + tunnel_traffic_test = False + if 'outer_dscp' in self.test_params: + outer_dscp = int(self.test_params['outer_dscp']) + tunnel_traffic_test = True + ecn = int(self.test_params['ecn']) + pkts_num_trig_pfc = int(self.test_params['pkts_num_trig_pfc']) + # The pfc counter index starts from index 2 in sai_thrift_read_port_counters + pg = int(self.test_params['pg']) + 2 + + asic_type = self.test_params['sonic_asic_type'] + if 'packet_size' in list(self.test_params.keys()): + packet_size = int(self.test_params['packet_size']) + else: + packet_size = 64 + if 'pkts_num_margin' in list(self.test_params.keys()): + pkts_num_margin = int(self.test_params['pkts_num_margin']) + else: + pkts_num_margin = 2 + + try: + # Disable tx on EGRESS port so that headroom buffer cannot be free + sai_thrift_port_tx_disable(self.client, asic_type, [dst_port_id]) + # Make a snapshot of transmitted packets + tx_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + # Make a snapshot of received packets + 
rx_counters_base, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + if tunnel_traffic_test: + # Build IPinIP packet for testing + pkt = self._build_testing_ipinip_pkt(active_tor_mac=active_tor_mac, + standby_tor_mac=standby_tor_mac, + active_tor_ip=active_tor_ip, + standby_tor_ip=standby_tor_ip, + inner_dscp=inner_dscp, + outer_dscp=outer_dscp, + dst_ip=dst_port_ip, + ecn=ecn, + packet_size=packet_size + ) + else: + # Build regular packet + pkt = self._build_testing_pkt(active_tor_mac=active_tor_mac, + dscp=inner_dscp, + dst_ip=dst_port_ip, + ecn=ecn, + packet_size=packet_size) + + # Send packets short of triggering pfc + send_packet(self, src_port_id, pkt, pkts_num_trig_pfc) + time.sleep(8) + # Read TX_OK again to calculate leaked packet number + tx_counters, _ = sai_thrift_read_port_counters(self.client, port_list[dst_port_id]) + leaked_packet_number = tx_counters[TRANSMITTED_PKTS] - tx_counters_base[TRANSMITTED_PKTS] + # Send packets to compensate the leaked packets + send_packet(self, src_port_id, pkt, leaked_packet_number) + time.sleep(8) + # Read rx counter again. 
No PFC pause frame should be triggered + rx_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + # Verify no pfc + assert(rx_counters[pg] == rx_counters_base[pg]) + rx_counters_base = rx_counters + # Send some packets to trigger PFC + send_packet(self, src_port_id, pkt, 1 + 2 * pkts_num_margin) + time.sleep(8) + rx_counters, _ = sai_thrift_read_port_counters(self.client, port_list[src_port_id]) + # Verify PFC pause frame is generated on expected PG + assert(rx_counters[pg] > rx_counters_base[pg]) + finally: + show_stats(self.__class__.__name__, self, self.test_params.get('sonic_asic_type', None), self.test_params.get('test_port_ids', [src_port_id, dst_port_id]), bases=stats) + # Enable tx on dest port + sai_thrift_port_tx_enable(self.client, asic_type, [dst_port_id]) + diff --git a/tests/saitests/sai_rpc_caller.py b/tests/saitests/sai_rpc_caller.py new file mode 100644 index 00000000000..322a3860fb4 --- /dev/null +++ b/tests/saitests/sai_rpc_caller.py @@ -0,0 +1,60 @@ +""" + Script to query any SAI variable from syncd-rpc-container. The script supports any + SAI variable that is defined in switch_sai_thrift.sai_headers library. 
This script + needs to be run in the PTF container using the commandline: + + Usage: + ptf --test-dir ixia_saitests/saitests sai_rpc_caller.RPC_Caller --platform-dir ixia_ptftests/ptftests/ --platform remote -t 'dutport=44;port_map="0@0";server="1.72.33.5";sai_values=["SAI_QUEUE_STAT_PACKETS","SAI_QUEUE_STAT_WRED_ECN_MARKED_PACKETS"];clear_only=False' + To clear all sai counters: + ptf --test-dir ixia_saitests/saitests sai_rpc_caller.RPC_Caller --platform-dir ixia_ptftests/ptftests/ --platform remote -t 'dutport=44;port_map="0@0";server="1.72.33.5";sai_values=[];clear_only=True' +""" +import sai_base_test +from switch import * +from switch_sai_thrift.sai_headers import * +import ptf.testutils as testutils + +class RPC_Caller(sai_base_test.ThriftInterfaceDataPlane): + """ + + To check for the queue ECN marked packets counts. + port's OID and tc_class are required. + + port: port's OID value + tc_class : 0-7 TC class value + sai_values : The actual sai values to query for. + """ + + def runTest(self): + self.test_params = testutils.test_params_get() + # Parse input parameters + port = "Ethernet" + str(self.test_params['dutport']) + stats = self.test_params['sai_values'] + + clear_only = self.test_params['clear_only'] + switch_init(self.client) + if clear_only: + sai_thrift_clear_all_counters(self.client) + return + + numeric_stats = [] + for stat in stats: + numeric_stats.append(globals()[stat]) + + port_oid = self.client.sai_thrift_get_port_id_by_front_port(port) + attrs = self.client.sai_thrift_get_port_attribute(port_oid) + queue_stats = [x.value for x in attrs.attr_list if x.id == SAI_PORT_ATTR_QOS_QUEUE_LIST] + queue_ids = queue_stats[0].objlist.object_id_list + + thrift_results=[] + queue_counters_results=[] + + queue_counters_results = [] + tc_count = 0 + for queue in queue_ids: + if tc_count <= 7: + thrift_results = self.client.sai_thrift_get_queue_stats(queue,numeric_stats,len(numeric_stats)) + queue_counters_results.append(thrift_results) + tc_count += 1 + 
+ print (queue_counters_results) + return (queue_counters_results) diff --git a/tests/saitests/switch.py b/tests/saitests/switch.py index cab483594df..85973b08238 100644 --- a/tests/saitests/switch.py +++ b/tests/saitests/switch.py @@ -74,12 +74,6 @@ def switch_init(client): else: print "unknown switch attribute" - # TOFIX in brcm sai: This causes the following error on td2 (a7050-qx-32s) - # ERR syncd: brcm_sai_set_switch_attribute:842 updating switch mac addr failed with error -2. - attr_value = sai_thrift_attribute_value_t(mac='00:77:66:55:44:33') - attr = sai_thrift_attribute_t(id=SAI_SWITCH_ATTR_SRC_MAC_ADDRESS, value=attr_value) - client.sai_thrift_set_switch_attribute(attr) - # wait till the port are up time.sleep(10) @@ -665,6 +659,10 @@ def sai_thrift_read_port_counters(client,port): port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_UCAST_PKTS) port_cnt_ids.append(SAI_PORT_STAT_IN_DROPPED_PKTS) port_cnt_ids.append(SAI_PORT_STAT_OUT_DROPPED_PKTS) + port_cnt_ids.append(SAI_PORT_STAT_IF_IN_UCAST_PKTS) + port_cnt_ids.append(SAI_PORT_STAT_IF_IN_NON_UCAST_PKTS) + port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_NON_UCAST_PKTS) + port_cnt_ids.append(SAI_PORT_STAT_IF_OUT_QLEN) counters_results=[] counters_results = client.sai_thrift_get_port_stats(port,port_cnt_ids,len(port_cnt_ids)) @@ -769,10 +767,8 @@ def sai_thrift_read_pg_drop_counters(client, port_id): return pg_cntrs -def sai_thrift_read_pg_shared_watermark(client, port_id): - pg_cntr_ids=[ - SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES - ] +def sai_thrift_read_pg_shared_watermark(client, asic_type, port_id): + pg_cntr_ids = [SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES] # fetch pg ids under port id pg_ids = [] @@ -813,6 +809,24 @@ def sai_thrift_read_headroom_pool_watermark(client, buffer_pool_id): return None return wm_vals[0] +def sai_thrift_read_queue_occupancy(client, port_id): + queue_list=[] + port_attr_list = client.sai_thrift_get_port_attribute(port_list[port_id]) + attr_list = 
port_attr_list.attr_list + for attribute in attr_list: + if attribute.id == SAI_PORT_ATTR_QOS_QUEUE_LIST: + for queue_id in attribute.value.objlist.object_id_list: + queue_list.append(queue_id) + cnt_ids=[SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES] + queue_counters_results=[] + queue1=0 + for queue in queue_list: + if queue1 <= 7: + thrift_results=client.sai_thrift_get_queue_stats(queue,cnt_ids,len(cnt_ids)) + queue_counters_results.append(thrift_results[0]) + queue1+=1 + return queue_counters_results + def sai_thrift_create_vlan_member(client, vlan_id, port_id, tagging_mode): vlan_member_attr_list = [] attribute_value = sai_thrift_attribute_value_t(s32=vlan_id) diff --git a/tests/saitests/texttable.py b/tests/saitests/texttable.py new file mode 100644 index 00000000000..fa5d03694ad --- /dev/null +++ b/tests/saitests/texttable.py @@ -0,0 +1,95 @@ +#!python + +class TextTable(): + + def __init__(self, field_names = None): + self.widths = [] + self.table = [] + self._field_names = None + if field_names: + self._field_names = field_names + for name in field_names: + self.widths.append(len(name)) + + def get_field_names(self): + return self._field_names + + def set_field_names(self, field_names): + if self._field_names: + return + if not field_names: + return + self._field_names = field_names + for name in field_names: + self.widths.append(len(name)) + + field_names = property(get_field_names, set_field_names) + + def add_row(self, row): + if not row: + return False + if not self._field_names: + return False + if len(row) != len(self.widths): + return False + self.table.append(row) + for index, item in enumerate(row): + if self.widths[index] < len(str(item)): + self.widths[index] = len(str(item)) + + def __str__(self): + if not self._field_names: + return '' + + line = '+' + '+'.join(['-' * (width + 2) for width in self.widths]) + '+' + + # field names + buf = line + buf += '\n| ' + ' | '.join([str(name).center(self.widths[index]) for index, name in 
enumerate(self._field_names)]) + ' |' + buf += '\n' + line + + # table + for row in self.table: + buf += '\n| ' + ' | '.join([str(item).center(self.widths[index]) for index, item in enumerate(row)]) + ' |' + + # last line + if self.table: + buf += '\n' + line + + return buf + + +if __name__ == '__main__': + table1 = TextTable(['f1', 'f22', 'f333', 'f4444', 'f55555', 'f666666', 'f7777777', 'f88888888', 'f999999999']) + print('table1 only fields') + print(table1) + + table1.add_row(['v999999999', 'v88888888', 'v7777777', 'v666666', 'v55555', 'v4444', 'v333', 'v22', 'v1']) + print('table1 add row') + print(table1) + + + table2 = TextTable() + print('table2 empty') + print(table2) + + table2.field_names = ['1f', '2ff', '3fff', '4ffff', '5fffff'] + print('table2 set fields') + print(table2) + + table2.add_row(['1', '2', '3', '4', '5']) + print('table2 add row') + print(table2) + + + table3 = TextTable(['field'] + ['f1', 'f22', 'f333', 'f4444', 'f55555', 'f666666', 'f7777777', 'f88888888', 'f999999999']) + print('table3 only fields') + print(table3) + + table3.add_row([''] + ['v999999999', 'v88888888', 'v7777777', 'v666666', 'v55555', 'v4444', 'v333', 'v22', 'v1']) + print('table3 add row') + print(table3) + + table3.add_row(['row2'] + ['v999999999', 'v88888888', 'v7777777', 'v666666', 'v55555', 'v4444', 'v333', 'v22', 'v1']) + print('table3 more rows') + print(table3) diff --git a/tests/scripts/arp_responder.py b/tests/scripts/arp_responder.py index e7281cee09c..8888ed06ee6 100644 --- a/tests/scripts/arp_responder.py +++ b/tests/scripts/arp_responder.py @@ -99,11 +99,26 @@ def __init__(self, ip_sets): def action(self, interface): data = interface.recv() - - if len(data) <= self.ARP_PKT_LEN: - return self.reply_to_arp(data, interface) - elif len(data) <= self.NDP_PKT_LEN: - return self.reply_to_ndp(data, interface) + eth_type = struct.unpack('!H', data[12:14])[0] + # Retrieve the correct ethertype if the packet is VLAN tagged + if eth_type == 0x8100: # 802.1Q, VLAN 
tagged + eth_type = struct.unpack('!H', data[16:18])[0] + + if eth_type == 0x0806: # ARP + if len(data) <= self.ARP_PKT_LEN: + return self.reply_to_arp(data, interface) + else: + # Handle the case where data length is greater than ARP packet length + pass + elif eth_type == 0x86DD: # IPv6 + if len(data) <= self.NDP_PKT_LEN: + return self.reply_to_ndp(data, interface) + else: + # Handle the case where data length is greater than NDP packet length + pass + else: + # Handle other Ethernet types + pass def reply_to_arp(self, data, interface): remote_mac, remote_ip, request_ip, op_type, vlan_id = self.extract_arp_info(data) diff --git a/tests/scripts/garp_service.py b/tests/scripts/garp_service.py index 3a52fd6730d..3d77bdf13a6 100644 --- a/tests/scripts/garp_service.py +++ b/tests/scripts/garp_service.py @@ -5,7 +5,7 @@ import time from ipaddress import ip_interface -from scapy.all import conf +from scapy.all import conf, Ether, IPv6, ICMPv6ND_NA, ICMPv6NDOptSrcLLAddr from scapy.arch import get_if_hwaddr class GarpService: @@ -28,7 +28,11 @@ def gen_garp_packets(self): intf_name = 'eth{}'.format(port) source_mac = get_if_hwaddr(intf_name) source_ip_str = config['target_ip'] + source_ipv6_str = config['target_ipv6'] + dut_mac = config['dut_mac'] + dst_ipv6 = config['dst_ipv6'] source_ip = str(ip_interface(source_ip_str).ip) + source_ipv6 = str(ip_interface(source_ipv6_str).ip) # PTF uses Scapy to create packets, so this is ok to create # packets through PTF even though we are using Scapy to send the packets @@ -37,7 +41,13 @@ def gen_garp_packets(self): ip_snd=source_ip, ip_tgt=source_ip, # Re-use server IP as target IP, since it is within the subnet of the VLAN IP arp_op=2) - self.packets[intf_name] = garp_pkt + + na_pkt = Ether(src=source_mac, dst=dut_mac) \ + / IPv6(dst=dst_ipv6, src=source_ipv6) \ + / ICMPv6ND_NA(tgt=source_ipv6, S=1, R=0, O=0) \ + / ICMPv6NDOptSrcLLAddr(type=2, lladdr=source_mac) + + self.packets[intf_name] = [garp_pkt, na_pkt] def 
send_garp_packets(self): ''' @@ -54,8 +64,9 @@ def send_garp_packets(self): try: while True: - for socket, packet in sockets.items(): - socket.send(packet) + for socket, packet_list in sockets.items(): + for packet in packet_list: + socket.send(packet) if self.interval is None: break diff --git a/tests/scripts/getbuild.py b/tests/scripts/getbuild.py index 292246a0392..03a71ec1e29 100755 --- a/tests/scripts/getbuild.py +++ b/tests/scripts/getbuild.py @@ -4,11 +4,16 @@ import time import sys import argparse -from urllib.request import urlopen, urlretrieve +from urllib.request import urlopen, urlretrieve, Request, build_opener, install_opener +import base64 _start_time = None _last_time = None artifact_size = 0 +NOT_FOUND_BUILD_ID = -999 +MAX_DOWNLOAD_TIMES = 3 + + def reporthook(count, block_size, total_size): global _start_time, _last_time, artifact_size cur_time = int(time.time()) @@ -31,12 +36,13 @@ def reporthook(count, block_size, total_size): percent = int(count * block_size * 100 / total_size) time_left = (total_size - progress_size) / speed / 1024 sys.stdout.write("\r...%d%%, %d(%d) MB, %d KB/s, %d seconds left..." % - (percent, progress_size / (1024 * 1024), total_size / (1024 * 1024), speed, time_left)) + (percent, progress_size / (1024 * 1024), total_size / (1024 * 1024), speed, time_left)) else: sys.stdout.write("\r...%d MB, %d KB/s, ..." % - (progress_size / (1024 * 1024), speed)) + (progress_size / (1024 * 1024), speed)) sys.stdout.flush() + def validate_url_or_abort(url): # Attempt to retrieve HTTP response code try: @@ -55,12 +61,19 @@ def validate_url_or_abort(url): print("Image file not found on remote machine. 
Aborting...") sys.exit(1) -def get_download_url(buildid, artifact_name): + +def get_download_url(buildid, artifact_name, url_prefix, access_token): """get download url""" - artifact_url = "https://dev.azure.com/mssonic/build/_apis/build/builds/{}/artifacts?artifactName={}&api-version=5.0".format(buildid, artifact_name) + artifact_req = Request("https://dev.azure.com/{}/_apis/build/builds/{}/artifacts?artifactName={}&api-version=5.0" + .format(url_prefix, buildid, artifact_name)) + + # If access token is not empty, set headers + if access_token: + artifact_req.add_header('Authorization', + 'Basic {}'.format(base64.b64encode(access_token.encode('utf-8')).decode('utf-8'))) - resp = urlopen(artifact_url) + resp = urlopen(artifact_req) j = json.loads(resp.read().decode('utf-8')) @@ -70,12 +83,17 @@ def get_download_url(buildid, artifact_name): return (download_url, artifact_size) -def download_artifacts(url, content_type, platform, buildid): +def download_artifacts(url, content_type, platform, buildid, num_asic, access_token): """find latest successful build id for a branch""" if content_type == 'image': if platform == 'vs': - filename = 'sonic-vs.img.gz' + if num_asic == 6: + filename = 'sonic-6asic-vs.img.gz' + elif num_asic == 4: + filename = 'sonic-4asic-vs.img.gz' + else: + filename = 'sonic-vs.img.gz' else: filename = "sonic-{}.bin".format(platform) @@ -85,27 +103,53 @@ def download_artifacts(url, content_type, platform, buildid): filename = "{}.zip".format(platform) if url.startswith('http://') or url.startswith('https://'): - print('Downloading {} from build {}...'.format(filename, buildid)) validate_url_or_abort(url) - try: - urlretrieve(url, filename, reporthook) - except Exception as e: - print("Download error", e) - sys.exit(1) - -def find_latest_build_id(branch): + download_times = 0 + while download_times < MAX_DOWNLOAD_TIMES: + try: + print('Downloading {} from build {}...'.format(filename, buildid)) + download_times += 1 + # If access token is not empty, 
set headers + if access_token: + opener = build_opener() + opener.addheaders = [ + ('Authorization', + 'Basic {}'.format(base64.b64encode(access_token.encode('utf-8')).decode('utf-8')))] + install_opener(opener) + urlretrieve(url, filename, reporthook) + print('\nDownload finished!') + break + except Exception as e: + print("Download error", e) + if download_times < MAX_DOWNLOAD_TIMES: + print('Download times: {}, sleep: {} seconds before retry.'.format(download_times, + 30 * download_times)) + time.sleep(30 * download_times) + continue + else: + sys.exit(1) + + +def find_latest_build_id(branch, success_flag="succeeded"): """find latest successful build id for a branch""" - builds_url = "https://dev.azure.com/mssonic/build/_apis/build/builds?definitions=1&branchName=refs/heads/{}&resultFilter=succeeded&statusFilter=completed&api-version=6.0".format(branch) + builds_url = "https://dev.azure.com/mssonic/build/_apis/build/builds?definitions=1&branchName=refs/heads/{}" \ + "&resultFilter={}&statusFilter=completed&api-version=6.0".format(branch, success_flag) resp = urlopen(builds_url) j = json.loads(resp.read().decode('utf-8')) - latest_build_id = int(j['value'][0]['id']) + value = j.get('value', []) + + if len(value) > 0: + latest_build_id = int(value[0]['id']) + else: + latest_build_id = NOT_FOUND_BUILD_ID return latest_build_id + def main(): global artifact_size @@ -113,23 +157,37 @@ def main(): parser.add_argument('--buildid', metavar='buildid', type=int, help='build id') parser.add_argument('--branch', metavar='branch', type=str, help='branch name') parser.add_argument('--platform', metavar='platform', type=str, - choices=['broadcom', 'mellanox', 'vs'], - help='platform to download') + choices=['broadcom', 'mellanox', 'vs'], + help='platform to download') parser.add_argument('--content', metavar='content', type=str, - choices=['all', 'image'], default='image', - help='download content type [all|image(default)]') + choices=['all', 'image'], default='image', + 
help='download content type [all|image(default)]') + parser.add_argument('--num_asic', metavar='num_asic', type=int, + default=1, + help='Specifiy number of asics') + parser.add_argument('--url_prefix', metavar='url_prefix', type=str, default='mssonic/build', help='url prefix') + parser.add_argument('--access_token', metavar='access_token', type=str, default='', nargs='?', const='', required=False, help='access token') + args = parser.parse_args() if args.buildid is None: - buildid = find_latest_build_id(args.branch) + buildid_succ = find_latest_build_id(args.branch, "succeeded") + buildid_partial = find_latest_build_id(args.branch, "partiallySucceeded") + print('Succeeded buildId:{}, PartiallySucceeded buildId {}'.format(buildid_succ, buildid_partial)) + if buildid_succ == NOT_FOUND_BUILD_ID and buildid_partial == NOT_FOUND_BUILD_ID: + raise Exception("Can't find 'Succeeded' or 'partiallySucceeded' build result.") + buildid = max(buildid_succ, buildid_partial) else: buildid = int(args.buildid) artifact_name = "sonic-buildimage.{}".format(args.platform) - (dl_url, artifact_size) = get_download_url(buildid, artifact_name) + (dl_url, artifact_size) = get_download_url(buildid, artifact_name, + url_prefix=args.url_prefix, + access_token=args.access_token) + + download_artifacts(dl_url, args.content, args.platform, buildid, args.num_asic, access_token=args.access_token) - download_artifacts(dl_url, args.content, args.platform, buildid) if __name__ == '__main__': main() diff --git a/tests/scripts/icmp_responder.py b/tests/scripts/icmp_responder.py index d68f369f5ee..2266b5e66b0 100644 --- a/tests/scripts/icmp_responder.py +++ b/tests/scripts/icmp_responder.py @@ -1,11 +1,25 @@ import argparse +import logging +import sys +from io import BytesIO +from concurrent.futures.thread import ThreadPoolExecutor from scapy.all import conf, Ether, ICMP, IP from scapy.arch import get_if_hwaddr from scapy.data import ETH_P_IP from select import select +root = logging.getLogger() 
+root.setLevel(logging.DEBUG) + +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +root.addHandler(handler) + + def respond_to_icmp_request(socket, request, dst_mac=None): """Respond to ICMP request.""" reply = request.copy() @@ -29,13 +43,33 @@ class ICMPSniffer(object): TYPE_ECHO_REQUEST = 8 - def __init__(self, ifaces, request_handler=None, dst_mac=None): + @staticmethod + def is_icmp_packet_checksum_valid(packet): + ip_chksum = packet[IP].chksum + icmp_chksum = packet[ICMP].chksum + packet[IP].chksum = None + packet[ICMP].chksum = None + rebuild_packet = Ether(packet.build()) + return rebuild_packet[IP].chksum == ip_chksum and rebuild_packet[ICMP].chksum == icmp_chksum + + @staticmethod + def dump_scapy_packet_show_output(packet): + """Dump packet show output to string.""" + _stdout, sys.stdout = sys.stdout, BytesIO() + try: + packet.show() + return sys.stdout.getvalue() + finally: + sys.stdout = _stdout + + def __init__(self, ifaces, request_handler=None, dst_mac=None, validate_checksum=False): """ Init ICMP sniffer. 
@param ifaces: interfaces to listen for ICMP reqest @param request_handler: handler function that will be called when receives ICMP request + @param validate_checksum: validate checksum for received ICMP request before sending reply """ self.sniff_sockets = [] self.iface_hwaddr = {} @@ -44,6 +78,7 @@ def __init__(self, ifaces, request_handler=None, dst_mac=None): self.iface_hwaddr[iface] = get_if_hwaddr(iface) self.request_handler = request_handler self.dst_mac = dst_mac + self.validate_checksum = validate_checksum def __call__(self): try: @@ -53,7 +88,16 @@ def __call__(self): packet = s.recv() if packet is not None: if ICMP in packet and packet[ICMP].type == self.TYPE_ECHO_REQUEST and self.request_handler: - self.request_handler(s, packet, self.dst_mac) + if self.validate_checksum: + if ICMPSniffer.is_icmp_packet_checksum_valid(packet): + self.request_handler(s, packet, self.dst_mac) + else: + logging.error( + "Receive ICMP echo message with invalid checksum:\n%s\n", + ICMPSniffer.dump_scapy_packet_show_output(packet) + ) + else: + self.request_handler(s, packet, self.dst_mac) finally: for s in self.sniff_sockets: s.close() @@ -63,9 +107,19 @@ def __call__(self): parser = argparse.ArgumentParser(description="ICMP responder") parser.add_argument("--intf", "-i", dest="ifaces", required=True, action="append", help="interface to listen for ICMP request") parser.add_argument("--dst_mac", "-m", dest="dst_mac", required=False, action="store", help="The destination MAC to use for ICMP echo replies") + parser.add_argument("--validate_checksum", "-c", dest="validate_checksum", required=False, default=False, + action="store_true", help="validate received ICMP packet checksum before sending reply") args = parser.parse_args() ifaces = args.ifaces dst_mac = args.dst_mac + validate_checksum = args.validate_checksum + + max_workers = 24 if len(ifaces) > 24 else len(ifaces) + sniffed_ifaces = [[] for _ in range(max_workers)] + for i, iface in enumerate(ifaces): + sniffed_ifaces[i 
% max_workers].append(iface) - icmp_sniffer = ICMPSniffer(ifaces, request_handler=respond_to_icmp_request, dst_mac=dst_mac) - icmp_sniffer() + with ThreadPoolExecutor(max_workers=max_workers) as executor: + for ifaces in sniffed_ifaces: + icmp_sniffer = ICMPSniffer(ifaces, request_handler=respond_to_icmp_request, dst_mac=dst_mac, validate_checksum=validate_checksum) + executor.submit(icmp_sniffer) diff --git a/tests/scripts/restart_interface.sh b/tests/scripts/restart_interface.sh new file mode 100644 index 00000000000..fb2e35f8aad --- /dev/null +++ b/tests/scripts/restart_interface.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -euo pipefail + +INTF_LIST=$(ls /sys/class/net | grep -E "^eth[0-9]+(\.[0-9]+)?$") + +for INTF in ${INTF_LIST}; do + echo "Restart interface: ${INTF}" + ifconfig "${INTF}" down + ifconfig "${INTF}" up +done diff --git a/tests/sflow/test_sflow.py b/tests/sflow/test_sflow.py index e5cccdcde01..9b9ffdc8e96 100644 --- a/tests/sflow/test_sflow.py +++ b/tests/sflow/test_sflow.py @@ -20,7 +20,7 @@ from netaddr import * pytestmark = [ - pytest.mark.topology('t0') + pytest.mark.topology('t0', 'm0') ] logger = logging.getLogger(__name__) diff --git a/tests/show_techsupport/conftest.py b/tests/show_techsupport/conftest.py new file mode 100644 index 00000000000..8c51e752105 --- /dev/null +++ b/tests/show_techsupport/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exceptions(duthosts, loganalyzer): + """ + In Mellanox, when techsupport is taken, it invokes fw dump. + While taking the fw dump, the fw is busy and doesn't respond to other calls. + The access of sfp eeprom happens through firmware and xcvrd gets the DOM fields + every 60 seconds which fails during the fw dump. + This is a temporary issue and this log can be ignored. + Issue link: https://github.com/sonic-net/sonic-buildimage/issues/12621 + The fixture is auto used to all test scripts in this directory. 
+ """ + ignoreRegex = [ + ".*ERR kernel:.*Reg cmd access status failed.*", + ".*ERR kernel:.*Reg cmd access failed.*", + ".*ERR kernel:.*Eeprom query failed.*", + ".*ERR kernel:.*Fails to access.*register MCIA.*", + ".*ERR kernel:.*Fails to read module eeprom.*", + ".*ERR kernel:.*Fails to access.*module eeprom.*", + ".*ERR kernel:.*Fails to get module type.*", + ".*ERR pmon#xcvrd:.*Failed to read sfp.*" + ] + for dut in duthosts: + if loganalyzer and loganalyzer[dut.hostname]: + loganalyzer[dut.hostname].ignore_regex.extend(ignoreRegex) diff --git a/tests/show_techsupport/test_techsupport.py b/tests/show_techsupport/test_techsupport.py index 272c39add84..08d9927edf7 100644 --- a/tests/show_techsupport/test_techsupport.py +++ b/tests/show_techsupport/test_techsupport.py @@ -3,7 +3,7 @@ import pytest import time import logging -import tech_support_cmds as cmds +import tech_support_cmds as cmds from random import randint from tests.common.helpers.assertions import pytest_assert, pytest_require @@ -264,7 +264,11 @@ def execute_command(duthost, since): :param duthost: DUT :param since: since string enterd by user """ - result = duthost.command("show techsupport -r --since={}".format('"' + since + '"'), module_ignore_errors=True) + opt = "-r" if duthost.sonic_release not in ["201811", "201911"] else "" + result = duthost.command( + "show techsupport {} --since={}".format(opt, '"' + since + '"'), + module_ignore_errors=True + ) if result['rc'] != SUCCESS_CODE: pytest.fail('Failed to create techsupport. \nstdout:{}. \nstderr:{}'.format(result['stdout'], result['stderr'])) pytest.tar_stdout = result['stdout'] @@ -294,8 +298,8 @@ def test_techsupport(request, config, duthosts, enum_rand_one_per_hwsku_frontend def add_asic_arg(format_str, cmds_list, asic_num): - """ - Add ASIC specific arg using the supplied string formatter + """ + Add ASIC specific arg using the supplied string formatter New commands are added for each ASIC. 
In case of a regex paramter, new regex is created for each ASIC. @@ -330,9 +334,9 @@ def add_asic_arg(format_str, cmds_list, asic_num): @pytest.fixture(scope='function') def commands_to_check(duthosts, enum_rand_one_per_hwsku_frontend_hostname): """ - Prepare a list of commands to be expected in the - show techsupport output. All the expected commands are - categorized into groups. + Prepare a list of commands to be expected in the + show techsupport output. All the expected commands are + categorized into groups. For multi ASIC platforms, command strings are generated based on the number of ASICs. @@ -364,9 +368,9 @@ def commands_to_check(duthosts, enum_rand_one_per_hwsku_frontend_hostname): if duthost.facts["asic_type"] == "broadcom": cmds_to_check.update( { - "broadcom_cmd_bcmcmd": + "broadcom_cmd_bcmcmd": add_asic_arg(" -n {}", cmds.broadcom_cmd_bcmcmd, num), - "broadcom_cmd_misc": + "broadcom_cmd_misc": add_asic_arg("{}", cmds.broadcom_cmd_misc, num), } ) @@ -393,8 +397,8 @@ def commands_to_check(duthosts, enum_rand_one_per_hwsku_frontend_hostname): def check_cmds(cmd_group_name, cmd_group_to_check, cmdlist): - """ - Check commands within a group against the command list + """ + Check commands within a group against the command list Returns: list commands not found """ diff --git a/tests/show_techsupport/test_techsupport_no_secret.py b/tests/show_techsupport/test_techsupport_no_secret.py index eab451b8f63..a19bb65f470 100644 --- a/tests/show_techsupport/test_techsupport_no_secret.py +++ b/tests/show_techsupport/test_techsupport_no_secret.py @@ -1,7 +1,7 @@ import pytest import logging from tests.common.helpers.assertions import pytest_assert -from tests.common.utilities import skip_release +from tests.common.utilities import skip_release, delete_running_config logger = logging.getLogger(__name__) @@ -26,6 +26,10 @@ def setup_password(duthosts, enum_rand_one_per_hwsku_hostname, creds_all_duts): duthost.shell("sudo config tacacs default passkey") 
duthost.shell("sudo config radius default passkey") + # Remove TACACS/Radius keys + delete_keys_json = [{"RADIUS": {}}, {"TACPLUS": {}}] + delete_running_config(delete_keys_json, duthost) + def check_no_result(duthost, command): res = duthost.shell(command) logger.info(command) diff --git a/tests/snappi/pfc/files/helper.py b/tests/snappi/pfc/files/helper.py deleted file mode 100644 index 5673a9f3c40..00000000000 --- a/tests/snappi/pfc/files/helper.py +++ /dev/null @@ -1,445 +0,0 @@ -import time -import logging -from tests.common.helpers.assertions import pytest_assert -from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ - fanout_graph_facts -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import pfc_class_enable_vector,\ - get_egress_lossless_buffer_size, stop_pfcwd, disable_packet_aging -from tests.common.snappi.port import select_ports, select_tx_port -from tests.common.snappi.snappi_helpers import wait_for_arp - -logger = logging.getLogger(__name__) - -PAUSE_FLOW_NAME = 'Pause Storm' -TEST_FLOW_NAME = 'Test Flow' -TEST_FLOW_AGGR_RATE_PERCENT = 45 -BG_FLOW_NAME = 'Background Flow' -BG_FLOW_AGGR_RATE_PERCENT = 45 -DATA_PKT_SIZE = 1024 -DATA_FLOW_DURATION_SEC = 2 -DATA_FLOW_DELAY_SEC = 1 -SNAPPI_POLL_DELAY_SEC = 2 -TOLERANCE_THRESHOLD = 0.05 - - -def run_pfc_test(api, - testbed_config, - port_config_list, - conn_data, - fanout_data, - duthost, - dut_port, - global_pause, - pause_prio_list, - test_prio_list, - bg_prio_list, - prio_dscp_map, - test_traffic_pause): - """ - Run a PFC test - Args: - api (obj): snappi session - testbed_config (obj): testbed L1/L2/L3 configuration - port_config_list (list): list of port configuration - conn_data (dict): the dictionary returned by conn_graph_fact. - fanout_data (dict): the dictionary returned by fanout_graph_fact. 
- duthost (Ansible host instance): device under test - dut_port (str): DUT port to test - global_pause (bool): if pause frame is IEEE 802.3X pause - pause_prio_list (list): priorities to pause for pause frames - test_prio_list (list): priorities of test flows - bg_prio_list (list): priorities of background flows - prio_dscp_map (dict): Priority vs. DSCP map (key = priority). - test_traffic_pause (bool): if test flows are expected to be paused - Returns: - N/A - """ - - pytest_assert(testbed_config is not None, 'Fail to get L2/3 testbed config') - - stop_pfcwd(duthost) - disable_packet_aging(duthost) - - """ Get the ID of the port to test """ - port_id = get_dut_port_id(dut_hostname=duthost.hostname, - dut_port=dut_port, - conn_data=conn_data, - fanout_data=fanout_data) - - pytest_assert(port_id is not None, - 'Fail to get ID for port {}'.format(dut_port)) - - """ Rate percent must be an integer """ - test_flow_rate_percent = int(TEST_FLOW_AGGR_RATE_PERCENT / len(test_prio_list)) - bg_flow_rate_percent = int(BG_FLOW_AGGR_RATE_PERCENT / len(bg_prio_list)) - - """ Generate traffic config """ - __gen_traffic(testbed_config=testbed_config, - port_config_list=port_config_list, - port_id=port_id, - pause_flow_name=PAUSE_FLOW_NAME, - global_pause=global_pause, - pause_prio_list=pause_prio_list, - test_flow_name=TEST_FLOW_NAME, - test_flow_prio_list=test_prio_list, - test_flow_rate_percent=test_flow_rate_percent, - bg_flow_name=BG_FLOW_NAME, - bg_flow_prio_list=bg_prio_list, - bg_flow_rate_percent=bg_flow_rate_percent, - data_flow_dur_sec=DATA_FLOW_DURATION_SEC, - data_flow_delay_sec=DATA_FLOW_DELAY_SEC, - data_pkt_size=DATA_PKT_SIZE, - prio_dscp_map=prio_dscp_map) - - flows = testbed_config.flows - - all_flow_names = [flow.name for flow in flows] - data_flow_names = [flow.name for flow in flows if PAUSE_FLOW_NAME not in flow.name] - - """ Run traffic """ - flow_stats = __run_traffic(api=api, - config=testbed_config, - data_flow_names=data_flow_names, - 
all_flow_names=all_flow_names, - exp_dur_sec=DATA_FLOW_DURATION_SEC+DATA_FLOW_DELAY_SEC) - - speed_str = testbed_config.layer1[0].speed - speed_gbps = int(speed_str.split('_')[1]) - - """ Verify experiment results """ - __verify_results(rows=flow_stats, - duthost=duthost, - pause_flow_name=PAUSE_FLOW_NAME, - test_flow_name=TEST_FLOW_NAME, - bg_flow_name=BG_FLOW_NAME, - data_flow_dur_sec=DATA_FLOW_DURATION_SEC, - test_flow_rate_percent=test_flow_rate_percent, - bg_flow_rate_percent=bg_flow_rate_percent, - data_pkt_size=DATA_PKT_SIZE, - speed_gbps=speed_gbps, - test_flow_pause=test_traffic_pause, - tolerance=TOLERANCE_THRESHOLD) - - -sec_to_nanosec = lambda x: x * 1e9 - - -def __gen_traffic(testbed_config, - port_config_list, - port_id, - pause_flow_name, - global_pause, - pause_prio_list, - test_flow_name, - test_flow_prio_list, - test_flow_rate_percent, - bg_flow_name, - bg_flow_prio_list, - bg_flow_rate_percent, - data_flow_dur_sec, - data_flow_delay_sec, - data_pkt_size, - prio_dscp_map): - """ - Generate configurations of flows, including test flows, background flows and - pause storm. Test flows and background flows are also known as data flows. 
- Args: - testbed_config (obj): testbed L1/L2/L3 configuration - port_config_list (list): list of port configuration - port_id (int): ID of DUT port to test - pause_flow_name (str): name of pause storm - global_pause (bool): if pause frame is IEEE 802.3X pause - pause_prio_list (list): priorities to pause for pause frames - test_flow_name (str): name of test flows - test_prio_list (list): priorities of test flows - test_flow_rate_percent (int): rate percentage for each test flow - bg_flow_name (str): name of background flows - bg_prio_list (list): priorities of background flows - bg_flow_rate_percent (int): rate percentage for each background flow - data_flow_dur_sec (int): duration of data flows in second - data_flow_delay_sec (int): start delay of data flows in second - data_pkt_size (int): packet size of data flows in byte - prio_dscp_map (dict): Priority vs. DSCP map (key = priority). - Returns: - flows configurations (list): the list should have configurations of - len(test_flow_prio_list) test flow, len(bg_flow_prio_list) background - flows and a pause storm. 
- """ - - rx_port_id = port_id - tx_port_id_list, rx_port_id_list = select_ports(port_config_list=port_config_list, - pattern="many to one", - rx_port_id=rx_port_id) - - pytest_assert(len(tx_port_id_list) > 0, "Cannot find any TX ports") - tx_port_id = select_tx_port(tx_port_id_list=tx_port_id_list, - rx_port_id=rx_port_id) - pytest_assert(tx_port_id is not None, "Cannot find a suitable TX port") - - tx_port_config = next((x for x in port_config_list if x.id == tx_port_id), None) - rx_port_config = next((x for x in port_config_list if x.id == rx_port_id), None) - - tx_mac = tx_port_config.mac - if tx_port_config.gateway == rx_port_config.gateway and \ - tx_port_config.prefix_len == rx_port_config.prefix_len: - """ If soruce and destination port are in the same subnet """ - rx_mac = rx_port_config.mac - else: - rx_mac = tx_port_config.gateway_mac - - tx_port_name = testbed_config.ports[tx_port_id].name - rx_port_name = testbed_config.ports[rx_port_id].name - data_flow_delay_nanosec = sec_to_nanosec(data_flow_delay_sec) - - """ Test flows """ - for prio in test_flow_prio_list: - test_flow = testbed_config.flows.flow( - name='{} Prio {}'.format(test_flow_name, prio))[-1] - test_flow.tx_rx.port.tx_name = tx_port_name - test_flow.tx_rx.port.rx_name = rx_port_name - - eth, ipv4 = test_flow.packet.ethernet().ipv4() - eth.src.value = tx_mac - eth.dst.value = rx_mac - eth.pfc_queue.value = prio - - ipv4.src.value = tx_port_config.ip - ipv4.dst.value = rx_port_config.ip - ipv4.priority.choice = ipv4.priority.DSCP - ipv4.priority.dscp.phb.values = prio_dscp_map[prio] - ipv4.priority.dscp.ecn.value = ( - ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) - - test_flow.size.fixed = data_pkt_size - test_flow.rate.percentage = test_flow_rate_percent - test_flow.duration.fixed_seconds.seconds = data_flow_dur_sec - test_flow.duration.fixed_seconds.delay.nanoseconds = int(data_flow_delay_nanosec) - - test_flow.metrics.enable = True - test_flow.metrics.loss = True - - """ Background flows 
""" - for prio in bg_flow_prio_list: - bg_flow = testbed_config.flows.flow( - name='{} Prio {}'.format(bg_flow_name, prio))[-1] - bg_flow.tx_rx.port.tx_name = tx_port_name - bg_flow.tx_rx.port.rx_name = rx_port_name - - eth, ipv4 = bg_flow.packet.ethernet().ipv4() - eth.src.value = tx_mac - eth.dst.value = rx_mac - eth.pfc_queue.value = prio - - ipv4.src.value = tx_port_config.ip - ipv4.dst.value = rx_port_config.ip - ipv4.priority.choice = ipv4.priority.DSCP - ipv4.priority.dscp.phb.values = prio_dscp_map[prio] - ipv4.priority.dscp.ecn.value = ( - ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) - - bg_flow.size.fixed = data_pkt_size - bg_flow.rate.percentage = bg_flow_rate_percent - bg_flow.duration.fixed_seconds.seconds = data_flow_dur_sec - bg_flow.duration.fixed_seconds.delay.nanoseconds = int(data_flow_delay_nanosec) - - bg_flow.metrics.enable = True - bg_flow.metrics.loss = True - - """ Pause storm """ - pause_flow = testbed_config.flows.flow(name=pause_flow_name)[-1] - pause_flow.tx_rx.port.tx_name = testbed_config.ports[rx_port_id].name - pause_flow.tx_rx.port.rx_name = testbed_config.ports[tx_port_id].name - - if global_pause: - pause_pkt = pause_flow.packet.ethernetpause()[-1] - pause_pkt.src.value = '00:00:fa:ce:fa:ce' - pause_pkt.dst.value = '01:80:C2:00:00:01' - - else: - pause_time = [] - for x in range(8): - if x in pause_prio_list: - pause_time.append(int('ffff', 16)) - else: - pause_time.append(int('0000', 16)) - - vector = pfc_class_enable_vector(pause_prio_list) - pause_pkt = pause_flow.packet.pfcpause()[-1] - pause_pkt.src.value = '00:00:fa:ce:fa:ce' - pause_pkt.dst.value = '01:80:C2:00:00:01' - pause_pkt.class_enable_vector.value = vector - pause_pkt.pause_class_0.value = pause_time[0] - pause_pkt.pause_class_1.value = pause_time[1] - pause_pkt.pause_class_2.value = pause_time[2] - pause_pkt.pause_class_3.value = pause_time[3] - pause_pkt.pause_class_4.value = pause_time[4] - pause_pkt.pause_class_5.value = pause_time[5] - 
pause_pkt.pause_class_6.value = pause_time[6] - pause_pkt.pause_class_7.value = pause_time[7] - - """ Pause frames are sent from the RX port """ - - speed_str = testbed_config.layer1[0].speed - speed_gbps = int(speed_str.split('_')[1]) - pause_dur = 65535 * 64 * 8.0 / (speed_gbps * 1e9) - pps = int(2 / pause_dur) - - pause_flow.rate.pps = pps - pause_flow.size.fixed = 64 - pause_flow.duration.choice = pause_flow.duration.CONTINUOUS - pause_flow.duration.continuous.delay.nanoseconds = 0 - - pause_flow.metrics.enable = True - pause_flow.metrics.loss = True - - -def __run_traffic(api, - config, - data_flow_names, - all_flow_names, - exp_dur_sec): - - """ - Run traffic and dump per-flow statistics - Args: - api (obj): snappi session - config (obj): experiment config (testbed config + flow config) - data_flow_names (list): list of names of data (test and background) flows - all_flow_names (list): list of names of all the flows - exp_dur_sec (int): experiment duration in second - Returns: - per-flow statistics (list) - """ - - api.set_config(config) - - logger.info('Wait for Arp to Resolve ...') - wait_for_arp(api, max_attempts=10, poll_interval_sec=2) - - logger.info('Starting transmit on all flows ...') - ts = api.transmit_state() - ts.state = ts.START - api.set_transmit_state(ts) - - time.sleep(exp_dur_sec) - - attempts = 0 - max_attempts = 20 - - while attempts < max_attempts: - request = api.metrics_request() - request.flow.flow_names = data_flow_names - rows = api.get_metrics(request).flow_metrics - - """ If all the data flows have stopped """ - transmit_states = [row.transmit for row in rows] - if len(rows) == len(data_flow_names) and\ - list(set(transmit_states)) == ['stopped']: - time.sleep(SNAPPI_POLL_DELAY_SEC) - break - else: - time.sleep(1) - attempts += 1 - - pytest_assert(attempts < max_attempts, - "Flows do not stop in {} seconds".format(max_attempts)) - - """ Dump per-flow statistics """ - request = api.metrics_request() - request.flow.flow_names = 
all_flow_names - rows = api.get_metrics(request).flow_metrics - logger.info('Stop transmit on all flows ...') - ts = api.transmit_state() - ts.state = ts.STOP - api.set_transmit_state(ts) - - return rows - - -def __verify_results(rows, - duthost, - pause_flow_name, - test_flow_name, - bg_flow_name, - data_flow_dur_sec, - test_flow_rate_percent, - bg_flow_rate_percent, - data_pkt_size, - speed_gbps, - test_flow_pause, - tolerance): - """ - Verify if we get expected experiment results - Args: - rows (list): per-flow statistics - duthost (Ansible host instance): device under test - pause_flow_name (str): name of pause storm - test_flow_name (str): name of test flows - bg_flow_name (str): name of background flows - test_flow_rate_percent (int): rate percentage for each test flow - bg_flow_rate_percent (int): rate percentage for each background flow - data_pkt_size (int): packet size of data flows in byte - speed_gbps (int): link speed in Gbps - test_flow_pause (bool): if test flows are expected to be paused - tolerance (float): maximum allowable deviation - Returns: - N/A - """ - - """ All the pause frames should be dropped """ - pause_flow_row = next(row for row in rows if row.name == pause_flow_name) - tx_frames = pause_flow_row.frames_tx - rx_frames = pause_flow_row.frames_rx - pytest_assert(tx_frames > 0 and rx_frames == 0, - 'All the pause frames should be dropped') - - """ Check background flows """ - for row in rows: - if bg_flow_name not in row.name: - continue - - tx_frames = row.frames_tx - rx_frames = row.frames_rx - - pytest_assert(tx_frames == rx_frames, - '{} should not have any dropped packet'.format(row.name)) - - exp_bg_flow_rx_pkts = bg_flow_rate_percent / 100.0 * speed_gbps \ - * 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size - deviation = (rx_frames - exp_bg_flow_rx_pkts) / float(exp_bg_flow_rx_pkts) - pytest_assert(abs(deviation) < tolerance, - '{} should receive {} packets (actual {})'. 
- format(row.name, exp_bg_flow_rx_pkts, rx_frames)) - - """ Check test flows """ - for row in rows: - if test_flow_name not in row.name: - continue - - tx_frames = row.frames_tx - rx_frames = row.frames_rx - - if test_flow_pause: - pytest_assert(tx_frames > 0 and rx_frames == 0, - '{} should be paused'.format(row.name)) - else: - pytest_assert(tx_frames == rx_frames, - '{} should not have any dropped packet'.format(row.name)) - - exp_test_flow_rx_pkts = test_flow_rate_percent / 100.0 * speed_gbps \ - * 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size - deviation = (rx_frames - exp_test_flow_rx_pkts) / float(exp_test_flow_rx_pkts) - pytest_assert(abs(deviation) < tolerance, - '{} should receive {} packets (actual {})'. - format(test_flow_name, exp_test_flow_rx_pkts, rx_frames)) - - if test_flow_pause: - """ In-flight TX bytes of test flows should be held by switch buffer """ - tx_frames_total = sum(row.frames_tx for row in rows if test_flow_name in row.name) - tx_bytes_total = tx_frames_total * data_pkt_size - dut_buffer_size = get_egress_lossless_buffer_size(host_ans=duthost) - - pytest_assert(tx_bytes_total < dut_buffer_size, - 'Total TX bytes {} should be smaller than DUT buffer size {}'.\ - format(tx_bytes_total, dut_buffer_size)) \ No newline at end of file diff --git a/tests/snappi/pfcwd/files/helper.py b/tests/snappi/pfcwd/files/helper.py deleted file mode 100644 index beb0b16499c..00000000000 --- a/tests/snappi/pfcwd/files/helper.py +++ /dev/null @@ -1,20 +0,0 @@ -from tests.common.helpers.assertions import pytest_require -from tests.common.broadcom_data import is_broadcom_device - -def skip_pfcwd_test(duthost, trigger_pfcwd): - """ - Skip PFC watchdog tests that may cause fake alerts - - PFC watchdog on Broadcom devices use some approximation techniques to detect - PFC storms, which may cause some fake alerts. Therefore, we skip test cases - whose trigger_pfcwd is False for Broadcom devices. 
- - Args: - duthost (obj): device to test - trigger_pfcwd (bool): if PFC watchdog is supposed to trigger - - Returns: - N/A - """ - pytest_require(trigger_pfcwd is True or is_broadcom_device(duthost) is False, - 'Skip trigger_pfcwd=False test cases for Broadcom devices') diff --git a/tests/snappi/pfcwd/files/__init__.py b/tests/snappi_tests/__init__.py similarity index 100% rename from tests/snappi/pfcwd/files/__init__.py rename to tests/snappi_tests/__init__.py diff --git a/tests/snappi/conftest.py b/tests/snappi_tests/conftest.py similarity index 95% rename from tests/snappi/conftest.py rename to tests/snappi_tests/conftest.py index d793fc60135..f8e905ff019 100644 --- a/tests/snappi/conftest.py +++ b/tests/snappi_tests/conftest.py @@ -1,6 +1,6 @@ import pytest import random -from tests.common.snappi.common_helpers import enable_packet_aging, start_pfcwd +from tests.common.snappi_tests.common_helpers import enable_packet_aging, start_pfcwd from tests.conftest import generate_priority_lists diff --git a/tests/snappi_tests/ecn/__init__.py b/tests/snappi_tests/ecn/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi/ecn/conftest.py b/tests/snappi_tests/ecn/conftest.py similarity index 72% rename from tests/snappi/ecn/conftest.py rename to tests/snappi_tests/ecn/conftest.py index 9d0187b6832..89a4b1a74d1 100644 --- a/tests/snappi/ecn/conftest.py +++ b/tests/snappi_tests/ecn/conftest.py @@ -1,4 +1,4 @@ -from ecn_args.ecn_args import add_ecn_args +from tests.snappi_tests.ecn.ecn_args.ecn_args import add_ecn_args def pytest_addoption(parser): diff --git a/tests/snappi_tests/ecn/ecn_args/__init__.py b/tests/snappi_tests/ecn/ecn_args/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi/ecn/ecn_args/ecn_args.py b/tests/snappi_tests/ecn/ecn_args/ecn_args.py similarity index 100% rename from tests/snappi/ecn/ecn_args/ecn_args.py rename to tests/snappi_tests/ecn/ecn_args/ecn_args.py diff --git 
a/tests/snappi/ecn/files/__init__.py b/tests/snappi_tests/ecn/files/__init__.py similarity index 100% rename from tests/snappi/ecn/files/__init__.py rename to tests/snappi_tests/ecn/files/__init__.py diff --git a/tests/snappi/ecn/files/helper.py b/tests/snappi_tests/ecn/files/helper.py similarity index 96% rename from tests/snappi/ecn/files/helper.py rename to tests/snappi_tests/ecn/files/helper.py index 521187dfac1..5a19452503c 100644 --- a/tests/snappi/ecn/files/helper.py +++ b/tests/snappi_tests/ecn/files/helper.py @@ -5,13 +5,13 @@ from tests.common.helpers.assertions import pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import pfc_class_enable_vector, config_wred,\ +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector, config_wred,\ enable_ecn, config_ingress_lossless_buffer_alpha, stop_pfcwd, disable_packet_aging -from tests.common.snappi.port import select_ports, select_tx_port -from tests.common.snappi.snappi_helpers import wait_for_arp +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp logger = logging.getLogger(__name__) diff --git a/tests/snappi/ecn/test_dequeue_ecn_with_snappi.py b/tests/snappi_tests/ecn/test_dequeue_ecn_with_snappi.py similarity index 91% rename from tests/snappi/ecn/test_dequeue_ecn_with_snappi.py rename to tests/snappi_tests/ecn/test_dequeue_ecn_with_snappi.py index 45a3bd38051..00c80da8bfa 100644 --- a/tests/snappi/ecn/test_dequeue_ecn_with_snappi.py +++ 
b/tests/snappi_tests/ecn/test_dequeue_ecn_with_snappi.py @@ -3,13 +3,13 @@ from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, lossless_prio_list +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, lossless_prio_list -from files.helper import run_ecn_test, is_ecn_marked +from tests.snappi_tests.ecn.files.helper import run_ecn_test, is_ecn_marked -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_dequeue_ecn(request, snappi_api, diff --git a/tests/snappi/ecn/test_red_accuracy_with_snappi.py b/tests/snappi_tests/ecn/test_red_accuracy_with_snappi.py similarity index 93% rename from tests/snappi/ecn/test_red_accuracy_with_snappi.py rename to tests/snappi_tests/ecn/test_red_accuracy_with_snappi.py index 7ee0f870412..d625c8fe20b 100644 --- a/tests/snappi/ecn/test_red_accuracy_with_snappi.py +++ b/tests/snappi_tests/ecn/test_red_accuracy_with_snappi.py @@ -4,13 +4,13 @@ from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, lossless_prio_list +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, lossless_prio_list -from files.helper import run_ecn_test, is_ecn_marked +from tests.snappi_tests.ecn.files.helper import 
run_ecn_test, is_ecn_marked -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_red_accuracy(request, snappi_api, diff --git a/tests/snappi_tests/files/__init__.py b/tests/snappi_tests/files/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi_tests/files/helper.py b/tests/snappi_tests/files/helper.py new file mode 100644 index 00000000000..862f8a3e4b5 --- /dev/null +++ b/tests/snappi_tests/files/helper.py @@ -0,0 +1,25 @@ +from tests.common.broadcom_data import is_broadcom_device +from tests.common.helpers.assertions import pytest_require +from tests.common.cisco_data import is_cisco_device + + +def skip_warm_reboot(duthost, reboot_type): + """ + Skip warm/fast reboot tests for TD2 asics and Cisco devices + + Args: + duthost (pytest fixture): device under test + reboot_type (string): type of reboot (can be warm, cold, fast) + + Returns: + None + """ + SKIP_LIST = ["td2"] + asic_type = duthost.get_asic_name() + reboot_case_supported = True + if (reboot_type == "warm" or reboot_type == "fast") and is_cisco_device(duthost): + reboot_case_supported = False + elif is_broadcom_device(duthost) and asic_type in SKIP_LIST and "warm" in reboot_type: + reboot_case_supported = False + pytest_require(reboot_case_supported, "Reboot type {} is not supported on {} switches". 
+ format(reboot_type, duthost.facts['asic_type'])) diff --git a/tests/snappi_tests/pfc/__init__.py b/tests/snappi_tests/pfc/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi/pfc/files/__init__.py b/tests/snappi_tests/pfc/files/__init__.py similarity index 100% rename from tests/snappi/pfc/files/__init__.py rename to tests/snappi_tests/pfc/files/__init__.py diff --git a/tests/snappi_tests/pfc/files/helper.py b/tests/snappi_tests/pfc/files/helper.py new file mode 100644 index 00000000000..bc4a3352343 --- /dev/null +++ b/tests/snappi_tests/pfc/files/helper.py @@ -0,0 +1,239 @@ +import logging + +from tests.common.helpers.assertions import pytest_assert +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector,\ + get_lossless_buffer_size, get_pg_dropped_packets,\ + stop_pfcwd, disable_packet_aging, sec_to_nanosec,\ + get_pfc_frame_count, packet_capture, config_capture_pkt # noqa F401 +from tests.common.snappi_tests.port import select_ports, select_tx_port # noqa F401 +from tests.common.snappi_tests.snappi_helpers import wait_for_arp # noqa F401 +from tests.common.snappi_tests.traffic_generation import setup_base_traffic_config, generate_test_flows,\ + generate_background_flows, generate_pause_flows, run_traffic, verify_pause_flow, verify_basic_test_flow,\ + verify_background_flow, verify_pause_frame_count, verify_egress_queue_frame_count, verify_in_flight_buffer_pkts,\ + verify_unset_cev_pause_frame_count +from tests.common.snappi_tests.snappi_test_params import SnappiTestParams +from tests.common.snappi_tests.read_pcap import validate_pfc_frame + + +logger = logging.getLogger(__name__) + +dut_port_config = [] +PAUSE_FLOW_NAME = 'Pause Storm' +TEST_FLOW_NAME = 'Test Flow' +TEST_FLOW_AGGR_RATE_PERCENT = 45 +BG_FLOW_NAME = 'Background Flow' 
+BG_FLOW_AGGR_RATE_PERCENT = 45 +data_flow_pkt_size = 1024 +DATA_FLOW_DURATION_SEC = 2 +data_flow_delay_sec = 1 +SNAPPI_POLL_DELAY_SEC = 2 +PAUSE_FLOW_DUR_BASE_SEC = 3 +TOLERANCE_THRESHOLD = 0.05 +CONTINUOUS_MODE = -5 + + +def run_pfc_test(api, + testbed_config, + port_config_list, + conn_data, + fanout_data, + duthost, + dut_port, + global_pause, + pause_prio_list, + test_prio_list, + bg_prio_list, + prio_dscp_map, + test_traffic_pause, + snappi_extra_params=None): + """ + Run a PFC test + Args: + api (obj): snappi session + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + conn_data (dict): the dictionary returned by conn_graph_fact. + fanout_data (dict): the dictionary returned by fanout_graph_fact. + duthost (Ansible host instance): device under test + dut_port (str): DUT port to test + global_pause (bool): if pause frame is IEEE 802.3X pause + pause_prio_list (list): priorities to pause for pause frames + test_prio_list (list): priorities of test flows + bg_prio_list (list): priorities of background flows + prio_dscp_map (dict): Priority vs. DSCP map (key = priority). 
+ test_traffic_pause (bool): if test flows are expected to be paused + snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic + + Returns: + N/A + """ + + pytest_assert(testbed_config is not None, 'Fail to get L2/3 testbed config') + + if snappi_extra_params is None: + snappi_extra_params = SnappiTestParams() + + stop_pfcwd(duthost) + disable_packet_aging(duthost) + + # Get the ID of the port to test + port_id = get_dut_port_id(dut_hostname=duthost.hostname, + dut_port=dut_port, + conn_data=conn_data, + fanout_data=fanout_data) + + pytest_assert(port_id is not None, + 'Fail to get ID for port {}'.format(dut_port)) + + # Rate percent must be an integer + bg_flow_rate_percent = int(BG_FLOW_AGGR_RATE_PERCENT / len(bg_prio_list)) + test_flow_rate_percent = int(TEST_FLOW_AGGR_RATE_PERCENT / len(test_prio_list)) + + if snappi_extra_params.headroom_test_params is not None: + global DATA_FLOW_DURATION_SEC + DATA_FLOW_DURATION_SEC = 10 + global data_flow_delay_sec + data_flow_delay_sec = 2 + + # Set up pfc delay parameter + l1_config = testbed_config.layer1[0] + pfc = l1_config.flow_control.ieee_802_1qbb + pfc.pfc_delay = snappi_extra_params.headroom_test_params[0] + + # Generate base traffic config + snappi_extra_params.base_flow_config = setup_base_traffic_config(testbed_config=testbed_config, + port_config_list=port_config_list, + port_id=port_id) + + if snappi_extra_params.packet_capture_type != packet_capture.NO_CAPTURE: + # Setup capture config + if snappi_extra_params.is_snappi_ingress_port_cap: + # packet capture is required on the ingress snappi port + snappi_extra_params.packet_capture_ports = [snappi_extra_params.base_flow_config["rx_port_name"]] + else: + # packet capture will be on the egress snappi port + snappi_extra_params.packet_capture_ports = [snappi_extra_params.base_flow_config["tx_port_name"]] + + snappi_extra_params.packet_capture_file = snappi_extra_params.packet_capture_type.value + + 
config_capture_pkt(testbed_config=testbed_config, + port_names=snappi_extra_params.packet_capture_ports, + capture_type=snappi_extra_params.packet_capture_type, + capture_name=snappi_extra_params.packet_capture_file) + + if snappi_extra_params.packet_capture_type == packet_capture.PFC_CAPTURE: + # PFC pause frame capture is requested + valid_pfc_frame_test = True + else: + # PFC pause frame capture is not requested + valid_pfc_frame_test = False + + pause_flow_dur_sec = DATA_FLOW_DURATION_SEC + data_flow_delay_sec + SNAPPI_POLL_DELAY_SEC + \ + PAUSE_FLOW_DUR_BASE_SEC if valid_pfc_frame_test else CONTINUOUS_MODE + + # Generate test flow config + generate_test_flows(testbed_config=testbed_config, + test_flow_name=TEST_FLOW_NAME, + test_flow_prio_list=test_prio_list, + test_flow_rate_percent=test_flow_rate_percent, + test_flow_dur_sec=DATA_FLOW_DURATION_SEC, + test_flow_delay_sec=data_flow_delay_sec, + test_flow_pkt_size=data_flow_pkt_size, + prio_dscp_map=prio_dscp_map, + snappi_extra_params=snappi_extra_params) + + # Generate background flow config + generate_background_flows(testbed_config=testbed_config, + bg_flow_name=BG_FLOW_NAME, + bg_flow_prio_list=bg_prio_list, + bg_flow_rate_percent=bg_flow_rate_percent, + bg_flow_dur_sec=DATA_FLOW_DURATION_SEC, + bg_flow_delay_sec=data_flow_delay_sec, + bg_flow_pkt_size=data_flow_pkt_size, + prio_dscp_map=prio_dscp_map, + snappi_extra_params=snappi_extra_params) + + # Generate pause storm config + generate_pause_flows(testbed_config=testbed_config, + pause_flow_name=PAUSE_FLOW_NAME, + pause_prio_list=pause_prio_list, + global_pause=global_pause, + snappi_extra_params=snappi_extra_params, + pause_flow_delay_sec=0, + pause_flow_dur_sec=pause_flow_dur_sec) + + flows = testbed_config.flows + + all_flow_names = [flow.name for flow in flows] + data_flow_names = [flow.name for flow in flows if PAUSE_FLOW_NAME not in flow.name] + + # Clear PFC and queue counters before traffic run + duthost.command("pfcstat -c") + 
duthost.command("sonic-clear queuecounters") + + """ Run traffic """ + flow_stats = run_traffic(api=api, + config=testbed_config, + data_flow_names=data_flow_names, + all_flow_names=all_flow_names, + exp_dur_sec=DATA_FLOW_DURATION_SEC + data_flow_delay_sec, + snappi_extra_params=snappi_extra_params) + + speed_str = testbed_config.layer1[0].speed + speed_gbps = int(speed_str.split('_')[1]) + + # Reset pfc delay parameter + pfc = testbed_config.layer1[0].flow_control.ieee_802_1qbb + pfc.pfc_delay = 0 + + # Verify PFC pause frames + if valid_pfc_frame_test: + is_valid_pfc_frame = validate_pfc_frame(snappi_extra_params.packet_capture_file + ".pcapng") + pytest_assert(is_valid_pfc_frame, "PFC frames invalid") + return + + # Verify pause flows + verify_pause_flow(flow_metrics=flow_stats, + pause_flow_name=PAUSE_FLOW_NAME) + + # Verify background flows + verify_background_flow(flow_metrics=flow_stats, + bg_flow_name=BG_FLOW_NAME, + bg_flow_rate_percent=bg_flow_rate_percent, + bg_flow_dur_sec=DATA_FLOW_DURATION_SEC, + bg_flow_pkt_size=data_flow_pkt_size, + speed_gbps=speed_gbps, + tolerance=TOLERANCE_THRESHOLD, + snappi_extra_params=snappi_extra_params) + + # Verify basic test flows metrics from ixia + verify_basic_test_flow(flow_metrics=flow_stats, + test_flow_name=TEST_FLOW_NAME, + test_flow_rate_percent=test_flow_rate_percent, + test_flow_dur_sec=DATA_FLOW_DURATION_SEC, + test_flow_pkt_size=data_flow_pkt_size, + speed_gbps=speed_gbps, + tolerance=TOLERANCE_THRESHOLD, + test_flow_pause=test_traffic_pause, + snappi_extra_params=snappi_extra_params) + + if test_traffic_pause: + # Verify in flight TX packets count relative to switch buffer size + verify_in_flight_buffer_pkts(duthost=duthost, + flow_metrics=flow_stats, + test_flow_name=TEST_FLOW_NAME, + test_flow_pkt_size=data_flow_pkt_size, + snappi_extra_params=snappi_extra_params) + # Verify PFC pause frame count + verify_pause_frame_count(duthost=duthost, + snappi_extra_params=snappi_extra_params) + else: + # Verify zero 
pause frames are counted when the PFC class enable vector is not set + verify_unset_cev_pause_frame_count(duthost=duthost, + snappi_extra_params=snappi_extra_params) + # Verify egress queue frame counts + verify_egress_queue_frame_count(duthost=duthost, + snappi_extra_params=snappi_extra_params) diff --git a/tests/snappi/pfc/test_global_pause_with_snappi.py b/tests/snappi_tests/pfc/test_global_pause_with_snappi.py similarity index 88% rename from tests/snappi/pfc/test_global_pause_with_snappi.py rename to tests/snappi_tests/pfc/test_global_pause_with_snappi.py index aca906916c4..4217e64ea21 100644 --- a/tests/snappi/pfc/test_global_pause_with_snappi.py +++ b/tests/snappi_tests/pfc/test_global_pause_with_snappi.py @@ -3,14 +3,14 @@ from tests.common.helpers.assertions import pytest_require from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ lossy_prio_list -from files.helper import run_pfc_test +from tests.snappi_tests.pfc.files.helper import run_pfc_test -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_global_pause(snappi_api, diff --git a/tests/snappi/pfc/test_pfc_pause_lossless_with_snappi.py b/tests/snappi_tests/pfc/test_pfc_pause_lossless_with_snappi.py similarity index 95% rename from tests/snappi/pfc/test_pfc_pause_lossless_with_snappi.py rename to tests/snappi_tests/pfc/test_pfc_pause_lossless_with_snappi.py index deb7921e2c5..03b4b3b1f87 100644 --- a/tests/snappi/pfc/test_pfc_pause_lossless_with_snappi.py +++ 
b/tests/snappi_tests/pfc/test_pfc_pause_lossless_with_snappi.py @@ -1,20 +1,22 @@ import logging import pytest -from files.helper import run_pfc_test +from tests.snappi_tests.pfc.files.helper import run_pfc_test from tests.common.helpers.assertions import pytest_assert, pytest_require from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ lossy_prio_list from tests.common.reboot import reboot +from tests.common.platform.processes_utils import wait_critical_processes from tests.common.utilities import wait_until +from tests.snappi_tests.files.helper import skip_warm_reboot logger = logging.getLogger(__name__) -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_pfc_pause_single_lossless_prio(snappi_api, snappi_testbed_config, @@ -168,8 +170,10 @@ def test_pfc_pause_single_lossless_prio_reboot(snappi_api, testbed_config, port_config_list = snappi_testbed_config duthost = duthosts[rand_one_dut_hostname] - lossless_prio = int(lossless_prio) + skip_warm_reboot(duthost, reboot_type) + + lossless_prio = int(lossless_prio) pause_prio_list = [lossless_prio] test_prio_list = [lossless_prio] bg_prio_list = [p for p in all_prio_list] @@ -178,6 +182,7 @@ def test_pfc_pause_single_lossless_prio_reboot(snappi_api, logger.info("Issuing a {} reboot on the dut {}".format(reboot_type, duthost.hostname)) reboot(duthost, localhost, reboot_type=reboot_type) logger.info("Wait until the system is stable") + wait_critical_processes(duthost) pytest_assert(wait_until(300, 20, 0, 
duthost.critical_services_fully_started), "Not all critical services are fully started") @@ -236,6 +241,9 @@ def test_pfc_pause_multi_lossless_prio_reboot(snappi_api, testbed_config, port_config_list = snappi_testbed_config duthost = duthosts[rand_one_dut_hostname] + + skip_warm_reboot(duthost, reboot_type) + pause_prio_list = lossless_prio_list test_prio_list = lossless_prio_list bg_prio_list = lossy_prio_list @@ -243,6 +251,7 @@ def test_pfc_pause_multi_lossless_prio_reboot(snappi_api, logger.info("Issuing a {} reboot on the dut {}".format(reboot_type, duthost.hostname)) reboot(duthost, localhost, reboot_type=reboot_type) logger.info("Wait until the system is stable") + wait_critical_processes(duthost) pytest_assert(wait_until(300, 20, 0, duthost.critical_services_fully_started), "Not all critical services are fully started") diff --git a/tests/snappi/pfc/test_pfc_pause_lossy_with_snappi.py b/tests/snappi_tests/pfc/test_pfc_pause_lossy_with_snappi.py similarity index 96% rename from tests/snappi/pfc/test_pfc_pause_lossy_with_snappi.py rename to tests/snappi_tests/pfc/test_pfc_pause_lossy_with_snappi.py index c33788391d4..908d36ae018 100644 --- a/tests/snappi/pfc/test_pfc_pause_lossy_with_snappi.py +++ b/tests/snappi_tests/pfc/test_pfc_pause_lossy_with_snappi.py @@ -1,20 +1,21 @@ import logging import pytest -from files.helper import run_pfc_test +from tests.snappi_tests.pfc.files.helper import run_pfc_test from tests.common.helpers.assertions import pytest_assert, pytest_require from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, 
lossless_prio_list,\ lossy_prio_list from tests.common.reboot import reboot from tests.common.utilities import wait_until +from tests.snappi_tests.files.helper import skip_warm_reboot logger = logging.getLogger(__name__) -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_pfc_pause_single_lossy_prio(snappi_api, snappi_testbed_config, @@ -167,8 +168,9 @@ def test_pfc_pause_single_lossy_prio_reboot(snappi_api, pytest_require(rand_one_dut_hostname == dut_hostname == dut_hostname2, "Priority and port are not mapped to the expected DUT") - testbed_config, port_config_list = snappi_testbed_config duthost = duthosts[rand_one_dut_hostname] + skip_warm_reboot(duthost, reboot_type) + testbed_config, port_config_list = snappi_testbed_config lossy_prio = int(lossy_prio) pause_prio_list = [lossy_prio] @@ -237,6 +239,9 @@ def test_pfc_pause_multi_lossy_prio_reboot(snappi_api, testbed_config, port_config_list = snappi_testbed_config duthost = duthosts[rand_one_dut_hostname] + + skip_warm_reboot(duthost, reboot_type) + pause_prio_list = lossy_prio_list test_prio_list = lossy_prio_list bg_prio_list = lossless_prio_list diff --git a/tests/snappi_tests/pfc/test_pfc_pause_response_with_snappi.py b/tests/snappi_tests/pfc/test_pfc_pause_response_with_snappi.py new file mode 100644 index 00000000000..36b5c540cbf --- /dev/null +++ b/tests/snappi_tests/pfc/test_pfc_pause_response_with_snappi.py @@ -0,0 +1,158 @@ +import logging +import pytest + +from tests.snappi_tests.pfc.files.helper import run_pfc_test +from tests.common.helpers.assertions import pytest_require +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ + snappi_api, snappi_testbed_config # noqa F401 +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ + lossy_prio_list # noqa 
F401 +from tests.common.snappi_tests.snappi_test_params import SnappiTestParams + +logger = logging.getLogger(__name__) + +pytestmark = [pytest.mark.topology('tgen')] + + +def test_pfc_single_lossless_headroom(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + enum_dut_lossless_prio, + all_prio_list, # noqa F811 + prio_dscp_map, # noqa F811 + enum_pfc_pause_delay_test_params): + """ + Test headroom capacity for DUT for a single lossless priority + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + enum_dut_lossless_prio (str): lossless priority to test, e.g., 's6100-1|3' + all_prio_list (pytest fixture): list of all the priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + enum_pfc_pause_delay_test_params (str): pfc delay value to test, + and delay responses e.g. 
"200|False" + + Returns: + N/A + """ + + pytest_require(enum_pfc_pause_delay_test_params is not None, + "Skip this testcase since pfc pause delay values have not been configured yet") + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + dut_hostname2, lossless_prio = enum_dut_lossless_prio.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname == dut_hostname2, + "Priority and port are not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + lossless_prio = int(lossless_prio) + + pause_prio_list = [lossless_prio] + test_prio_list = [lossless_prio] + bg_prio_list = [p for p in all_prio_list] + bg_prio_list.remove(lossless_prio) + + """ Populate headroom test params """ + _, pfc_pause_delay_str, headroom_test_result_str = enum_pfc_pause_delay_test_params.split( + '|') + pfc_pause_delay = int(pfc_pause_delay_str) + headroom_test_result = True if headroom_test_result_str == 'True' else False + headroom_test_params = [pfc_pause_delay, headroom_test_result] + snappi_extra_params = SnappiTestParams() + snappi_extra_params.headroom_test_params = headroom_test_params + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=True, + snappi_extra_params=snappi_extra_params) + + +def test_pfc_pause_multi_lossless_headroom(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + lossless_prio_list, # noqa F811 + lossy_prio_list, # noqa F811 + prio_dscp_map, # noqa F811 + enum_pfc_pause_delay_test_params): + """ + 
Test headroom capacity for DUT for multiple lossless priorities + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + lossless_prio_list (pytest fixture): list of all the lossless priorities + lossy_prio_list (pytest fixture): list of all the lossy priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + enum_pfc_pause_delay_test_params (str): pfc delay value to test, + and delay responses e.g. "200|False" + + Returns: + N/A + """ + + pytest_require(enum_pfc_pause_delay_test_params is not None, + "Skip this testcase since pfc pause delay values have not been configured yet") + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname, + "Port is not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + pause_prio_list = lossless_prio_list + test_prio_list = lossless_prio_list + bg_prio_list = lossy_prio_list + + """ Populate headroom test params """ + _, pfc_pause_delay_str, headroom_test_result_str = enum_pfc_pause_delay_test_params.split( + '|') + pfc_pause_delay = int(pfc_pause_delay_str) + headroom_test_result = True if headroom_test_result_str == 'True' else False + headroom_test_params = [pfc_pause_delay, headroom_test_result] + snappi_extra_params = SnappiTestParams() + snappi_extra_params.headroom_test_params = headroom_test_params + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + 
global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=True, + snappi_extra_params=snappi_extra_params) diff --git a/tests/snappi_tests/pfc/test_pfc_pause_unset_bit_enable_vector.py b/tests/snappi_tests/pfc/test_pfc_pause_unset_bit_enable_vector.py new file mode 100644 index 00000000000..554e84a9800 --- /dev/null +++ b/tests/snappi_tests/pfc/test_pfc_pause_unset_bit_enable_vector.py @@ -0,0 +1,133 @@ +import logging +import pytest + +from tests.snappi_tests.pfc.files.helper import run_pfc_test +from tests.common.helpers.assertions import pytest_require +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ + snappi_api, snappi_testbed_config # noqa F401 +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ + lossy_prio_list # noqa F401 +from tests.common.snappi_tests.snappi_test_params import SnappiTestParams + +logger = logging.getLogger(__name__) + +pytestmark = [pytest.mark.topology('tgen')] + + +def test_pfc_unset_cev_single_prio(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + enum_dut_lossless_prio, + all_prio_list, # noqa F811 + prio_dscp_map): # noqa F811 + """ + Test if PFC frames with no bit set in the class enable vector are ignored by the DUT + for a single lossless priority + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + 
rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + enum_dut_lossless_prio (str): lossless priority to test, e.g., 's6100-1|3' + all_prio_list (pytest fixture): list of all the priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + Returns: + N/A + """ + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + dut_hostname2, lossless_prio = enum_dut_lossless_prio.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname == dut_hostname2, + "Priority and port are not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + lossless_prio = int(lossless_prio) + + pause_prio_list = [lossless_prio] + test_prio_list = [lossless_prio] + bg_prio_list = [p for p in all_prio_list] + bg_prio_list.remove(lossless_prio) + + snappi_extra_params = SnappiTestParams() + snappi_extra_params.set_pfc_class_enable_vec = False + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=False, + snappi_extra_params=snappi_extra_params) + + +def test_pfc_unset_cev_multi_prio(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + lossless_prio_list, # noqa F811 + lossy_prio_list, # noqa F811 + prio_dscp_map): # noqa F811 + """ + Test if PFC frames with no bit set in the class enable vector are ignored by the DUT + for multiple lossless priorities + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + 
conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + lossless_prio_list (pytest fixture): list of all the lossless priorities + lossy_prio_list (pytest fixture): list of all the lossy priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + Returns: + N/A + """ + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname, + "Port is not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + + pause_prio_list = lossless_prio_list + test_prio_list = lossless_prio_list + bg_prio_list = lossy_prio_list + + snappi_extra_params = SnappiTestParams() + snappi_extra_params.set_pfc_class_enable_vec = False + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=False, + snappi_extra_params=snappi_extra_params) diff --git a/tests/snappi_tests/pfc/test_pfc_pause_zero_mac.py b/tests/snappi_tests/pfc/test_pfc_pause_zero_mac.py new file mode 100644 index 00000000000..66573b9a88f --- /dev/null +++ b/tests/snappi_tests/pfc/test_pfc_pause_zero_mac.py @@ -0,0 +1,138 @@ +import logging +import pytest + +from tests.snappi_tests.pfc.files.helper import run_pfc_test +from tests.common.helpers.assertions import pytest_require +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_fixtures import 
snappi_api_serv_ip, snappi_api_serv_port,\ + snappi_api, snappi_testbed_config # noqa F401 +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ + lossy_prio_list # noqa F401 +from tests.common.snappi_tests.snappi_test_params import SnappiTestParams + +logger = logging.getLogger(__name__) + +pytestmark = [pytest.mark.topology('tgen')] + + +def test_pfc_zero_src_mac_single_lossless_prio(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + enum_dut_lossless_prio, + all_prio_list, # noqa F811 + prio_dscp_map): # noqa F811 + """ + Test if PFC pause frames with zero source MAC address are counted by the DUT + for a single lossless priority + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + enum_dut_lossless_prio (str): lossless priority to test, e.g., 's6100-1|3' + all_prio_list (pytest fixture): list of all the priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). 
+ + Returns: + N/A + """ + + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + dut_hostname2, lossless_prio = enum_dut_lossless_prio.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname == dut_hostname2, + "Priority and port are not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + lossless_prio = int(lossless_prio) + + pause_prio_list = [lossless_prio] + test_prio_list = [lossless_prio] + bg_prio_list = [p for p in all_prio_list] + bg_prio_list.remove(lossless_prio) + + snappi_extra_params = SnappiTestParams() + snappi_extra_params.pfc_pause_src_mac = "00:00:00:00:00:00" + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=True, + snappi_extra_params=snappi_extra_params) + + +def test_pfc_zero_src_mac_multi_lossless_prio(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + lossless_prio_list, # noqa F811 + lossy_prio_list, # noqa F811 + prio_dscp_map): # noqa F811 + """ + Test if PFC pause frames with zero source MAC address are counted by the DUT + for multiple lossless priorities + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + 
lossless_prio_list (pytest fixture): list of all the lossless priorities + lossy_prio_list (pytest fixture): list of all the lossy priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + + Returns: + N/A + """ + + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname, + "Port is not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + pause_prio_list = lossless_prio_list + test_prio_list = lossless_prio_list + bg_prio_list = lossy_prio_list + + snappi_extra_params = SnappiTestParams() + snappi_extra_params.pfc_pause_src_mac = "00:00:00:00:00:00" + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=True, + snappi_extra_params=snappi_extra_params) diff --git a/tests/snappi_tests/pfc/test_valid_pfc_frame_with_snappi.py b/tests/snappi_tests/pfc/test_valid_pfc_frame_with_snappi.py new file mode 100644 index 00000000000..c0339cfd3cc --- /dev/null +++ b/tests/snappi_tests/pfc/test_valid_pfc_frame_with_snappi.py @@ -0,0 +1,89 @@ +import logging +import pytest + +from tests.snappi_tests.pfc.files.helper import run_pfc_test +from tests.common.helpers.assertions import pytest_require +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ + snappi_api, snappi_testbed_config # noqa F401 +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list, lossless_prio_list,\ + lossy_prio_list # noqa F401 +from 
tests.common.snappi_tests.snappi_test_params import SnappiTestParams +from tests.common.snappi_tests.common_helpers import packet_capture + +logger = logging.getLogger(__name__) + +pytestmark = [pytest.mark.topology('tgen')] + +def test_valid_pfc_frame(snappi_api, # noqa F811 + snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + lossless_prio_list, # noqa F811 + lossy_prio_list, # noqa F811 + prio_dscp_map): # noqa F811 + """ + Test if PFC Pause frame generated by device under test (DUT) is valid. + + Topology: + snappi (1) -> DUT -> snappi (2) + + Test steps: + 1) Create congestion on ingress port of ixia (snappi 2). This is done by letting 1 send data traffic to 2, and 2 + sending PFC pause frames to DUT. + 2) tgen 2 sends PFC pause frames to DUT. + 3) DUT responds to PFC frames by also sending back PFC pause frames back to tgen 1. + 4) Using packet capture on tgen 1 port, verify PFC pause frames meet IEEE 802.1Qbb code point standards. + a) There is a pause quanta specified in the frame (value between 0x0 and 0xFFFF). + b) There is a valid class enable vector set on the frame - an 8-bit mask that specifies + which 802.1p priority levels should be paused. + c) The destination MAC address on the frame is "01:80:c2:00:00:01" + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + lossless_prio_list (pytest fixture): list of all the lossless priorities + lossy_prio_list (pytest fixture): list of all the lossy priorities + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). 
+ + Returns: + N/A + """ + + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname, + "Port is not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + pause_prio_list = lossless_prio_list + test_prio_list = lossless_prio_list + bg_prio_list = lossy_prio_list + + snappi_extra_params = SnappiTestParams() + snappi_extra_params.packet_capture_type = packet_capture.PFC_CAPTURE + snappi_extra_params.is_snappi_ingress_port_cap = False + + run_pfc_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + global_pause=False, + pause_prio_list=pause_prio_list, + test_prio_list=test_prio_list, + bg_prio_list=bg_prio_list, + prio_dscp_map=prio_dscp_map, + test_traffic_pause=True, + snappi_extra_params=snappi_extra_params) diff --git a/tests/snappi_tests/pfcwd/__init__.py b/tests/snappi_tests/pfcwd/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi_tests/pfcwd/files/__init__.py b/tests/snappi_tests/pfcwd/files/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi/pfcwd/files/pfcwd_basic_helper.py b/tests/snappi_tests/pfcwd/files/pfcwd_basic_helper.py similarity index 88% rename from tests/snappi/pfcwd/files/pfcwd_basic_helper.py rename to tests/snappi_tests/pfcwd/files/pfcwd_basic_helper.py index af77f69349f..d490106ff5a 100644 --- a/tests/snappi/pfcwd/files/pfcwd_basic_helper.py +++ b/tests/snappi_tests/pfcwd/files/pfcwd_basic_helper.py @@ -5,21 +5,23 @@ from tests.common.helpers.assertions import pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import 
pfc_class_enable_vector,\ +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector,\ get_pfcwd_poll_interval, get_pfcwd_detect_time, get_pfcwd_restore_time,\ - enable_packet_aging, start_pfcwd -from tests.common.snappi.port import select_ports, select_tx_port -from tests.common.snappi.snappi_helpers import wait_for_arp + enable_packet_aging, start_pfcwd, sec_to_nanosec +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp logger = logging.getLogger(__name__) PAUSE_FLOW_NAME = "Pause Storm" +WARM_UP_TRAFFIC_NAME = "Warm Up Traffic" DATA_FLOW1_NAME = "Data Flow 1" DATA_FLOW2_NAME = "Data Flow 2" +WARM_UP_TRAFFIC_DUR = 1 DATA_PKT_SIZE = 1024 SNAPPI_POLL_DELAY_SEC = 2 -DEVIATION = 0.25 +DEVIATION = 0.3 def run_pfcwd_basic_test(api, testbed_config, @@ -55,6 +57,9 @@ def run_pfcwd_basic_test(api, start_pfcwd(duthost) enable_packet_aging(duthost) + # Set appropriate pfcwd loss deviation - these values are based on empirical testing + DEVIATION = 0.35 if duthost.facts['asic_type'] in ["broadcom"] else 0.3 + """ Get the ID of the port to test """ port_id = get_dut_port_id(dut_hostname=duthost.hostname, dut_port=dut_port, @@ -68,15 +73,20 @@ def run_pfcwd_basic_test(api, detect_time_sec = get_pfcwd_detect_time(host_ans=duthost, intf=dut_port) / 1000.0 restore_time_sec = get_pfcwd_restore_time(host_ans=duthost, intf=dut_port) / 1000.0 + """ Warm up traffic is initially sent before any other traffic to prevent pfcwd + fake alerts caused by idle links (non-incremented packet counters) during pfcwd detection periods """ + warm_up_traffic_dur_sec = WARM_UP_TRAFFIC_DUR + warm_up_traffic_delay_sec = 0 + if trigger_pfcwd: """ Large enough to trigger PFC watchdog """ pfc_storm_dur_sec = ceil(detect_time_sec + poll_interval_sec + 0.1) - flow1_delay_sec = restore_time_sec / 2 + flow1_delay_sec = restore_time_sec 
/ 2 + WARM_UP_TRAFFIC_DUR flow1_dur_sec = pfc_storm_dur_sec """ Start data traffic 2 after PFC is restored """ - flow2_delay_sec = pfc_storm_dur_sec + restore_time_sec + poll_interval_sec + flow2_delay_sec = pfc_storm_dur_sec + restore_time_sec + poll_interval_sec + WARM_UP_TRAFFIC_DUR flow2_dur_sec = 1 flow1_max_loss_rate = 1 @@ -84,11 +94,11 @@ def run_pfcwd_basic_test(api, else: pfc_storm_dur_sec = detect_time_sec * 0.5 - flow1_delay_sec = pfc_storm_dur_sec * 0.1 + flow1_delay_sec = pfc_storm_dur_sec * 0.1 + WARM_UP_TRAFFIC_DUR flow1_dur_sec = ceil(pfc_storm_dur_sec) """ Start data traffic 2 after the completion of data traffic 1 """ - flow2_delay_sec = flow1_delay_sec + flow1_dur_sec + 0.1 + flow2_delay_sec = flow1_delay_sec + flow1_dur_sec + WARM_UP_TRAFFIC_DUR + 0.1 flow2_dur_sec = 1 flow1_max_loss_rate = 0 @@ -102,9 +112,9 @@ def run_pfcwd_basic_test(api, port_id=port_id, pause_flow_name=PAUSE_FLOW_NAME, pause_flow_dur_sec=pfc_storm_dur_sec, - data_flow_name_list=[DATA_FLOW1_NAME, DATA_FLOW2_NAME], - data_flow_delay_sec_list=[flow1_delay_sec, flow2_delay_sec], - data_flow_dur_sec_list=[flow1_dur_sec, flow2_dur_sec], + data_flow_name_list=[WARM_UP_TRAFFIC_NAME, DATA_FLOW1_NAME, DATA_FLOW2_NAME], + data_flow_delay_sec_list=[warm_up_traffic_delay_sec, flow1_delay_sec, flow2_delay_sec], + data_flow_dur_sec_list=[warm_up_traffic_dur_sec, flow1_dur_sec, flow2_dur_sec], data_pkt_size=DATA_PKT_SIZE, prio_list=prio_list, prio_dscp_map=prio_dscp_map) @@ -123,8 +133,6 @@ def run_pfcwd_basic_test(api, data_flow_min_loss_rate_list=[flow1_min_loss_rate, 0], data_flow_max_loss_rate_list=[flow1_max_loss_rate, 0]) -sec_to_nanosec = lambda x : x * 1e9 - def __gen_traffic(testbed_config, port_config_list, @@ -214,7 +222,7 @@ def __gen_traffic(testbed_config, pause_flow.rate.pps = pps pause_flow.size.fixed = 64 pause_flow.duration.fixed_packets.packets = int(pause_pkt_cnt) - pause_flow.duration.fixed_packets.delay.nanoseconds = 0 + 
pause_flow.duration.fixed_packets.delay.nanoseconds = int(sec_to_nanosec(WARM_UP_TRAFFIC_DUR)) pause_flow.metrics.enable = True pause_flow.metrics.loss = True @@ -272,8 +280,8 @@ def __run_traffic(api, config, all_flow_names, exp_dur_sec): api.set_config(config) logger.info('Wait for Arp to Resolve ...') - wait_for_arp(api, max_attempts=10, poll_interval_sec=2) - + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) + logger.info('Starting transmit on all flows ...') ts = api.transmit_state() ts.state = ts.START diff --git a/tests/snappi/pfcwd/files/pfcwd_burst_storm_helper.py b/tests/snappi_tests/pfcwd/files/pfcwd_burst_storm_helper.py similarity index 78% rename from tests/snappi/pfcwd/files/pfcwd_burst_storm_helper.py rename to tests/snappi_tests/pfcwd/files/pfcwd_burst_storm_helper.py index 81f77b02e48..689c31d7935 100644 --- a/tests/snappi/pfcwd/files/pfcwd_burst_storm_helper.py +++ b/tests/snappi_tests/pfcwd/files/pfcwd_burst_storm_helper.py @@ -3,17 +3,19 @@ import logging from tests.common.helpers.assertions import pytest_assert -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import pfc_class_enable_vector,\ +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector,\ get_pfcwd_poll_interval, get_pfcwd_detect_time, get_pfcwd_restore_time,\ - enable_packet_aging, start_pfcwd -from tests.common.snappi.port import select_ports, select_tx_port -from tests.common.snappi.snappi_helpers import wait_for_arp + enable_packet_aging, start_pfcwd, sec_to_nanosec +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp logger = logging.getLogger(__name__) PAUSE_FLOW_PREFIX = "Pause Storm" +WARM_UP_TRAFFIC_NAME = "Warm Up Traffic" DATA_FLOW_PREFIX = "Data Flow" +WARM_UP_TRAFFIC_DUR = 1 BURST_EVENTS = 15 DATA_PKT_SIZE = 1024 
SNAPPI_POLL_DELAY_SEC = 2 @@ -68,6 +70,11 @@ def run_pfcwd_burst_storm_test(api, pause_flow_dur_sec = poll_interval_sec * 0.5 pause_flow_gap_sec = burst_cycle_sec - pause_flow_dur_sec + """ Warm up traffic is initially sent before any other traffic to prevent pfcwd + fake alerts caused by idle links (non-incremented packet counters) during pfcwd detection periods """ + warm_up_traffic_dur_sec = WARM_UP_TRAFFIC_DUR + warm_up_traffic_delay_sec = 0 + __gen_traffic(testbed_config=testbed_config, port_config_list=port_config_list, port_id=port_id, @@ -75,8 +82,9 @@ def run_pfcwd_burst_storm_test(api, pause_flow_dur_sec=pause_flow_dur_sec, pause_flow_count=BURST_EVENTS, pause_flow_gap_sec=pause_flow_gap_sec, - data_flow_prefix=DATA_FLOW_PREFIX, - data_flow_dur_sec=data_flow_dur_sec, + data_flow_prefix_list=[WARM_UP_TRAFFIC_NAME, DATA_FLOW_PREFIX], + data_flow_delay_sec_list=[warm_up_traffic_delay_sec, WARM_UP_TRAFFIC_DUR], + data_flow_dur_sec_list=[warm_up_traffic_dur_sec, data_flow_dur_sec], data_pkt_size=DATA_PKT_SIZE, prio_list=prio_list, prio_dscp_map=prio_dscp_map) @@ -96,9 +104,6 @@ def run_pfcwd_burst_storm_test(api, pause_flow_prefix=PAUSE_FLOW_PREFIX) -sec_to_nanosec = lambda x : x * 1e9 - - def __gen_traffic(testbed_config, port_config_list, port_id, @@ -106,8 +111,9 @@ def __gen_traffic(testbed_config, pause_flow_count, pause_flow_dur_sec, pause_flow_gap_sec, - data_flow_prefix, - data_flow_dur_sec, + data_flow_prefix_list, + data_flow_delay_sec_list, + data_flow_dur_sec_list, data_pkt_size, prio_list, prio_dscp_map): @@ -122,8 +128,9 @@ def __gen_traffic(testbed_config, pause_flow_count (int): number of PFC pause storms pause_flow_dur_sec (float): duration of each PFC pause storm pause_flow_gap_sec (float): gap between PFC pause storms - data_flow_prefix (str): prefix of names of data flows - data_flow_dur_sec (int): duration of all the data flows + data_flow_prefix_list (list): list of prefixes of names of data flows + data_flow_delay_sec_list (list): list 
of data flow start delays in second + data_flow_dur_sec_list (list): list of durations of all the data flows data_pkt_size (int): data packet size in bytes prio_list (list): priorities to generate PFC storms and data traffic prio_dscp_map (dict): Priority vs. DSCP map (key = priority). @@ -157,32 +164,38 @@ def __gen_traffic(testbed_config, tx_port_name = testbed_config.ports[tx_port_id].name rx_port_name = testbed_config.ports[rx_port_id].name - for prio in prio_list: - data_flow = testbed_config.flows.flow( - name='{} Prio {}'.format(data_flow_prefix, prio))[-1] + """ For each data flow """ + for i in range(len(data_flow_prefix_list)): + + """ For each priority """ + for prio in prio_list: + data_flow = testbed_config.flows.flow( + name='{} Prio {}'.format(data_flow_prefix_list[i], prio))[-1] - data_flow.tx_rx.port.tx_name = tx_port_name - data_flow.tx_rx.port.rx_name = rx_port_name + data_flow.tx_rx.port.tx_name = tx_port_name + data_flow.tx_rx.port.rx_name = rx_port_name - eth, ipv4 = data_flow.packet.ethernet().ipv4() - eth.src.value = tx_mac - eth.dst.value = rx_mac - eth.pfc_queue.value = prio + eth, ipv4 = data_flow.packet.ethernet().ipv4() + eth.src.value = tx_mac + eth.dst.value = rx_mac + eth.pfc_queue.value = prio - ipv4.src.value = tx_port_config.ip - ipv4.dst.value = rx_port_config.ip - ipv4.priority.choice = ipv4.priority.DSCP - ipv4.priority.dscp.phb.values = prio_dscp_map[prio] - ipv4.priority.dscp.ecn.value = ( - ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + ipv4.src.value = tx_port_config.ip + ipv4.dst.value = rx_port_config.ip + ipv4.priority.choice = ipv4.priority.DSCP + ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + ipv4.priority.dscp.ecn.value = ( + ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) - data_flow.size.fixed = data_pkt_size - data_flow.rate.percentage = data_flow_rate_percent - data_flow.duration.fixed_seconds.seconds = data_flow_dur_sec - data_flow.duration.fixed_seconds.delay.nanoseconds = 0 + data_flow.size.fixed = 
data_pkt_size + data_flow.rate.percentage = data_flow_rate_percent + data_flow.duration.fixed_seconds.seconds = ( + data_flow_dur_sec_list[i]) + data_flow.duration.fixed_seconds.delay.nanoseconds = int( + sec_to_nanosec(data_flow_delay_sec_list[i])) - data_flow.metrics.enable = True - data_flow.metrics.loss = True + data_flow.metrics.enable = True + data_flow.metrics.loss = True """ Generate a series of PFC storms """ speed_str = testbed_config.layer1[0].speed @@ -220,7 +233,7 @@ def __gen_traffic(testbed_config, pause_pkt.pause_class_6.value = pause_time[6] pause_pkt.pause_class_7.value = pause_time[7] - pause_flow_start_time = id * (pause_flow_dur_sec + pause_flow_gap_sec) + pause_flow_start_time = id * (pause_flow_dur_sec + pause_flow_gap_sec) + WARM_UP_TRAFFIC_DUR pause_flow.rate.pps = pause_pps pause_flow.size.fixed = 64 @@ -248,7 +261,7 @@ def __run_traffic(api, config, all_flow_names, exp_dur_sec): api.set_config(config) logger.info('Wait for Arp to Resolve ...') - wait_for_arp(api, max_attempts=10, poll_interval_sec=2) + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) logger.info('Starting transmit on all flows ...') ts = api.transmit_state() diff --git a/tests/snappi/pfcwd/files/pfcwd_multi_node_helper.py b/tests/snappi_tests/pfcwd/files/pfcwd_multi_node_helper.py similarity index 83% rename from tests/snappi/pfcwd/files/pfcwd_multi_node_helper.py rename to tests/snappi_tests/pfcwd/files/pfcwd_multi_node_helper.py index a33b007e9dd..5b6cd537968 100644 --- a/tests/snappi/pfcwd/files/pfcwd_multi_node_helper.py +++ b/tests/snappi_tests/pfcwd/files/pfcwd_multi_node_helper.py @@ -5,19 +5,21 @@ from tests.common.helpers.assertions import pytest_assert, pytest_require from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import pfc_class_enable_vector,\ - start_pfcwd, enable_packet_aging, get_pfcwd_poll_interval, 
get_pfcwd_detect_time -from tests.common.snappi.port import select_ports -from tests.common.snappi.snappi_helpers import wait_for_arp +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector,\ + start_pfcwd, enable_packet_aging, get_pfcwd_poll_interval, get_pfcwd_detect_time, sec_to_nanosec +from tests.common.snappi_tests.port import select_ports +from tests.common.snappi_tests.snappi_helpers import wait_for_arp logger = logging.getLogger(__name__) PAUSE_FLOW_NAME = 'Pause Storm' +WARM_UP_TRAFFIC_NAME = "Warm Up Traffic" TEST_FLOW_NAME = 'Test Flow' TEST_FLOW_AGGR_RATE_PERCENT = 45 BG_FLOW_NAME = 'Background Flow' BG_FLOW_AGGR_RATE_PERCENT = 45 +WARM_UP_TRAFFIC_DUR = 1 DATA_PKT_SIZE = 1024 SNAPPI_POLL_DELAY_SEC = 2 TOLERANCE_THRESHOLD = 0.05 @@ -125,6 +127,10 @@ def run_pfcwd_multi_node_test(api, speed_str = testbed_config.layer1[0].speed speed_gbps = int(speed_str.split('_')[1]) + """ Retrieve ASIC information for DUT """ + asic_type = duthost.facts['asic_type'] + rx_tx_tol_thrhlds = [0.0001, 0.0002] # Maintain a 0.01% and 0.02% deviation between tx and rx frames + __verify_results(rows=flow_stats, speed_gbps=speed_gbps, pause_flow_name=PAUSE_FLOW_NAME, @@ -136,7 +142,9 @@ def run_pfcwd_multi_node_test(api, data_pkt_size=DATA_PKT_SIZE, trigger_pfcwd=trigger_pfcwd, pause_port_id=port_id, - tolerance=TOLERANCE_THRESHOLD) + rx_deviation=TOLERANCE_THRESHOLD, + rx_tx_deviations=rx_tx_tol_thrhlds, + asic_type=asic_type) def __data_flow_name(name_prefix, src_id, dst_id, prio): @@ -228,6 +236,30 @@ def __gen_traffic(testbed_config, N/A """ + tx_port_id_list, rx_port_id_list = select_ports(port_config_list=port_config_list, + pattern=traffic_pattern, + rx_port_id=port_id) + + """ Warm up traffic is initially sent before any other traffic to prevent pfcwd + fake alerts caused by idle links (non-incremented packet counters) during pfcwd detection periods """ + warm_up_traffic_dur_sec = 
WARM_UP_TRAFFIC_DUR + warm_up_traffic_delay_sec = 0 + warm_up_traffic_prio_list = test_flow_prio_list + warm_up_traffic_rate_percent = test_flow_rate_percent + + """ Generate warm-up traffic """ + __gen_data_flows(testbed_config=testbed_config, + port_config_list=port_config_list, + src_port_id_list=tx_port_id_list, + dst_port_id_list=rx_port_id_list, + flow_name_prefix=WARM_UP_TRAFFIC_NAME, + flow_prio_list=warm_up_traffic_prio_list, + flow_rate_percent=warm_up_traffic_rate_percent, + flow_dur_sec=warm_up_traffic_dur_sec, + flow_delay_sec=warm_up_traffic_delay_sec, + data_pkt_size=data_pkt_size, + prio_dscp_map=prio_dscp_map) + """ Generate a PFC pause storm """ pause_port_id = port_id __gen_pause_flow(testbed_config=testbed_config, @@ -235,12 +267,10 @@ def __gen_traffic(testbed_config, src_port_id=pause_port_id, flow_name=pause_flow_name, pause_prio_list=pause_prio_list, - flow_dur_sec=pfc_storm_dur_sec) - - tx_port_id_list, rx_port_id_list = select_ports(port_config_list=port_config_list, - pattern=traffic_pattern, - rx_port_id=port_id) + flow_dur_sec=pfc_storm_dur_sec, + flow_delay_sec=WARM_UP_TRAFFIC_DUR) + """ Generate test flow traffic """ __gen_data_flows(testbed_config=testbed_config, port_config_list=port_config_list, src_port_id_list=tx_port_id_list, @@ -249,9 +279,11 @@ def __gen_traffic(testbed_config, flow_prio_list=test_flow_prio_list, flow_rate_percent=test_flow_rate_percent, flow_dur_sec=data_flow_dur_sec, + flow_delay_sec=WARM_UP_TRAFFIC_DUR, data_pkt_size=data_pkt_size, prio_dscp_map=prio_dscp_map) + """ Generate background flow traffic """ __gen_data_flows(testbed_config=testbed_config, port_config_list=port_config_list, src_port_id_list=tx_port_id_list, @@ -260,6 +292,7 @@ def __gen_traffic(testbed_config, flow_prio_list=bg_flow_prio_list, flow_rate_percent=bg_flow_rate_percent, flow_dur_sec=data_flow_dur_sec, + flow_delay_sec=WARM_UP_TRAFFIC_DUR, data_pkt_size=data_pkt_size, prio_dscp_map=prio_dscp_map) @@ -272,6 +305,7 @@ def 
__gen_data_flows(testbed_config, flow_prio_list, flow_rate_percent, flow_dur_sec, + flow_delay_sec, data_pkt_size, prio_dscp_map): """ @@ -286,6 +320,7 @@ def __gen_data_flows(testbed_config, flow_prio_list (list): priorities of data flows flow_rate_percent (int): rate percentage for each flow flow_dur_sec (int): duration of each flow in second + flow_delay_sec (int): delay before starting all flows in second data_pkt_size (int): packet size of data flows in byte prio_dscp_map (dict): Priority vs. DSCP map (key = priority). @@ -307,6 +342,7 @@ def __gen_data_flows(testbed_config, flow_prio=prio, flow_rate_percent=flow_rate_percent, flow_dur_sec=flow_dur_sec, + flow_delay_sec=flow_delay_sec, data_pkt_size=data_pkt_size, prio_dscp_map=prio_dscp_map) @@ -319,6 +355,7 @@ def __gen_data_flow(testbed_config, flow_prio, flow_rate_percent, flow_dur_sec, + flow_delay_sec, data_pkt_size, prio_dscp_map): """ @@ -333,6 +370,7 @@ def __gen_data_flow(testbed_config, flow_prio_list (list): priorities of the flow flow_rate_percent (int): rate percentage for the flow flow_dur_sec (int): duration of the flow in second + flow_delay_sec (int): delay before starting flow in second data_pkt_size (int): packet size of the flow in byte prio_dscp_map (dict): Priority vs. DSCP map (key = priority). 
@@ -375,6 +413,7 @@ def __gen_data_flow(testbed_config, flow.size.fixed = data_pkt_size flow.rate.percentage = flow_rate_percent flow.duration.fixed_seconds.seconds = flow_dur_sec + flow.duration.fixed_seconds.delay.nanoseconds = int(sec_to_nanosec(flow_delay_sec)) flow.metrics.enable = True flow.metrics.loss = True @@ -385,7 +424,8 @@ def __gen_pause_flow(testbed_config, src_port_id, flow_name, pause_prio_list, - flow_dur_sec): + flow_dur_sec, + flow_delay_sec): """ Generate the configuration for a PFC pause storm @@ -396,6 +436,7 @@ def __gen_pause_flow(testbed_config, flow_name (str): flow' name pause_prio_list (list): priorities to pause for PFC frames flow_dur_sec (float): duration of the flow in second + flow_delay_sec (int): delay before starting pause flow in second Returns: N/A @@ -441,7 +482,7 @@ def __gen_pause_flow(testbed_config, pause_flow.rate.pps = pps pause_flow.size.fixed = 64 pause_flow.duration.fixed_packets.packets = int(pkt_cnt) - pause_flow.duration.fixed_packets.delay.nanoseconds = 0 + pause_flow.duration.fixed_packets.delay.nanoseconds = int(sec_to_nanosec(flow_delay_sec)) pause_flow.metrics.enable = True pause_flow.metrics.loss = True @@ -463,7 +504,7 @@ def __run_traffic(api, config, all_flow_names, exp_dur_sec): api.set_config(config) logger.info('Wait for Arp to Resolve ...') - wait_for_arp(api, max_attempts=10, poll_interval_sec=2) + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) logger.info('Starting transmit on all flows ...') ts = api.transmit_state() @@ -517,7 +558,9 @@ def __verify_results(rows, data_pkt_size, trigger_pfcwd, pause_port_id, - tolerance): + rx_deviation, + rx_tx_deviations, + asic_type): """ Verify if we get expected experiment results @@ -533,11 +576,17 @@ def __verify_results(rows, test_flow_pause (bool): if test flows are expected to be paused trigger_pfcwd (bool): if PFC watchdog is expected to be triggered pause_port_id (int): ID of the port to send PFC pause frames - tolerance (float): maximum 
allowable deviation + rx_deviation (float): maximum allowable deviation for rx_frames relative to theoretical value + rx_tx_deviations (list of floats): maximum allowable % deviation for rx_frames relative to tx_frames + asic_type (str): asic_type information for DUT Returns: N/A """ + + """ Check for whether DUT is a Mellanox device """ + is_mlnx_device = True if "mellanox" in asic_type.lower() else False + for row in rows: flow_name = row.name tx_frames = row.frames_tx @@ -556,8 +605,8 @@ def __verify_results(rows, exp_bg_flow_rx_pkts = bg_flow_rate_percent / 100.0 * speed_gbps \ * 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size deviation = (rx_frames - exp_bg_flow_rx_pkts) / float(exp_bg_flow_rx_pkts) - pytest_assert(abs(deviation) < tolerance, - '{} should receive {} packets (actual {})'.\ + pytest_assert(abs(deviation) < rx_deviation, + '{} should receive {} packets (actual {})'. format(flow_name, exp_bg_flow_rx_pkts, rx_frames)) elif test_flow_name in flow_name: @@ -568,12 +617,17 @@ def __verify_results(rows, exp_test_flow_rx_pkts = test_flow_rate_percent / 100.0 * speed_gbps \ * 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size - if trigger_pfcwd and\ - (src_port_id == pause_port_id or dst_port_id == pause_port_id): + if trigger_pfcwd and dst_port_id == pause_port_id: """ Once PFC watchdog is triggered, it will impact bi-directional traffic """ pytest_assert(tx_frames > rx_frames, '{} should have dropped packets'.format(flow_name)) + elif trigger_pfcwd and src_port_id == pause_port_id: + if is_mlnx_device: + """ During a pfc storm with pfcwd triggered, Mellanox devices do not drop Rx packets """ + pytest_assert(tx_frames == rx_frames, + '{} should not have dropped packets for Mellanox device'.format(flow_name)) + elif not trigger_pfcwd and dst_port_id == pause_port_id: """ This test flow is delayed by PFC storm """ pytest_assert(tx_frames == rx_frames, @@ -583,11 +637,12 @@ def __verify_results(rows, format(flow_name, exp_test_flow_rx_pkts, rx_frames)) else: - 
""" Otherwise, the test flow is not impacted by PFC storm """ - pytest_assert(tx_frames == rx_frames, - '{} should not have any dropped packet'.format(flow_name)) + for dev_pct in rx_tx_deviations: + """ Otherwise, the test flow is not impacted by PFC storm """ + pytest_assert(abs(tx_frames - rx_frames)/float(tx_frames) < dev_pct, + '{} should be within {} percent deviation'.format(flow_name, dev_pct*100)) deviation = (rx_frames - exp_test_flow_rx_pkts) / float(exp_test_flow_rx_pkts) - pytest_assert(abs(deviation) < tolerance, - '{} should receive {} packets (actual {})'.\ + pytest_assert(abs(deviation) < rx_deviation, + '{} should receive {} packets (actual {})'. format(flow_name, exp_test_flow_rx_pkts, rx_frames)) diff --git a/tests/snappi/pfcwd/files/pfcwd_runtime_traffic_helper.py b/tests/snappi_tests/pfcwd/files/pfcwd_runtime_traffic_helper.py similarity index 73% rename from tests/snappi/pfcwd/files/pfcwd_runtime_traffic_helper.py rename to tests/snappi_tests/pfcwd/files/pfcwd_runtime_traffic_helper.py index fa9a41e9134..609fe79b380 100644 --- a/tests/snappi/pfcwd/files/pfcwd_runtime_traffic_helper.py +++ b/tests/snappi_tests/pfcwd/files/pfcwd_runtime_traffic_helper.py @@ -2,15 +2,17 @@ import logging from tests.common.helpers.assertions import pytest_assert -from tests.common.snappi.snappi_helpers import get_dut_port_id -from tests.common.snappi.common_helpers import start_pfcwd, stop_pfcwd -from tests.common.snappi.port import select_ports, select_tx_port -from tests.common.snappi.snappi_helpers import wait_for_arp +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.common_helpers import start_pfcwd, stop_pfcwd, sec_to_nanosec +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp DATA_FLOW_NAME = "Data Flow" +WARM_UP_TRAFFIC_NAME = "Warm Up Traffic" DATA_PKT_SIZE = 1024 DATA_FLOW_DURATION_SEC = 15 -PFCWD_START_DELAY_SEC = 3 
+WARM_UP_TRAFFIC_DUR = 1 +PFCWD_START_DELAY_SEC = 3 + WARM_UP_TRAFFIC_DUR SNAPPI_POLL_DELAY_SEC = 2 TOLERANCE_THRESHOLD = 0.05 @@ -55,11 +57,17 @@ def run_pfcwd_runtime_traffic_test(api, pytest_assert(port_id is not None, 'Fail to get ID for port {}'.format(dut_port)) + """ Warm up traffic is initially sent before any other traffic to prevent pfcwd + fake alerts caused by idle links (non-incremented packet counters) during pfcwd detection periods """ + warm_up_traffic_dur_sec = WARM_UP_TRAFFIC_DUR + warm_up_traffic_delay_sec = 0 + __gen_traffic(testbed_config=testbed_config, port_config_list=port_config_list, port_id=port_id, - data_flow_name=DATA_FLOW_NAME, - data_flow_dur_sec=DATA_FLOW_DURATION_SEC, + data_flow_name_list=[WARM_UP_TRAFFIC_NAME, DATA_FLOW_NAME], + data_flow_delay_sec_list=[warm_up_traffic_delay_sec, WARM_UP_TRAFFIC_DUR], + data_flow_dur_sec_list=[warm_up_traffic_dur_sec, DATA_FLOW_DURATION_SEC], data_pkt_size=DATA_PKT_SIZE, prio_list=prio_list, prio_dscp_map=prio_dscp_map) @@ -78,7 +86,9 @@ def run_pfcwd_runtime_traffic_test(api, speed_str = testbed_config.layer1[0].speed speed_gbps = int(speed_str.split('_')[1]) - __verify_results(rows=flow_stats, + data_flows = [flow_stat for flow_stat in flow_stats if DATA_FLOW_NAME in flow_stat.name] + + __verify_results(rows=data_flows, speed_gbps=speed_gbps, data_flow_dur_sec=DATA_FLOW_DURATION_SEC, data_pkt_size=DATA_PKT_SIZE, @@ -88,8 +98,9 @@ def run_pfcwd_runtime_traffic_test(api, def __gen_traffic(testbed_config, port_config_list, port_id, - data_flow_name, - data_flow_dur_sec, + data_flow_name_list, + data_flow_delay_sec_list, + data_flow_dur_sec_list, data_pkt_size, prio_list, prio_dscp_map): @@ -100,8 +111,9 @@ def __gen_traffic(testbed_config, testbed_config (obj): testbed L1/L2/L3 configuration port_config_list (list): list of port configuration port_id (int): ID of DUT port to test. 
- data_flow_name (str): data flow name - data_flow_dur_sec (int): duration of data flows in second + data_flow_name_list (list): list of data flow names + data_flow_delay_sec_list (list): list of data flow start delays in second + data_flow_dur_sec_list (list): list of data flow durations in second data_pkt_size (int): size of data packets in byte prio_list (list): priorities of data flows prio_dscp_map (dict): Priority vs. DSCP map (key = priority). @@ -134,32 +146,38 @@ def __gen_traffic(testbed_config, rx_port_name = testbed_config.ports[rx_port_id].name data_flow_rate_percent = int(100 / len(prio_list)) - """ For each priority """ - for prio in prio_list: - data_flow = testbed_config.flows.flow( - name='{} Prio {}'.format(data_flow_name, prio))[-1] + """ For each data flow """ + for i in range(len(data_flow_name_list)): + + """ For each priority """ + for prio in prio_list: + data_flow = testbed_config.flows.flow( + name='{} Prio {}'.format(data_flow_name_list[i], prio))[-1] - data_flow.tx_rx.port.tx_name = tx_port_name - data_flow.tx_rx.port.rx_name = rx_port_name + data_flow.tx_rx.port.tx_name = tx_port_name + data_flow.tx_rx.port.rx_name = rx_port_name - eth, ipv4 = data_flow.packet.ethernet().ipv4() - eth.src.value = tx_mac - eth.dst.value = rx_mac - eth.pfc_queue.value = prio + eth, ipv4 = data_flow.packet.ethernet().ipv4() + eth.src.value = tx_mac + eth.dst.value = rx_mac + eth.pfc_queue.value = prio - ipv4.src.value = tx_port_config.ip - ipv4.dst.value = rx_port_config.ip - ipv4.priority.choice = ipv4.priority.DSCP - ipv4.priority.dscp.phb.values = prio_dscp_map[prio] - ipv4.priority.dscp.ecn.value = ( - ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + ipv4.src.value = tx_port_config.ip + ipv4.dst.value = rx_port_config.ip + ipv4.priority.choice = ipv4.priority.DSCP + ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + ipv4.priority.dscp.ecn.value = ( + ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) - data_flow.size.fixed = data_pkt_size - 
data_flow.rate.percentage = data_flow_rate_percent - data_flow.duration.fixed_seconds.seconds = data_flow_dur_sec + data_flow.size.fixed = data_pkt_size + data_flow.rate.percentage = data_flow_rate_percent + data_flow.duration.fixed_seconds.seconds = ( + data_flow_dur_sec_list[i]) + data_flow.duration.fixed_seconds.delay.nanoseconds = int( + sec_to_nanosec(data_flow_delay_sec_list[i])) - data_flow.metrics.enable = True - data_flow.metrics.loss = True + data_flow.metrics.enable = True + data_flow.metrics.loss = True def __run_traffic(api, config, duthost, all_flow_names, pfcwd_start_delay_sec, exp_dur_sec): @@ -180,7 +198,7 @@ def __run_traffic(api, config, duthost, all_flow_names, pfcwd_start_delay_sec, e api.set_config(config) logger.info('Wait for Arp to Resolve ...') - wait_for_arp(api, max_attempts=10, poll_interval_sec=2) + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) logger.info('Starting transmit on all flows ...') ts = api.transmit_state() diff --git a/tests/snappi/pfcwd/test_pfcwd_a2a_with_snappi.py b/tests/snappi_tests/pfcwd/test_pfcwd_a2a_with_snappi.py similarity index 87% rename from tests/snappi/pfcwd/test_pfcwd_a2a_with_snappi.py rename to tests/snappi_tests/pfcwd/test_pfcwd_a2a_with_snappi.py index d692d052c33..b2b042808e4 100644 --- a/tests/snappi/pfcwd/test_pfcwd_a2a_with_snappi.py +++ b/tests/snappi_tests/pfcwd/test_pfcwd_a2a_with_snappi.py @@ -3,15 +3,13 @@ from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list,\ +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list,\ lossless_prio_list, lossy_prio_list +from 
tests.snappi_tests.pfcwd.files.pfcwd_multi_node_helper import run_pfcwd_multi_node_test -from files.pfcwd_multi_node_helper import run_pfcwd_multi_node_test -from files.helper import skip_pfcwd_test - -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] @pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_all_to_all(snappi_api, @@ -51,7 +49,6 @@ def test_pfcwd_all_to_all(snappi_api, "Priority and port are not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config lossless_prio = int(lossless_prio) diff --git a/tests/snappi/pfcwd/test_pfcwd_basic_with_snappi.py b/tests/snappi_tests/pfcwd/test_pfcwd_basic_with_snappi.py similarity index 96% rename from tests/snappi/pfcwd/test_pfcwd_basic_with_snappi.py rename to tests/snappi_tests/pfcwd/test_pfcwd_basic_with_snappi.py index f265e8a0bd4..62e5a69ae9b 100644 --- a/tests/snappi/pfcwd/test_pfcwd_basic_with_snappi.py +++ b/tests/snappi_tests/pfcwd/test_pfcwd_basic_with_snappi.py @@ -1,19 +1,21 @@ +import logging import pytest from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, lossless_prio_list +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, lossless_prio_list +from tests.common.config_reload import config_reload from tests.common.reboot import reboot from tests.common.utilities import wait_until -from files.pfcwd_basic_helper import run_pfcwd_basic_test -from files.helper import skip_pfcwd_test +from 
tests.snappi_tests.pfcwd.files.pfcwd_basic_helper import run_pfcwd_basic_test +from tests.snappi_tests.files.helper import skip_warm_reboot logger = logging.getLogger(__name__) -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] @pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_basic_single_lossless_prio(snappi_api, @@ -50,7 +52,6 @@ def test_pfcwd_basic_single_lossless_prio(snappi_api, "Priority and port are not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config lossless_prio = int(lossless_prio) @@ -101,7 +102,6 @@ def test_pfcwd_basic_multi_lossless_prio(snappi_api, "Port is not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config @@ -157,7 +157,7 @@ def test_pfcwd_basic_single_lossless_prio_reboot(snappi_api, "Priority and port are not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) + skip_warm_reboot(duthost, reboot_type) testbed_config, port_config_list = snappi_testbed_config lossless_prio = int(lossless_prio) @@ -220,7 +220,7 @@ def test_pfcwd_basic_multi_lossless_prio_reboot(snappi_api, "Port is not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) + skip_warm_reboot(duthost, reboot_type) testbed_config, port_config_list = snappi_testbed_config @@ -280,7 +280,6 @@ def test_pfcwd_basic_single_lossless_prio_service_restart(snappi_api, "Priority and port are not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config 
lossless_prio = int(lossless_prio) @@ -303,6 +302,8 @@ def test_pfcwd_basic_single_lossless_prio_service_restart(snappi_api, prio_dscp_map=prio_dscp_map, trigger_pfcwd=trigger_pfcwd) + config_reload(duthost=duthost, config_source='minigraph', safe_reload=True) + @pytest.mark.disable_loganalyzer @pytest.mark.parametrize('restart_service', ['swss']) @@ -342,7 +343,6 @@ def test_pfcwd_basic_multi_lossless_prio_restart_service(snappi_api, "Port is not mapped to the expected DUT") duthost = duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config @@ -363,3 +363,5 @@ def test_pfcwd_basic_multi_lossless_prio_restart_service(snappi_api, prio_list=lossless_prio_list, prio_dscp_map=prio_dscp_map, trigger_pfcwd=trigger_pfcwd) + + config_reload(duthost=duthost, config_source='minigraph', safe_reload=True) diff --git a/tests/snappi/pfcwd/test_pfcwd_burst_storm_with_snappi.py b/tests/snappi_tests/pfcwd/test_pfcwd_burst_storm_with_snappi.py similarity index 89% rename from tests/snappi/pfcwd/test_pfcwd_burst_storm_with_snappi.py rename to tests/snappi_tests/pfcwd/test_pfcwd_burst_storm_with_snappi.py index 3ef9a160d12..f22e566c1ce 100644 --- a/tests/snappi/pfcwd/test_pfcwd_burst_storm_with_snappi.py +++ b/tests/snappi_tests/pfcwd/test_pfcwd_burst_storm_with_snappi.py @@ -4,14 +4,14 @@ from tests.common.helpers.assertions import pytest_require from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map -from files.pfcwd_burst_storm_helper import run_pfcwd_burst_storm_test +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map +from 
tests.snappi_tests.pfcwd.files.pfcwd_burst_storm_helper import run_pfcwd_burst_storm_test logger = logging.getLogger(__name__) -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_pfcwd_burst_storm_single_lossless_prio(snappi_api, snappi_testbed_config, diff --git a/tests/snappi/pfcwd/test_pfcwd_m2o_with_snappi.py b/tests/snappi_tests/pfcwd/test_pfcwd_m2o_with_snappi.py similarity index 84% rename from tests/snappi/pfcwd/test_pfcwd_m2o_with_snappi.py rename to tests/snappi_tests/pfcwd/test_pfcwd_m2o_with_snappi.py index 95f7912a197..1b1ca9d254a 100644 --- a/tests/snappi/pfcwd/test_pfcwd_m2o_with_snappi.py +++ b/tests/snappi_tests/pfcwd/test_pfcwd_m2o_with_snappi.py @@ -3,18 +3,16 @@ from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list,\ +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list,\ lossless_prio_list, lossy_prio_list +from tests.snappi_tests.pfcwd.files.pfcwd_multi_node_helper import run_pfcwd_multi_node_test -from files.pfcwd_multi_node_helper import run_pfcwd_multi_node_test -from files.helper import skip_pfcwd_test +pytestmark = [ pytest.mark.topology('tgen') ] -pytestmark = [ pytest.mark.topology('snappi') ] -# @pytest.mark.parametrize("trigger_pfcwd", [True, False]) -@pytest.mark.parametrize("trigger_pfcwd", [True]) +@pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_many_to_one(snappi_api, snappi_testbed_config, conn_graph_facts, @@ -52,7 +50,6 @@ def test_pfcwd_many_to_one(snappi_api, "Priority and port are not mapped to the expected DUT") duthost = 
duthosts[rand_one_dut_hostname] - skip_pfcwd_test(duthost=duthost, trigger_pfcwd=trigger_pfcwd) testbed_config, port_config_list = snappi_testbed_config lossless_prio = int(lossless_prio) diff --git a/tests/snappi/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py b/tests/snappi_tests/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py similarity index 87% rename from tests/snappi/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py rename to tests/snappi_tests/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py index baf9d919c7b..46386c45d76 100644 --- a/tests/snappi/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py +++ b/tests/snappi_tests/pfcwd/test_pfcwd_runtime_traffic_with_snappi.py @@ -3,13 +3,13 @@ from tests.common.helpers.assertions import pytest_require, pytest_assert from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.qos_fixtures import prio_dscp_map, all_prio_list +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, all_prio_list -from files.pfcwd_runtime_traffic_helper import run_pfcwd_runtime_traffic_test +from tests.snappi_tests.pfcwd.files.pfcwd_runtime_traffic_helper import run_pfcwd_runtime_traffic_test -pytestmark = [ pytest.mark.topology('snappi') ] +pytestmark = [ pytest.mark.topology('tgen') ] def test_pfcwd_runtime_traffic(snappi_api, snappi_testbed_config, diff --git a/tests/snappi_tests/qos/__init__.py b/tests/snappi_tests/qos/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/snappi_tests/qos/files/__init__.py b/tests/snappi_tests/qos/files/__init__.py new file mode 100644 index 00000000000..0f18bfccfca --- /dev/null +++ b/tests/snappi_tests/qos/files/__init__.py @@ -0,0 +1 @@ +# Local library for QoS Snappi tests. 
diff --git a/tests/snappi_tests/qos/files/packet_reorder_helper.py b/tests/snappi_tests/qos/files/packet_reorder_helper.py new file mode 100644 index 00000000000..7e04c7a53c5 --- /dev/null +++ b/tests/snappi_tests/qos/files/packet_reorder_helper.py @@ -0,0 +1,314 @@ +import time +import logging + +from tests.common.helpers.assertions import pytest_assert +from tests.common.snappi_tests.snappi_helpers import get_dut_port_id +from tests.common.snappi_tests.port import select_ports, select_tx_port +from tests.common.snappi_tests.snappi_helpers import wait_for_arp + +logger = logging.getLogger(__name__) + +FLOW_NAME = 'IP-IP Test Flow' +TX_PORT_NAME = None +RX_PORT_NAME = None +FLOW_AGGR_RATE_PERCENT = 70 +LARGER_PKT_SIZE = 512 +SMALLER_PKT_SIZE = 256 +PKT_STEP_SIZE = LARGER_PKT_SIZE - SMALLER_PKT_SIZE +UDP_SRC_PORT = 63 +UDP_DST_PORT = 63 +UDP_PKT_LEN = 190 +TOTAL_NUM_PKTS = 100000 +EXP_FLOW_DUR_SEC = 3 +SNAPPI_POLL_DELAY_SEC = 2 +INNER_PKT_SRC_IP = "20.0.20.0" +INNER_PKT_DST_IP = "21.0.20.0" +SEQUENCE_CHECKING_THRESHOLD = 1 + + +def run_ipip_packet_reorder_test(api, + testbed_config, + port_config_list, + conn_data, + fanout_data, + duthost, + dut_port, + flow_prio_list, + prio_dscp_map): + """ + Run a IP-IP Packet Reorder Test + Args: + api (obj): snappi session + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + conn_data (dict): the dictionary returned by conn_graph_fact. + fanout_data (dict): the dictionary returned by fanout_graph_fact. + duthost (Ansible host instance): device under test + dut_port (str): DUT port to test + flow_prio_list (list): priorities of flows + prio_dscp_map (dict): Priority vs. DSCP map (key = priority). 
+ + Returns: + N/A + """ + + pytest_assert(testbed_config is not None, 'Fail to get L2/3 testbed config') + + # Get the ID of the port to test + port_id = get_dut_port_id(dut_hostname=duthost.hostname, + dut_port=dut_port, + conn_data=conn_data, + fanout_data=fanout_data) + + pytest_assert(port_id is not None, + 'Fail to get ID for port {}'.format(dut_port)) + + flow_rate_percent = int(FLOW_AGGR_RATE_PERCENT / len(flow_prio_list)) + + # Generate traffic config + __gen_traffic(testbed_config=testbed_config, + port_config_list=port_config_list, + port_id=port_id, + flow_name=FLOW_NAME, + flow_prio_list=flow_prio_list, + flow_rate_percent=flow_rate_percent, + total_tx_pkts=TOTAL_NUM_PKTS, + larger_pkt_size=LARGER_PKT_SIZE, + smaller_pkt_size=SMALLER_PKT_SIZE, + pkt_step_size=PKT_STEP_SIZE, + prio_dscp_map=prio_dscp_map) + + flows = testbed_config.flows + all_flow_names = [flow.name for flow in flows] + + # Run traffic + flow_metrics = __run_traffic(api=api, + config=testbed_config, + all_flow_names=all_flow_names, + timeout=EXP_FLOW_DUR_SEC) + + # Verify results i.e. no out of order packets + __verify_results(api=api, + flow_metrics=flow_metrics, + exp_rx_pkts=TOTAL_NUM_PKTS) + + +def __gen_traffic(testbed_config, + port_config_list, + port_id, + flow_name, + flow_prio_list, + flow_rate_percent, + total_tx_pkts, + larger_pkt_size, + smaller_pkt_size, + pkt_step_size, + prio_dscp_map): + """ + Generate configurations of flows, and device configurations on both the DUT, and ixia device which + emulates a neighbor. 
+ Args: + testbed_config (obj): testbed L1/L2/L3 configuration + port_config_list (list): list of port configuration + port_id (int): ID of DUT port to test + flow_name (str): name of flow + flow_prio_list (list): priorities of the flow + flow_rate_percent (int): rate percentage for each flow + total_tx_pkts (int): total number of packets to transmit + larger_pkt_size (int): packet size of larger data flow in bytes + smaller_pkt_size (int): packet size of smaller data flow in bytes + pkt_step_size (int): packet size step of flow in bytes + prio_dscp_map (dict): Priority vs. DSCP map (key = priority). + Returns: + N/A + """ + + rx_port_id = port_id + tx_port_id_list, _ = select_ports(port_config_list=port_config_list, + pattern="many to one", + rx_port_id=rx_port_id) + + pytest_assert(len(tx_port_id_list) > 0, "Cannot find any TX ports") + tx_port_id = select_tx_port(tx_port_id_list=tx_port_id_list, + rx_port_id=rx_port_id) + pytest_assert(tx_port_id is not None, "Cannot find a suitable TX port") + + tx_port_config = next((port_tx for port_tx in port_config_list if port_tx.id == tx_port_id), None) + rx_port_config = next((port_rx for port_rx in port_config_list if port_rx.id == rx_port_id), None) + + # Set the correct MAC address for the switch + tx_mac = tx_port_config.mac + if tx_port_config.gateway == rx_port_config.gateway and \ + tx_port_config.prefix_len == rx_port_config.prefix_len: + # If source and destination port are in the same subnet, use the rx port MAC, else use the switch MAC + rx_mac = rx_port_config.mac + else: + rx_mac = tx_port_config.gateway_mac + + tx_port_name = testbed_config.ports[tx_port_id].name + rx_port_name = testbed_config.ports[rx_port_id].name + + global TX_PORT_NAME, RX_PORT_NAME + TX_PORT_NAME = tx_port_name + RX_PORT_NAME = rx_port_name + + for prio in flow_prio_list: + # Begin configuring flows + ipip_flow = testbed_config.flows.flow(name="{} Packet_Prio_{}".format(flow_name, prio))[-1] + ipip_flow.tx_rx.port.tx_name = 
tx_port_name + ipip_flow.tx_rx.port.rx_name = rx_port_name + eth, outer_ipv4, inner_ipv4, udp = ipip_flow.packet.ethernet().ipv4().ipv4().udp() + + # Configure ethernet header + eth.src.value = tx_mac + eth.dst.value = rx_mac + + # Configure outer IPv4 header + outer_ipv4.src.value = tx_port_config.ip + outer_ipv4.dst.value = rx_port_config.ip + outer_ipv4.identification.choice = "increment" + outer_ipv4.identification.increment.start = 1 + outer_ipv4.identification.increment.step = 1 + outer_ipv4.identification.increment.count = total_tx_pkts + outer_ipv4.priority.choice = outer_ipv4.priority.DSCP + outer_ipv4.priority.dscp.ecn.value = ( + outer_ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + + # Configure inner IPv4 header + inner_ipv4.src.value = INNER_PKT_SRC_IP + inner_ipv4.dst.value = INNER_PKT_DST_IP + inner_ipv4.priority.choice = inner_ipv4.priority.DSCP + inner_ipv4.priority.dscp.ecn.value = ( + inner_ipv4.priority.dscp.ecn.CAPABLE_TRANSPORT_1) + + # Configure UDP header + udp.src_port.value = UDP_SRC_PORT + udp.dst_port.value = UDP_DST_PORT + udp.length.value = UDP_PKT_LEN + + # Configure the appropriate priorities for each header + eth.pfc_queue.value = prio + outer_ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + inner_ipv4.priority.dscp.phb.values = prio_dscp_map[prio] + + # Configure packet size and other variables + ipip_flow.size.increment.start = smaller_pkt_size + ipip_flow.size.increment.end = larger_pkt_size + ipip_flow.size.increment.step = pkt_step_size + ipip_flow.rate.percentage = flow_rate_percent + ipip_flow.duration.fixed_packets.packets = total_tx_pkts + + ipip_flow.metrics.enable = True + ipip_flow.metrics.loss = True + + +def __run_traffic(api, + config, + all_flow_names, + timeout): + + """ + Run traffic and dump per-flow statistics + Args: + api (obj): snappi session + config (obj): experiment config (testbed config + flow config) + all_flow_names (list): list of names of all the flows + timeout (int): time to wait in seconds 
before snappi begins recovering metrics + Returns: + flow_metrics (list): list of flow metrics + """ + + api.set_config(config) + + logger.info('Wait for Arp to Resolve ...') + wait_for_arp(api, max_attempts=30, poll_interval_sec=2) + + logger.info("Setting up Ixia API session to capture advanced statistics ...") + __configure_advanced_stats(api) + + logger.info('Starting transmit on all flows ...') + ts = api.transmit_state() + ts.state = ts.START + api.set_transmit_state(ts) + + time.sleep(timeout) + + attempts = 0 + max_attempts = 20 + + while attempts < max_attempts: + request = api.metrics_request() + request.flow.flow_names = all_flow_names + rows = api.get_metrics(request).flow_metrics + + """ If all the flows have stopped """ + transmit_states = [row.transmit for row in rows] + if len(rows) == len(all_flow_names) and\ + list(set(transmit_states)) == ['stopped']: + time.sleep(SNAPPI_POLL_DELAY_SEC) + break + else: + time.sleep(1) + attempts += 1 + + pytest_assert(attempts < max_attempts, + "Flows do not stop in {} seconds".format(max_attempts)) + + """ Dump per-flow statistics """ + request = api.metrics_request() + request.flow.flow_names = all_flow_names + flow_metrics = api.get_metrics(request).flow_metrics + logger.info('Stop transmit on all flows ...') + ts = api.transmit_state() + ts.state = ts.STOP + api.set_transmit_state(ts) + + return flow_metrics + + +def __configure_advanced_stats(api): + """ + Set up advanced statistics on the Ixia API session + Args: + api (obj): snappi session + Returns: + N/A + """ + + # Connect to restpy session + restpy_session = api.assistant.Session + ixnet = restpy_session.Ixnetwork + statVarIxia = ixnet.Traffic.Statistics + statVarIxia.AdvancedSequenceChecking.Enabled = True + statVarIxia.AdvancedSequenceChecking.AdvancedSequenceThreshold = SEQUENCE_CHECKING_THRESHOLD + + +def __verify_results(api, + flow_metrics, + exp_rx_pkts): + """ + Verify if we get expected experiment results + Args: + api (obj): snappi session + 
flow_metrics (list): per-flow statistics + exp_rx_pkts (int): total number of packets to receive + Returns: + N/A + """ + + # Calculate total frames sent and received across all configured ports + total_tx = sum([flow_metric.frames_tx for flow_metric in flow_metrics]) + total_rx = sum([flow_metric.frames_rx for flow_metric in flow_metrics]) + + pytest_assert(total_tx == total_rx, "Number of total Tx packets = {} and Rx packets = {} are not equal." + .format(total_tx, total_rx)) + pytest_assert(total_rx == exp_rx_pkts, "Number of total Rx packets = {} are not equal to expected packets = {}" + .format(total_rx, exp_rx_pkts)) + + # Check for packet re-order + flow_stat = api.assistant.StatViewAssistant("Flow Statistics") + for stat in flow_stat.Rows: + in_order_frames = int(stat["In Order Frames"]) + reordered_frames = int(stat["Reordered Frames"]) + error_msg = "Frames are out of order. Reordered frames = {}".format(reordered_frames) + pytest_assert(in_order_frames == total_tx and reordered_frames == 0, error_msg) diff --git a/tests/snappi_tests/qos/test_ipip_packet_reorder_with_snappi.py b/tests/snappi_tests/qos/test_ipip_packet_reorder_with_snappi.py new file mode 100644 index 00000000000..1123c4dcc60 --- /dev/null +++ b/tests/snappi_tests/qos/test_ipip_packet_reorder_with_snappi.py @@ -0,0 +1,62 @@ +import logging +import pytest + +from tests.snappi_tests.qos.files.packet_reorder_helper import run_ipip_packet_reorder_test +from tests.common.helpers.assertions import pytest_require +from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ + fanout_graph_facts # noqa F401 +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ + snappi_api, snappi_testbed_config # noqa F401 +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map # noqa F401 + +logger = logging.getLogger(__name__) + +pytestmark = [pytest.mark.topology('tgen')] + + +def test_ip_in_ip_packet_reorder(snappi_api, # noqa F811 + 
snappi_testbed_config, # noqa F811 + conn_graph_facts, # noqa F811 + fanout_graph_facts, # noqa F811 + duthosts, + rand_one_dut_hostname, + rand_one_dut_portname_oper_up, + enum_dut_all_prio, + prio_dscp_map): # noqa F811 + """ + Validate that IPinIP RDMA packets are not being reordered on a single lossless priority + + Args: + snappi_api (pytest fixture): SNAPPI session + snappi_testbed_config (pytest fixture): testbed configuration information + conn_graph_facts (pytest fixture): connection graph + fanout_graph_facts (pytest fixture): fanout graph + duthosts (pytest fixture): list of DUTs + rand_one_dut_hostname (str): hostname of DUT + rand_one_dut_portname_oper_up (str): port to test, e.g., 's6100-1|Ethernet0' + enum_dut_all_prio (str): priority (lossy and lossless) to test, e.g., 's6100-1|3' + prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority). + + Returns: + N/A + """ + + dut_hostname, dut_port = rand_one_dut_portname_oper_up.split('|') + dut_hostname2, prio = enum_dut_all_prio.split('|') + pytest_require(rand_one_dut_hostname == dut_hostname == dut_hostname2, + "Priority and port are not mapped to the expected DUT") + + testbed_config, port_config_list = snappi_testbed_config + duthost = duthosts[rand_one_dut_hostname] + test_prio = int(prio) + flow_prio_list = [test_prio] + + run_ipip_packet_reorder_test(api=snappi_api, + testbed_config=testbed_config, + port_config_list=port_config_list, + conn_data=conn_graph_facts, + fanout_data=fanout_graph_facts, + duthost=duthost, + dut_port=dut_port, + flow_prio_list=flow_prio_list, + prio_dscp_map=prio_dscp_map) diff --git a/tests/snappi/test_snappi.py b/tests/snappi_tests/test_snappi.py similarity index 95% rename from tests/snappi/test_snappi.py rename to tests/snappi_tests/test_snappi.py index 5d71d4d2d91..f400f29b68d 100644 --- a/tests/snappi/test_snappi.py +++ b/tests/snappi_tests/test_snappi.py @@ -4,11 +4,11 @@ from tests.common.helpers.assertions import pytest_assert, pytest_require from 
tests.common.fixtures.conn_graph_facts import conn_graph_facts,\ fanout_graph_facts -from tests.common.snappi.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ +from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port,\ snappi_api, snappi_testbed_config -from tests.common.snappi.snappi_helpers import wait_for_arp -from tests.common.snappi.port import select_ports -from tests.common.snappi.qos_fixtures import prio_dscp_map +from tests.common.snappi_tests.snappi_helpers import wait_for_arp +from tests.common.snappi_tests.port import select_ports +from tests.common.snappi_tests.qos_fixtures import prio_dscp_map SNAPPI_POLL_DELAY_SEC = 2 @@ -127,7 +127,7 @@ def test_snappi(snappi_api, snappi_api.set_config(config) # """Wait for Arp""" - wait_for_arp(snappi_api, max_attempts=10, poll_interval_sec=2) + wait_for_arp(snappi_api, max_attempts=30, poll_interval_sec=2) # """ Start traffic """ ts = snappi_api.transmit_state() diff --git a/tests/snmp/conftest.py b/tests/snmp/conftest.py index ec6909c81fa..be9b9fd7197 100644 --- a/tests/snmp/conftest.py +++ b/tests/snmp/conftest.py @@ -1,11 +1,19 @@ import pytest from tests.common.utilities import wait_until + @pytest.fixture(scope="module", autouse=True) def setup_check_snmp_ready(duthosts): for duthost in duthosts: assert wait_until(300, 20, 0, duthost.is_service_fully_started, "snmp"), "SNMP service is not running" + +@pytest.fixture(scope="module", autouse=True) +def enable_queue_counterpoll_type(duthosts): + for duthost in duthosts: + duthost.command('counterpoll queue enable') + + def pytest_addoption(parser): """ Adds options to pytest that are used by the snmp tests. 
diff --git a/tests/snmp/test_snmp_fdb.py b/tests/snmp/test_snmp_fdb.py index 2541b52473f..bbb0481759d 100644 --- a/tests/snmp/test_snmp_fdb.py +++ b/tests/snmp/test_snmp_fdb.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0') + pytest.mark.topology('t0', 'm0') ] # Use original ports intead of sub interfaces for ptfadapter if it's t0-backend diff --git a/tests/snmp/test_snmp_memory.py b/tests/snmp/test_snmp_memory.py index ad282fa053d..3689bd745b4 100644 --- a/tests/snmp/test_snmp_memory.py +++ b/tests/snmp/test_snmp_memory.py @@ -19,11 +19,23 @@ @pytest.fixture(autouse=True, scope="module") def get_parameter(request): """ - Get optional parameter percentage or return default 4% + Get optional parameter percentage """ - global percent - percent = request.config.getoption("--percentage") or 4 - return percent + global user_input_percentage + user_input_percentage = request.config.getoption("--percentage") + return user_input_percentage + +def get_percentage_threshold(total_mem): + """ + When total memory is small, the same difference will be more + pronounced. So we should allow for more difference. 
+ """ + if user_input_percentage: + return user_input_percentage + if total_mem > 2 * 1024 * 1024: + return 4 + else: + return 12 @pytest.fixture() def load_memory(duthosts, enum_rand_one_per_hwsku_hostname): @@ -34,7 +46,7 @@ def load_memory(duthosts, enum_rand_one_per_hwsku_hostname): duthost.copy(src='snmp/memory.py', dest='/tmp/memory.py') duthost.shell("nohup python /tmp/memory.py > /dev/null 2>&1 &") yield - duthost.shell("killall python /tmp/memory.py", module_ignore_errors=True) + duthost.shell("pkill -SIGTERM -f 'python /tmp/memory.py'", module_ignore_errors=True) def collect_memory(duthost): """ @@ -53,9 +65,12 @@ def test_snmp_memory(duthosts, enum_rand_one_per_hwsku_hostname, localhost, cred """ duthost = duthosts[enum_rand_one_per_hwsku_hostname] host_ip = duthost.host.options['inventory_manager'].get_host(duthost.hostname).vars['ansible_host'] - compare = (('ansible_sysTotalFreeMemery', 'MemFree'), ('ansible_sysTotalBuffMemory', 'Buffers'), + compare = (('ansible_sysTotalFreeMemory', 'MemFree'), ('ansible_sysTotalBuffMemory', 'Buffers'), ('ansible_sysCachedMemory', 'Cached'), ('ansible_sysTotalSharedMemory', 'Shmem')) + mem_total = collect_memory(duthost)['MemTotal'] + percentage = get_percentage_threshold(int(mem_total)) + # Checking memory attributes within a certain percentage is not guarantee to # work 100% of the time. There could always be a big memory change between the # test read from snmp and read from system. @@ -64,15 +79,15 @@ def test_snmp_memory(duthosts, enum_rand_one_per_hwsku_hostname, localhost, cred snmp_facts = get_snmp_facts(localhost, host=host_ip, version="v2c", community=creds_all_duts[duthost.hostname]["snmp_rocommunity"], wait=True)['ansible_facts'] facts = collect_memory(duthost) - # Verify correct behaviour of sysTotalMemery - pytest_assert(not abs(snmp_facts['ansible_sysTotalMemery'] - int(facts['MemTotal'])), - "Unexpected res sysTotalMemery {} v.s. 
{}".format(snmp_facts['ansible_sysTotalMemery'], facts['MemTotal'])) + # Verify correct behaviour of sysTotalMemory + pytest_assert(not abs(snmp_facts['ansible_sysTotalMemory'] - int(facts['MemTotal'])), + "Unexpected res sysTotalMemory {} v.s. {}".format(snmp_facts['ansible_sysTotalMemory'], facts['MemTotal'])) - # Verify correct behaviour of sysTotalFreeMemery, sysTotalBuffMemory, sysCachedMemory, sysTotalSharedMemory + # Verify correct behaviour of sysTotalFreeMemory, sysTotalBuffMemory, sysCachedMemory, sysTotalSharedMemory new_comp = set() snmp_diff = [] for snmp, sys_data in compare: - if CALC_DIFF(snmp_facts[snmp], facts[sys_data]) > percent: + if CALC_DIFF(snmp_facts[snmp], facts[sys_data]) > percentage: snmp_diff.append(snmp) new_comp.add((snmp, sys_data)) @@ -80,9 +95,9 @@ def test_snmp_memory(duthosts, enum_rand_one_per_hwsku_hostname, localhost, cred if not snmp_diff: return - logging.info("Snmp memory MIBs: {} differs more than {} %".format(snmp_diff, percent)) + logging.info("Snmp memory MIBs: {} differs more than {} %".format(snmp_diff, percentage)) - pytest.fail("Snmp memory MIBs: {} differs more than {} %".format(snmp_diff, percent)) + pytest.fail("Snmp memory MIBs: {} differs more than {} %".format(snmp_diff, percentage)) def test_snmp_memory_load(duthosts, enum_rand_one_per_hwsku_hostname, localhost, creds_all_duts, load_memory): @@ -95,5 +110,46 @@ def test_snmp_memory_load(duthosts, enum_rand_one_per_hwsku_hostname, localhost, snmp_facts = get_snmp_facts(localhost, host=host_ip, version="v2c", community=creds_all_duts[duthost.hostname]["snmp_rocommunity"], wait=True)['ansible_facts'] mem_free = duthost.shell("grep MemFree /proc/meminfo | awk '{print $2}'")['stdout'] - pytest_assert(CALC_DIFF(snmp_facts['ansible_sysTotalFreeMemery'], mem_free) < percent, - "sysTotalFreeMemery differs by more than {}".format(percent)) + mem_total = duthost.shell("grep MemTotal /proc/meminfo | awk '{print $2}'")['stdout'] + percentage = 
get_percentage_threshold(int(mem_total)) + pytest_assert(CALC_DIFF(snmp_facts['ansible_sysTotalFreeMemory'], mem_free) < percentage, + "sysTotalFreeMemory differs by more than {}".format(percentage)) + +def test_snmp_swap(duthosts, enum_rand_one_per_hwsku_hostname, localhost, creds_all_duts): + """ + Verify swap info is correct + """ + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + host_ip = duthost.host.options['inventory_manager'].get_host(duthost.hostname).vars['ansible_host'] + + total_swap = duthost.shell("grep SwapTotal /proc/meminfo | awk '{print $2}'")['stdout'] + free_swap = duthost.shell("grep SwapFree /proc/meminfo | awk '{print $2}'")['stdout'] + + mem_total = duthost.shell("grep MemTotal /proc/meminfo | awk '{print $2}'")['stdout'] + percentage = get_percentage_threshold(int(mem_total)) + + if total_swap == "0": + pytest.skip("Swap is not on for this device, snmp does not support swap related queries when swap isn't on") + + snmp_facts = get_snmp_facts(localhost, host=host_ip, version="v2c", include_swap=True, + community=creds_all_duts[duthost.hostname]["snmp_rocommunity"], wait=True)['ansible_facts'] + snmp_total_swap = snmp_facts['ansible_sysTotalSwap'] + snmp_free_swap = snmp_facts['ansible_sysTotalFreeSwap'] + + logging.info("total_swap {}, free_swap {}, snmp_total_swap {}, snmp_free_swap {}".format(total_swap, free_swap, snmp_total_swap, snmp_free_swap)) + + pytest_assert(CALC_DIFF(snmp_total_swap, total_swap) < percentage, + "sysTotalSwap differs by more than {}: expect {} received {}".format(percentage, total_swap, snmp_total_swap)) + + if snmp_free_swap == 0 or snmp_total_swap / snmp_free_swap >= 2: + """ + Free swap is less than half of total swap, compare used swap instead + The comparison could get inaccurate if the number to compare is close to 0, + so we test only one of used/free swap space. 
+ """ + pytest_assert(CALC_DIFF(snmp_total_swap - snmp_free_swap, int(total_swap) - int(free_swap)) < percentage, + "Used Swap (calculated using sysTotalFreeSwap) differs by more than {}: expect {} received {}".format( + percentage, snmp_total_swap - snmp_free_swap, int(total_swap) - int(free_swap))) + else: + pytest_assert(CALC_DIFF(snmp_free_swap, free_swap) < percentage, + "sysTotalFreeSwap differs by more than {}: expect {} received {}".format(percentage, snmp_free_swap, free_swap)) diff --git a/tests/snmp/test_snmp_phy_entity.py b/tests/snmp/test_snmp_phy_entity.py index b6367ec5625..33bd6d6c609 100644 --- a/tests/snmp/test_snmp_phy_entity.py +++ b/tests/snmp/test_snmp_phy_entity.py @@ -264,7 +264,6 @@ def test_fan_info(duthosts, enum_rand_one_per_hwsku_hostname, snmp_physical_enti :return: """ snmp_physical_entity_info = snmp_physical_entity_and_sensor_info["entity_mib"] - snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] duthost = duthosts[enum_rand_one_per_hwsku_hostname] keys = redis_get_keys(duthost, STATE_DB, FAN_KEY_TEMPLATE.format('*')) # Ignore the test if the platform does not have fans (e.g Line card) @@ -323,6 +322,7 @@ def test_fan_info(duthosts, enum_rand_one_per_hwsku_hostname, snmp_physical_enti # snmp_entity_sensor_info is only supported in image newer than 202012 if is_sensor_test_supported(duthost): + snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] expect_sensor_oid = expect_oid + SENSOR_TYPE_FAN assert expect_sensor_oid in snmp_entity_sensor_info, 'Cannot find fan {} in entity sensor mib'.format(name) tachometers_sensor_fact = snmp_entity_sensor_info[expect_sensor_oid] @@ -391,7 +391,6 @@ def _check_psu_sensor(duthost, psu_name, psu_info, psu_oid, snmp_physical_entity :return: """ snmp_physical_entity_info = snmp_physical_entity_and_sensor_info["entity_mib"] - snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] for field, sensor_tuple in 
PSU_SENSOR_INFO.items(): expect_oid = psu_oid + DEVICE_TYPE_POWER_MONITOR + sensor_tuple[2] if is_null_str(psu_info[field]): @@ -416,6 +415,7 @@ def _check_psu_sensor(duthost, psu_name, psu_info, psu_oid, snmp_physical_entity # snmp_entity_sensor_info is only supported in image newer than 202012 if is_sensor_test_supported(duthost): + snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] entity_sensor_snmp_facts = snmp_entity_sensor_info[expect_oid] if field == "current": assert entity_sensor_snmp_facts['entPhySensorType'] == str(int(EntitySensorDataType.AMPERES)) @@ -442,7 +442,6 @@ def test_thermal_info(duthosts, enum_rand_one_per_hwsku_hostname, snmp_physical_ :return: """ snmp_physical_entity_info = snmp_physical_entity_and_sensor_info["entity_mib"] - snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] duthost = duthosts[enum_rand_one_per_hwsku_hostname] keys = redis_get_keys(duthost, STATE_DB, THERMAL_KEY_TEMPLATE.format('*')) assert keys, 'Thermal information does not exist in DB' @@ -475,6 +474,7 @@ def test_thermal_info(duthosts, enum_rand_one_per_hwsku_hostname, snmp_physical_ # snmp_entity_sensor_info is only supported in image newer than 202012 if is_sensor_test_supported(duthost): + snmp_entity_sensor_info = snmp_physical_entity_and_sensor_info["sensor_mib"] thermal_sensor_snmp_fact = snmp_entity_sensor_info[expect_oid] assert thermal_sensor_snmp_fact['entPhySensorType'] == str(int(EntitySensorDataType.CELSIUS)) assert thermal_sensor_snmp_fact['entPhySensorPrecision'] == '3' @@ -621,7 +621,7 @@ def test_turn_off_psu_and_check_psu_info(duthosts, enum_rand_one_per_hwsku_hostn pdu_controller.turn_off_outlet(first_outlet) assert wait_until(30, 5, 0, check_outlet_status, pdu_controller, first_outlet, False) # wait for psud update the database - assert wait_until(180, 20, 0, _check_psu_status_after_power_off, duthost, localhost, creds_all_duts) + assert wait_until(180, 20, 5, _check_psu_status_after_power_off, 
duthost, localhost, creds_all_duts) def _check_psu_status_after_power_off(duthost, localhost, creds_all_duts): @@ -634,7 +634,6 @@ def _check_psu_status_after_power_off(duthost, localhost, creds_all_duts): """ snmp_physical_entity_and_sensor_info = get_entity_and_sensor_mib(duthost, localhost, creds_all_duts) entity_mib_info = snmp_physical_entity_and_sensor_info["entity_mib"] - entity_sensor_mib_info = snmp_physical_entity_and_sensor_info["sensor_mib"] keys = redis_get_keys(duthost, STATE_DB, PSU_KEY_TEMPLATE.format('*')) power_off_psu_found = False @@ -650,7 +649,12 @@ def _check_psu_status_after_power_off(duthost, localhost, creds_all_duts): for field, sensor_tuple in PSU_SENSOR_INFO.items(): sensor_oid = expect_oid + DEVICE_TYPE_POWER_MONITOR + sensor_tuple[2] # entity_sensor_mib_info is only supported in image newer than 202012 + if sensor_oid in entity_mib_info: + if psu_info['current'] == '0.0' and psu_info['power'] == '0.0': + power_off_psu_found = True + break if is_sensor_test_supported(duthost): + entity_sensor_mib_info = snmp_physical_entity_and_sensor_info["sensor_mib"] if sensor_oid not in entity_mib_info and sensor_oid not in entity_sensor_mib_info: power_off_psu_found = True break @@ -692,7 +696,6 @@ def test_remove_insert_fan_and_check_fan_info(duthosts, enum_rand_one_per_hwsku_ snmp_physical_entity_and_sensor_info = get_entity_and_sensor_mib(duthost, localhost, creds_all_duts) entity_mib_info = snmp_physical_entity_and_sensor_info["entity_mib"] - entity_sensor_mib_info = snmp_physical_entity_and_sensor_info["sensor_mib"] for key in keys: fan_info = redis_hgetall(duthost, STATE_DB, key) @@ -718,6 +721,7 @@ def test_remove_insert_fan_and_check_fan_info(duthosts, enum_rand_one_per_hwsku_ tachometers_oid = expect_oid + SENSOR_TYPE_FAN # entity_sensor_mib_info is only supported in image newer than 202012 if is_sensor_test_supported(duthost): + entity_sensor_mib_info = snmp_physical_entity_and_sensor_info["sensor_mib"] assert tachometers_oid not in 
entity_mib_info and tachometers_oid not in entity_sensor_mib_info, \ 'Absence fan tachometers info should not in mib' else: diff --git a/tests/span/conftest.py b/tests/span/conftest.py index e5b99e93e08..e3c0f864a43 100644 --- a/tests/span/conftest.py +++ b/tests/span/conftest.py @@ -5,7 +5,7 @@ import pytest from tests.common.storage_backend.backend_utils import skip_test_module_over_backend_topologies - +from tests.common.utilities import skip_release @pytest.fixture(scope="module") def cfg_facts(duthosts, rand_one_dut_hostname, skip_test_module_over_backend_topologies): @@ -56,6 +56,12 @@ def ports_for_test(cfg_facts): 'vlan': vlan } +@pytest.fixture(scope='session', autouse=True) +def skip_unsupported_release(duthost): + """ Span mirror is not supported on release < 202012 + """ + skip_release(duthost, ["201811", "201911"]) + @pytest.fixture(scope='module', autouse=True) def skip_unsupported_asic_type(duthost): SPAN_UNSUPPORTED_ASIC_TYPE = ["broadcom", "cisco-8000"] diff --git a/tests/ssh/conftest.py b/tests/ssh/conftest.py index 46d7a8cd95d..3846ea641a0 100644 --- a/tests/ssh/conftest.py +++ b/tests/ssh/conftest.py @@ -35,6 +35,11 @@ def generate_ssh_ciphers(request, typename): remote_cmd = "ssh -Q kex" permitted_list = PERMITTED_KEXS + # If --collect-only is specified, return the permitted list directly. Otherwise, pytest will try to + # connect to DUT. If DUT is not online, pytest will fail with collecting test items. 
+ if hasattr(request.config.option, "collectonly") and request.config.option.collectonly: + return permitted_list + testbed_name = request.config.option.testbed testbed_file = request.config.option.testbed_file testbed_module = imp.load_source('testbed', 'common/testbed.py') diff --git a/tests/ssh/test_ssh_ciphers.py b/tests/ssh/test_ssh_ciphers.py index b31c88eaf7c..a6369e5806b 100644 --- a/tests/ssh/test_ssh_ciphers.py +++ b/tests/ssh/test_ssh_ciphers.py @@ -14,6 +14,8 @@ def connect_with_specified_ciphers(duthosts, rand_one_dut_hostname, specified_cipher, creds, typename): duthost = duthosts[rand_one_dut_hostname] dutuser, dutpass = creds['sonicadmin_user'], creds['sonicadmin_password'] + sonic_admin_alt_password = duthost.host.options['variable_manager']._hostvars[duthost.hostname].get( + "ansible_altpassword") dutip = duthost.mgmt_ip if typename == "enc": @@ -34,12 +36,20 @@ def connect_with_specified_ciphers(duthosts, rand_one_dut_hostname, specified_ci i = connect.expect('{}@{}:'.format(dutuser, duthost.hostname), timeout=10) pytest_assert(i == 0, "Failed to connect") - except pexpect.exceptions.EOF: - pytest.fail("EOF reached") - except pexpect.exceptions.TIMEOUT: - pytest.fail("Timeout reached") - except Exception as e: - pytest.fail("Cannot connect to DUT host via SSH: {}".format(e)) + except: + try: + connect = pexpect.spawn(ssh_cmd) + connect.expect('.*[Pp]assword:') + connect.sendline(sonic_admin_alt_password) + + i = connect.expect('{}@{}:'.format(dutuser, duthost.hostname), timeout=10) + pytest_assert(i == 0, "Failed to connect") + except pexpect.exceptions.EOF: + pytest.fail("EOF reached") + except pexpect.exceptions.TIMEOUT: + pytest.fail("Timeout reached") + except Exception as e: + pytest.fail("Cannot connect to DUT host via SSH: {}".format(e)) def test_ssh_protocol_version(duthosts, rand_one_dut_hostname): duthost = duthosts[rand_one_dut_hostname] diff --git a/tests/stress/test_stress_routes.py b/tests/stress/test_stress_routes.py index 
19b4a23630a..d46d5d6e490 100644 --- a/tests/stress/test_stress_routes.py +++ b/tests/stress/test_stress_routes.py @@ -1,10 +1,5 @@ #!/usr/bin/env python -import math -import os -import yaml -import re -import requests import logging import pytest @@ -19,11 +14,12 @@ logger = logging.getLogger(__name__) + def announce_withdraw_routes(duthost, localhost, ptf_ip, topo_name): logger.info("announce ipv4 and ipv6 routes") localhost.announce_routes(topo_name=topo_name, ptf_ip=ptf_ip, action="announce", path="../ansible/") - wait_until(MAX_WAIT_TIME, CRM_POLLING_INTERVAL, 0, lambda: check_queue_status(duthost, "outq") == True) + wait_until(MAX_WAIT_TIME, CRM_POLLING_INTERVAL, 0, lambda: check_queue_status(duthost, "outq") is True) logger.info("ipv4 route used {}".format(get_crm_resources(duthost, "ipv4_route", "used"))) logger.info("ipv6 route used {}".format(get_crm_resources(duthost, "ipv6_route", "used"))) @@ -32,16 +28,22 @@ def announce_withdraw_routes(duthost, localhost, ptf_ip, topo_name): logger.info("withdraw ipv4 and ipv6 routes") localhost.announce_routes(topo_name=topo_name, ptf_ip=ptf_ip, action="withdraw", path="../ansible/") - wait_until(MAX_WAIT_TIME, CRM_POLLING_INTERVAL, 0, lambda: check_queue_status(duthost, "inq") == True) + wait_until(MAX_WAIT_TIME, CRM_POLLING_INTERVAL, 0, lambda: check_queue_status(duthost, "inq") is True) sleep_to_wait(CRM_POLLING_INTERVAL * 5) logger.info("ipv4 route used {}".format(get_crm_resources(duthost, "ipv4_route", "used"))) logger.info("ipv6 route used {}".format(get_crm_resources(duthost, "ipv6_route", "used"))) def test_announce_withdraw_route(duthost, localhost, tbinfo, get_function_conpleteness_level, - withdraw_and_announce_existing_routes): + withdraw_and_announce_existing_routes, loganalyzer): ptf_ip = tbinfo["ptf_ip"] topo_name = tbinfo["topo"]["name"] + if loganalyzer: + ignoreRegex = [ + ".*ERR route_check.py:.*", + ".*ERR.* \'routeCheck\' status failed.*" + ] + 
loganalyzer[duthost.hostname].ignore_regex.extend(ignoreRegex) normalized_level = get_function_conpleteness_level if normalized_level is None: @@ -61,6 +63,6 @@ def test_announce_withdraw_route(duthost, localhost, tbinfo, get_function_conple ipv6_route_used_after = get_crm_resources(duthost, "ipv6_route", "used") pytest_assert(abs(ipv4_route_used_after - ipv4_route_used_before) < ALLOW_ROUTES_CHANGE_NUMS, - "ipv4 route used after is not equal to it used before") + "ipv4 route used after is not equal to it used before") pytest_assert(abs(ipv6_route_used_after - ipv6_route_used_before) < ALLOW_ROUTES_CHANGE_NUMS, - "ipv6 route used after is not equal to it used before") + "ipv6 route used after is not equal to it used before") diff --git a/tests/sub_port_interfaces/conftest.py b/tests/sub_port_interfaces/conftest.py index 0e5cdccf006..a0b29028155 100644 --- a/tests/sub_port_interfaces/conftest.py +++ b/tests/sub_port_interfaces/conftest.py @@ -1,3 +1,4 @@ +import logging import os import ipaddress import time @@ -7,8 +8,10 @@ from tests.common import config_reload from tests.common.helpers.assertions import pytest_assert as py_assert -from tests.common.utilities import get_host_visible_vars +from tests.common.helpers.backend_acl import apply_acl_rules, bind_acl_table +from tests.common.platform.processes_utils import wait_critical_processes from tests.common.utilities import wait_until +from tests.common.mellanox_data import is_mellanox_device, get_chip_type from tests.common import constants from sub_ports_helpers import DUT_TMP_DIR from sub_ports_helpers import TEMPLATE_DIR @@ -40,6 +43,7 @@ from sub_ports_helpers import remove_static_route_from_dut from sub_ports_helpers import update_dut_arp_table +logger = logging.getLogger(__name__) def pytest_addoption(parser): """ @@ -53,24 +57,29 @@ def pytest_addoption(parser): help="Max numbers of sub-ports for test_max_numbers_of_sub_ports test case", ) - -@pytest.fixture(scope='module', autouse=True) -def 
skip_unsupported_asic_type(duthost): - SUBPORT_UNSUPPORTED_ASIC_LIST = ["th2"] - vendor = duthost.facts["asic_type"] - hostvars = get_host_visible_vars(duthost.host.options['inventory'], duthost.hostname) - for asic in SUBPORT_UNSUPPORTED_ASIC_LIST: - vendorAsic = "{0}_{1}_hwskus".format(vendor, asic) - if vendorAsic in hostvars.keys() and duthost.facts['hwsku'] in hostvars[vendorAsic]: - pytest.skip( - "Skipping test since subport is not supported on {0} {1} platforms".format(vendor, asic)) - - @pytest.fixture(params=['port', 'port_in_lag']) def port_type(request): """Port type to test, could be either port or port-channel.""" return request.param +@pytest.fixture +def acl_rule_cleanup(duthost, tbinfo): + """Cleanup all the existing DATAACL rules""" + if "t0-backend" in tbinfo["topo"]["name"]: + duthost.shell('acl-loader delete') + + yield + +@pytest.fixture +def modify_acl_table(duthost, tbinfo, port_type, acl_rule_cleanup): + """ Remove the DATAACL table prior to the test and recreate it at the end""" + if "t0-backend" in tbinfo["topo"]["name"] and 'lag' in port_type: + duthost.command('config acl remove table DATAACL') + + yield + + if "t0-backend" in tbinfo["topo"]["name"] and 'lag' in port_type: + bind_acl_table(duthost, tbinfo) @pytest.fixture def define_sub_ports_configuration(request, duthost, ptfhost, ptfadapter, port_type, tbinfo): @@ -101,6 +110,11 @@ def define_sub_ports_configuration(request, duthost, ptfhost, ptfadapter, port_t """ sub_ports_config = {} max_numbers_of_sub_ports = request.config.getoption("--max_numbers_of_sub_ports") + if is_mellanox_device(duthost) and get_chip_type(duthost) == 'spectrum1': + if max_numbers_of_sub_ports > 215: + logger.info("Maximum number of sub ports provided by user is {} not supported on SPC1, " + "will be used value: 215".format(max_numbers_of_sub_ports)) + max_numbers_of_sub_ports = 215 vlan_ranges_dut = range(20, 60, 10) vlan_ranges_ptf = range(20, 60, 10) @@ -154,7 +168,7 @@ def 
define_sub_ports_configuration(request, duthost, ptfhost, ptfadapter, port_t @pytest.fixture -def apply_config_on_the_dut(define_sub_ports_configuration, duthost, reload_dut_config): +def apply_config_on_the_dut(define_sub_ports_configuration, duthost, reload_dut_config, modify_acl_table): """ Apply Sub-ports configuration on the DUT and remove after tests @@ -205,7 +219,7 @@ def apply_config_on_the_ptf(define_sub_ports_configuration, ptfhost, reload_ptf_ @pytest.fixture(params=['same', 'different']) -def apply_route_config(request, ptfhost, define_sub_ports_configuration, apply_config_on_the_dut, apply_config_on_the_ptf): +def apply_route_config(request, tbinfo, duthost, ptfhost, port_type, define_sub_ports_configuration, apply_config_on_the_dut, apply_config_on_the_ptf): """ Apply route configuration on the PTF and remove after tests @@ -254,6 +268,10 @@ def apply_route_config(request, ptfhost, define_sub_ports_configuration, apply_c new_sub_ports[src_port].append((next_hop_sub_port, name_of_namespace)) + if "t0-backend" in tbinfo["topo"]["name"] and 'lag' not in port_type: + parent_port_list = list(set([sub_port.split('.')[0] for sub_port in sub_ports_keys])) + apply_acl_rules(duthost, tbinfo, parent_port_list) + yield { 'new_sub_ports': new_sub_ports, 'sub_ports': sub_ports @@ -274,7 +292,7 @@ def apply_route_config(request, ptfhost, define_sub_ports_configuration, apply_c @pytest.fixture(params=['svi', 'l3']) -def apply_route_config_for_port(request, duthost, ptfhost, define_sub_ports_configuration, apply_config_on_the_dut, apply_config_on_the_ptf): +def apply_route_config_for_port(request, tbinfo, duthost, ptfhost, port_type, define_sub_ports_configuration, apply_config_on_the_dut, apply_config_on_the_ptf): """ Apply route configuration on the PTF and remove after tests @@ -361,6 +379,11 @@ def apply_route_config_for_port(request, duthost, ptfhost, define_sub_ports_conf port_map[ptf_port]['dst_ports'].append((next_hop_sub_port, name_of_namespace)) + if 
"t0-backend" in tbinfo["topo"]["name"] and 'lag' not in port_type: + parent_port_list = list(set([sub_port.split('.')[0] for sub_port in sub_ports_keys])) + intf_list = parent_port_list + dut_ports.values() + apply_acl_rules(duthost, tbinfo, intf_list) + yield { 'port_map': port_map, 'sub_ports': sub_ports @@ -506,7 +529,7 @@ def apply_balancing_config(duthost, ptfhost, ptfadapter, define_sub_ports_config @pytest.fixture -def reload_dut_config(request, duthost, define_sub_ports_configuration): +def reload_dut_config(request, duthost, define_sub_ports_configuration, loganalyzer): """ DUT's configuration reload on teardown @@ -516,6 +539,9 @@ def reload_dut_config(request, duthost, define_sub_ports_configuration): define_sub_ports_configuration: Dictonary of parameters for configuration DUT """ yield + if loganalyzer and loganalyzer[duthost.hostname]: + loganalyzer[duthost.hostname].add_start_ignore_mark() + sub_ports = define_sub_ports_configuration['sub_ports'] dut_ports = define_sub_ports_configuration['dut_ports'] cfg_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] @@ -531,7 +557,9 @@ def reload_dut_config(request, duthost, define_sub_ports_configuration): remove_lag_port(duthost, cfg_facts, lag_port) duthost.shell('sudo config load -y /etc/sonic/config_db.json') - + wait_critical_processes(duthost) + if loganalyzer and loganalyzer[duthost.hostname]: + loganalyzer[duthost.hostname].add_end_ignore_mark() @pytest.fixture def reload_ptf_config(request, ptfhost, define_sub_ports_configuration): @@ -571,3 +599,12 @@ def teardown_test_class(duthost): """ yield config_reload(duthost) + +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exception(duthost, loganalyzer): + if loganalyzer and loganalyzer[duthost.hostname]: + ignore_regex_list = [ + ".*ERR teamd[0-9]*#tlm_teamd.*process_add_queue: Can't connect to teamd after.*attempts. 
LAG 'PortChannel.*'", + ".*ERR swss[0-9]*#orchagent.*update: Failed to get port by bridge port ID.*" + ] + loganalyzer[duthost.hostname].ignore_regex.extend(ignore_regex_list) diff --git a/tests/sub_port_interfaces/sub_ports_helpers.py b/tests/sub_port_interfaces/sub_ports_helpers.py index d9135f6c07b..8b7aa054f2e 100644 --- a/tests/sub_port_interfaces/sub_ports_helpers.py +++ b/tests/sub_port_interfaces/sub_ports_helpers.py @@ -24,6 +24,7 @@ SUB_PORTS_TEMPLATE = 'sub_port_config.j2' TUNNEL_TEMPLATE = 'tunnel_config.j2' PTF_NN_AGENT_TEMPLATE = 'ptf_nn_agent.conf.ptf.j2' +ACL_TEMPLATE = 'backend_acl_update_config.j2' ACTION_FWD = 'fwd' ACTION_DROP = 'drop' TCP_PORT = 80 @@ -1039,3 +1040,4 @@ def restart_ptf_nn_agent(ptfhost): ptfhost.command('supervisorctl reread') ptfhost.command('supervisorctl update') ptfhost.command('supervisorctl restart ptf_nn_agent') + diff --git a/tests/sub_port_interfaces/templates/backend_acl_update_config.j2 b/tests/sub_port_interfaces/templates/backend_acl_update_config.j2 new file mode 100644 index 00000000000..b641f38ea15 --- /dev/null +++ b/tests/sub_port_interfaces/templates/backend_acl_update_config.j2 @@ -0,0 +1,69 @@ +{%- set vlan2ports = {} %} +{%- for vlan in VLAN %} + {% set portlist = [] %} + {%- for vlan_name, port in VLAN_MEMBER %} + {%- if vlan_name == vlan %} + {%- if portlist.append(port) %}{%- endif %} + {%- endif %} + {%- endfor %} + {%- set _ = vlan2ports.update({vlan: portlist| sort | join(',')}) %} +{%- endfor %} + + +{ + "acl": { + "acl-sets": { + "acl-set": { + "DATAACL": { + "acl-entries": { + "acl-entry": { + {% for vlan, vlan_entries in VLAN.items() %} + "{{ loop.index }}": { + "config": { + "sequence-id": {{ loop.index }} + }, + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "l2": { + "config": { + "vlan_id": "{{ vlan_entries['vlanid'] }}" + } + }, + "input_interface": { + "interface_ref": { + "config": { + "interface": "{{ vlan2ports[vlan] }}" + } + } + } + + }, + {% endfor -%} + "999": 
{ + "config": { + "sequence-id": 999 + }, + "actions": { + "config": { + "forwarding-action": "ACCEPT" + } + }, + "input_interface": { + "interface_ref": { + "config": { + "interface": "{{ intf_list }}" + } + } + } + } + + } + } + } + } + } + } +} diff --git a/tests/sub_port_interfaces/test_sub_port_interfaces.py b/tests/sub_port_interfaces/test_sub_port_interfaces.py index 3947c20be13..5b53e9164c5 100644 --- a/tests/sub_port_interfaces/test_sub_port_interfaces.py +++ b/tests/sub_port_interfaces/test_sub_port_interfaces.py @@ -476,3 +476,4 @@ def test_balancing_sub_ports(self, duthost, ptfhost, ptfadapter, apply_balancing ip_dst=ip_dst, type_of_traffic='balancing', ttl=63) + diff --git a/tests/syslog/test_syslog.py b/tests/syslog/test_syslog.py index f4547f3e958..e92c628d96c 100644 --- a/tests/syslog/test_syslog.py +++ b/tests/syslog/test_syslog.py @@ -92,9 +92,13 @@ def test_syslog(rand_selected_dut, dummy_syslog_server_ip_a, dummy_syslog_server logger.info("Configuring the DUT") # Add dummy rsyslog destination for testing if dummy_syslog_server_ip_a is not None: + if "201911" in duthost.os_version and ":" in dummy_syslog_server_ip_a: + pytest.skip("IPv6 syslog server IP not supported on 201911") duthost.shell("sudo config syslog add {}".format(dummy_syslog_server_ip_a)) logger.debug("Added new rsyslog server IP {}".format(dummy_syslog_server_ip_a)) if dummy_syslog_server_ip_b is not None: + if "201911" in duthost.os_version and ":" in dummy_syslog_server_ip_b: + pytest.skip("IPv6 syslog server IP not supported on 201911") duthost.shell("sudo config syslog add {}".format(dummy_syslog_server_ip_b)) logger.debug("Added new rsyslog server IP {}".format(dummy_syslog_server_ip_b)) diff --git a/tests/system_health/test_system_health.py b/tests/system_health/test_system_health.py index bb167ce144d..b1e42ed900b 100644 --- a/tests/system_health/test_system_health.py +++ b/tests/system_health/test_system_health.py @@ -34,7 +34,7 @@ EXTERNAL_CHECKER_MOCK_FILE = 
'mock_valid_external_checker.txt' DEFAULT_BOOT_TIMEOUT = 300 -DEFAULT_INTERVAL = 60 +DEFAULT_INTERVAL = 62 FAST_INTERVAL = 10 THERMAL_CHECK_INTERVAL = 70 PSU_CHECK_INTERVAL = FAST_INTERVAL + 5 @@ -109,8 +109,9 @@ def test_service_checker(duthosts, enum_rand_one_per_hwsku_hostname): expect_summary = SUMMARY_OK if not expect_error_dict else SUMMARY_NOT_OK result = wait_until(WAIT_TIMEOUT, 10, 2, check_system_health_info, duthost, 'summary', expect_summary) - summary = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, 'summary') - assert result == True, 'Expect summary {}, got {}'.format(expect_summary, summary) + # Output the content of whole SYSTEM_HEALTH_INFO table for easy debug when test case failed. + table_output = redis_get_system_health_info(duthost, STATE_DB, HEALTH_TABLE_NAME) + assert result == True, 'Expect summary {}, got {}'.format(expect_summary, table_output) @pytest.mark.disable_loganalyzer @@ -136,7 +137,7 @@ def test_service_checker_with_process_exit(duthosts, enum_rand_one_per_hwsku_hos result = wait_until(WAIT_TIMEOUT, 10, 2, check_system_health_info, duthost, category, expected_value) assert result == True, '{} is not recorded'.format(critical_process) summary = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, 'summary') - assert summary == SUMMARY_NOT_OK + assert summary == SUMMARY_NOT_OK, 'Expect summary {}, got {}'.format(SUMMARY_NOT_OK, summary) break @@ -358,6 +359,11 @@ def redis_get_field_value(duthost, db_id, key, field_name): content = output['stdout'].strip() return content +def redis_get_system_health_info(duthost, db_id, key): + cmd = 'redis-cli --raw -n {} HGETALL \"{}\"'.format(db_id, key) + output = duthost.shell(cmd)['stdout'].strip() + return output + def check_system_health_info(duthost, category, expected_value): value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, category) return value == expected_value diff --git a/tests/tacacs/000-ro_disk.conf b/tests/tacacs/000-ro_disk.conf new file 
mode 100644 index 00000000000..59c26dc6b69 --- /dev/null +++ b/tests/tacacs/000-ro_disk.conf @@ -0,0 +1,7 @@ +# +# Disk is set to RO state +# Send logs to tmpfs +# +*.*;cron,auth,authpriv.none -/run/mount/log/syslog +auth,authpriv.* /run/mount/log/auth.log + diff --git a/tests/tacacs/conftest.py b/tests/tacacs/conftest.py index f2ce3c6e726..501f2117e9f 100644 --- a/tests/tacacs/conftest.py +++ b/tests/tacacs/conftest.py @@ -2,10 +2,11 @@ import logging import yaml import pytest -from .utils import setup_tacacs_client, setup_tacacs_server, cleanup_tacacs +import copy +from .utils import setup_tacacs_client, setup_tacacs_server, cleanup_tacacs, restore_tacacs_servers logger = logging.getLogger(__name__) -TACACS_CREDS_FILE='tacacs_creds.yaml' +TACACS_CREDS_FILE = 'tacacs_creds.yaml' @pytest.fixture(scope="module") @@ -18,7 +19,14 @@ def tacacs_creds(creds_all_duts): @pytest.fixture(scope="module") def check_tacacs(ptfhost, duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds): - logger.info('tacacs_creds: {}'.format(str(tacacs_creds))) + print_tacacs_creds = copy.deepcopy(tacacs_creds) + if isinstance(print_tacacs_creds, dict): + for tacacs_creds_msg in print_tacacs_creds.values(): + if isinstance(tacacs_creds_msg, dict): + if tacacs_creds_msg.get("docker_registry_password"): + tacacs_creds_msg["docker_registry_password"] = "******" + + logger.info('tacacs_creds: {}'.format(str(print_tacacs_creds))) duthost = duthosts[enum_rand_one_per_hwsku_hostname] tacacs_server_ip = ptfhost.mgmt_ip setup_tacacs_client(duthost, tacacs_creds, tacacs_server_ip) @@ -27,6 +35,7 @@ def check_tacacs(ptfhost, duthosts, enum_rand_one_per_hwsku_hostname, tacacs_cre yield cleanup_tacacs(ptfhost, tacacs_creds, duthost) + restore_tacacs_servers(duthost) @pytest.fixture(scope="module") @@ -42,3 +51,4 @@ def check_tacacs_v6(ptfhost, duthosts, enum_rand_one_per_hwsku_hostname, tacacs_ yield cleanup_tacacs(ptfhost, tacacs_creds, duthost) + restore_tacacs_servers(duthost) diff --git 
a/tests/tacacs/tac_plus.conf.j2 b/tests/tacacs/tac_plus.conf.j2 index 35101db440b..2e105dbd541 100644 --- a/tests/tacacs/tac_plus.conf.j2 +++ b/tests/tacacs/tac_plus.conf.j2 @@ -56,10 +56,7 @@ user = {{ tacacs_authorization_user }} { deny -exec permit .* } - cmd = /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 { - deny .* - } - cmd = /lib/arm-linux-gnueabihf/ld-linux-armhf.so.3 { + cmd = /lib/arch-linux-abi/ld-linux-arch.so { deny .* } cmd = /usr/bin/dash { diff --git a/tests/tacacs/test_authorization.py b/tests/tacacs/test_authorization.py index f41da77c6c0..8025be18f61 100644 --- a/tests/tacacs/test_authorization.py +++ b/tests/tacacs/test_authorization.py @@ -1,11 +1,13 @@ import logging -import os import paramiko +import time import pytest +from _pytest.outcomes import Failed -from .utils import stop_tacacs_server, start_tacacs_server, per_command_check_skip_versions, remove_all_tacacs_server +from tests.tacacs.utils import stop_tacacs_server, start_tacacs_server +from tests.tacacs.utils import per_command_check_skip_versions, remove_all_tacacs_server, get_ld_path from tests.common.helpers.assertions import pytest_assert -from tests.common.utilities import skip_release +from tests.common.utilities import skip_release, wait_until pytestmark = [ pytest.mark.disable_loganalyzer, @@ -15,22 +17,28 @@ logger = logging.getLogger(__name__) -TIMEOUT_LIMIT = 120 +TIMEOUT_LIMIT = 120 + def ssh_connect_remote(remote_ip, remote_username, remote_password): ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - ssh.connect(remote_ip, username=remote_username, password=remote_password, allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT) + ssh.connect( + remote_ip, username=remote_username, password=remote_password, allow_agent=False, + look_for_keys=False, auth_timeout=TIMEOUT_LIMIT) return ssh + def check_ssh_connect_remote_failed(remote_ip, remote_username, remote_password): login_failed = False try: ssh_connect_remote(remote_ip, 
remote_username, remote_password) except paramiko.ssh_exception.AuthenticationException as e: login_failed = True + logger.info("Paramiko SSH connect failed with authentication: " + repr(e)) + + pytest_assert(login_failed) - pytest_assert(login_failed == True) def ssh_run_command(ssh_client, command): stdin, stdout, stderr = ssh_client.exec_command(command, timeout=TIMEOUT_LIMIT) @@ -39,10 +47,11 @@ def ssh_run_command(ssh_client, command): stderr_lines = stderr.readlines() return exit_code, stdout_lines, stderr_lines + def check_ssh_output(res, exp_val): content_exist = False - for l in res: - if exp_val in l: + for line in res: + if exp_val in line: content_exist = True break pytest_assert(content_exist) @@ -51,15 +60,21 @@ def check_ssh_output(res, exp_val): def remote_user_client(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds): duthost = duthosts[enum_rand_one_per_hwsku_hostname] dutip = duthost.mgmt_ip - with ssh_connect_remote(dutip, tacacs_creds['tacacs_authorization_user'], tacacs_creds['tacacs_authorization_user_passwd']) as ssh_client: + with ssh_connect_remote( + dutip, + tacacs_creds['tacacs_authorization_user'], + tacacs_creds['tacacs_authorization_user_passwd'] + ) as ssh_client: yield ssh_client + @pytest.fixture def local_user_client(): with paramiko.SSHClient() as ssh_client: ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) yield ssh_client + @pytest.fixture(scope="module", autouse=True) def check_image_version(duthost): """Skips this test if the SONiC image installed on DUT is older than 202112 @@ -70,22 +85,47 @@ def check_image_version(duthost): """ skip_release(duthost, per_command_check_skip_versions) -def check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client): + +@pytest.fixture +def setup_authorization_tacacs(duthosts, enum_rand_one_per_hwsku_hostname): duthost = duthosts[enum_rand_one_per_hwsku_hostname] duthost.shell("sudo config aaa 
authorization tacacs+") + yield + duthost.shell("sudo config aaa authorization local") # Default authorization method is local - """ - Verify TACACS+ user run command in server side whitelist: - If command have local permission, user can run command. - """ + +@pytest.fixture +def setup_authorization_tacacs_local(duthosts, enum_rand_one_per_hwsku_hostname): + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + duthost.shell("sudo config aaa authorization \"tacacs+ local\"") + yield + duthost.shell("sudo config aaa authorization local") # Default authorization method is local + + +def verify_show_aaa(remote_user_client): exit_code, stdout, stderr = ssh_run_command(remote_user_client, "show aaa") - pytest_assert(exit_code == 0) - check_ssh_output(stdout, 'AAA authentication') + if exit_code != 0: + return False + try: + check_ssh_output(stdout, 'AAA authentication') + return True + except Failed: + return False + + +def check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, remote_user_client): + duthost = duthosts[enum_rand_one_per_hwsku_hostname] """ Verify TACACS+ user run command in server side whitelist: - If command not have local permission, user can't run command. + If command have local permission, user can run command. """ + # The "config tacacs add" commands will trigger hostcfgd to regenerate tacacs config. + # If we immediately run "show aaa" command, the client may still be using the first invalid tacacs server. + # The second valid tacacs may not take effect yet. Wait some time for the valid tacacs server to take effect. 
+ succeeded = wait_until(10, 1, 0, verify_show_aaa, remote_user_client) + pytest_assert(succeeded) + exit_code, stdout, stderr = ssh_run_command(remote_user_client, "config aaa") pytest_assert(exit_code == 1) check_ssh_output(stderr, 'Root privileges are required for this operation') @@ -97,15 +137,20 @@ def check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, # Verify Local user can't login. dutip = duthost.mgmt_ip - check_ssh_connect_remote_failed(dutip, tacacs_creds['local_user'], - tacacs_creds['local_user_passwd']) + check_ssh_connect_remote_failed( + dutip, tacacs_creds['local_user'], + tacacs_creds['local_user_passwd'] + ) -def test_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client): - check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client) +def test_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, setup_authorization_tacacs, + tacacs_creds, check_tacacs, remote_user_client): + check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, remote_user_client) -def test_authorization_tacacs_only_some_server_down(localhost, duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, ptfhost, check_tacacs, remote_user_client): +def test_authorization_tacacs_only_some_server_down( + duthosts, enum_rand_one_per_hwsku_hostname, + setup_authorization_tacacs, tacacs_creds, ptfhost, check_tacacs, remote_user_client): """ Setup multiple tacacs server for this UT. Tacacs server 127.0.0.1 not accessible. @@ -128,14 +173,15 @@ def test_authorization_tacacs_only_some_server_down(localhost, duthosts, enum_ra Verify TACACS+ user can't run command not in server side whitelist. Verify Local user can't login. 
""" - check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client) + check_authorization_tacacs_only(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, remote_user_client) # Cleanup duthost.shell("sudo config tacacs delete %s" % invalid_tacacs_server_ip) + duthost.shell("sudo config tacacs timeout 5") -def test_authorization_tacacs_only_then_server_down_after_login(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, ptfhost, check_tacacs, remote_user_client): - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization tacacs+") + +def test_authorization_tacacs_only_then_server_down_after_login( + setup_authorization_tacacs, ptfhost, check_tacacs, remote_user_client): # Verify when server are accessible, TACACS+ user can run command in server side whitelist. exit_code, stdout, stderr = ssh_run_command(remote_user_client, "show aaa") @@ -153,9 +199,11 @@ def test_authorization_tacacs_only_then_server_down_after_login(duthosts, enum_r # Cleanup UT. start_tacacs_server(ptfhost) -def test_authorization_tacacs_and_local(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client): + +def test_authorization_tacacs_and_local( + duthosts, enum_rand_one_per_hwsku_hostname, + setup_authorization_tacacs_local, tacacs_creds, check_tacacs, remote_user_client): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization \"tacacs+ local\"") """ Verify TACACS+ user run command in server side whitelist: @@ -163,12 +211,7 @@ def test_authorization_tacacs_and_local(duthosts, enum_rand_one_per_hwsku_hostna """ exit_code, stdout, stderr = ssh_run_command(remote_user_client, "show aaa") pytest_assert(exit_code == 0) - check_ssh_output(stdout, 'AAA authentication') - """ - Verify TACACS+ user run command in server side whitelist: - If command not have local permission, user can't run command. 
- """ exit_code, stdout, stderr = ssh_run_command(remote_user_client, "config aaa") pytest_assert(exit_code == 1) check_ssh_output(stderr, 'Root privileges are required for this operation') @@ -180,13 +223,16 @@ def test_authorization_tacacs_and_local(duthosts, enum_rand_one_per_hwsku_hostna # Verify Local user can't login. dutip = duthost.mgmt_ip - check_ssh_connect_remote_failed(dutip, tacacs_creds['local_user'], - tacacs_creds['local_user_passwd']) + check_ssh_connect_remote_failed( + dutip, tacacs_creds['local_user'], + tacacs_creds['local_user_passwd'] + ) -def test_authorization_tacacs_and_local_then_server_down_after_login(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): +def test_authorization_tacacs_and_local_then_server_down_after_login( + duthosts, enum_rand_one_per_hwsku_hostname, + setup_authorization_tacacs_local, tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization \"tacacs+ local\"") # Shutdown tacacs server stop_tacacs_server(ptfhost) @@ -204,9 +250,11 @@ def test_authorization_tacacs_and_local_then_server_down_after_login(duthosts, e # Verify Local user can login when tacacs closed, and run command with local permission. 
dutip = duthost.mgmt_ip - local_user_client.connect(dutip, username=tacacs_creds['local_user'], - password=tacacs_creds['local_user_passwd'], - allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT) + local_user_client.connect( + dutip, username=tacacs_creds['local_user'], + password=tacacs_creds['local_user_passwd'], + allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT + ) exit_code, stdout, stderr = ssh_run_command(local_user_client, "show aaa") pytest_assert(exit_code == 0) @@ -215,15 +263,17 @@ def test_authorization_tacacs_and_local_then_server_down_after_login(duthosts, e # Start tacacs server start_tacacs_server(ptfhost) - # Verify after Local user login, then server becomes accessible, Local user still can run command with local permission. + # Verify after Local user login, then server becomes accessible, + # Local user still can run command with local permission. exit_code, stdout, stderr = ssh_run_command(local_user_client, "show aaa") pytest_assert(exit_code == 0) check_ssh_output(stdout, 'AAA authentication') -def test_authorization_local(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): +def test_authorization_local( + duthosts, enum_rand_one_per_hwsku_hostname, + tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization local") """ TACACS server up: @@ -233,10 +283,6 @@ def test_authorization_local(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_ pytest_assert(exit_code == 0) check_ssh_output(stdout, 'AAA authentication') - """ - TACACS server up: - Verify TACACS+ user can't run command if not have permission in local. 
- """ exit_code, stdout, stderr = ssh_run_command(remote_user_client, "config aaa") pytest_assert(exit_code == 1) check_ssh_output(stderr, 'Root privileges are required for this operation') @@ -249,9 +295,11 @@ def test_authorization_local(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_ Verify Local user can login, and run command with local permission. """ dutip = duthost.mgmt_ip - local_user_client.connect(dutip, username=tacacs_creds['local_user'], - password=tacacs_creds['local_user_passwd'], - allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT) + local_user_client.connect( + dutip, username=tacacs_creds['local_user'], + password=tacacs_creds['local_user_passwd'], + allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT + ) exit_code, stdout, stderr = ssh_run_command(local_user_client, "show aaa") pytest_assert(exit_code == 0) @@ -261,13 +309,19 @@ def test_authorization_local(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_ start_tacacs_server(ptfhost) -def test_bypass_authorization(duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs, remote_user_client): +def test_bypass_authorization( + duthosts, enum_rand_one_per_hwsku_hostname, + setup_authorization_tacacs, check_tacacs, remote_user_client): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization tacacs+") """ Verify user can't run script with sh/python with following command. python ./testscript.py + + NOTE: TACACS UT using tac_plus as server side, there is a bug that tac_plus can't handle an authorization + message contains more than 10 attributes. + Because every command parameter will convert to a TACACS attribute, please don't using more than 5 + command parameters in test case. 
""" exit_code, stdout, stderr = ssh_run_command(remote_user_client, 'echo "" >> ./testscript.py') pytest_assert(exit_code == 0) @@ -285,30 +339,18 @@ def test_bypass_authorization(duthosts, enum_rand_one_per_hwsku_hostname, tacacs pytest_assert(exit_code == 0) check_ssh_output(stdout, '/bin/sh') - """ - Verify user can't run command with loader: - /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 sh - Or - /lib/arm-linux-gnueabihf/ld-linux-armhf.so.3 sh - """ - ld_path_x86 = "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" - if os.path.isfile(ld_path_x86): - exit_code, stdout, stderr = ssh_run_command(remote_user_client, ld_path_x86 + " sh") - pytest_assert(exit_code == 1) - check_ssh_output(stdout, 'authorize failed by TACACS+ with given arguments, not executing') - - ld_path_arm = "/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3" - if os.path.isfile(ld_path_arm): - exit_code, stdout, stderr = ssh_run_command(remote_user_client, ld_path_arm + " sh") + # Verify user can't run command with loader: + # /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 sh + ld_path = get_ld_path(duthost) + if not ld_path: + exit_code, stdout, stderr = ssh_run_command(remote_user_client, ld_path + " sh") pytest_assert(exit_code == 1) check_ssh_output(stdout, 'authorize failed by TACACS+ with given arguments, not executing') - """ - Verify user can't run command with prefix/quoting: - \sh - "sh" - echo $(sh -c ls) - """ + # Verify user can't run command with prefix/quoting: + # \sh + # "sh" + # echo $(sh -c ls) exit_code, stdout, stderr = ssh_run_command(remote_user_client, "\\sh") pytest_assert(exit_code == 1) check_ssh_output(stdout, 'authorize failed by TACACS+ with given arguments, not executing') @@ -322,9 +364,11 @@ def test_bypass_authorization(duthosts, enum_rand_one_per_hwsku_hostname, tacacs pytest_assert(exit_code == 0) check_ssh_output(stdout, 'authorize failed by TACACS+ with given arguments, not executing') -def test_backward_compatibility_disable_authorization(duthosts, 
enum_rand_one_per_hwsku_hostname, tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): + +def test_backward_compatibility_disable_authorization( + duthosts, enum_rand_one_per_hwsku_hostname, + tacacs_creds, ptfhost, check_tacacs, remote_user_client, local_user_client): duthost = duthosts[enum_rand_one_per_hwsku_hostname] - duthost.shell("sudo config aaa authorization local") # Verify domain account can run command if have permission in local. exit_code, stdout, stderr = ssh_run_command(remote_user_client, "show aaa") @@ -336,14 +380,18 @@ def test_backward_compatibility_disable_authorization(duthosts, enum_rand_one_pe # Verify domain account can't login to device successfully. dutip = duthost.mgmt_ip - check_ssh_connect_remote_failed(dutip, tacacs_creds['tacacs_authorization_user'], - tacacs_creds['tacacs_authorization_user_passwd']) + check_ssh_connect_remote_failed( + dutip, tacacs_creds['tacacs_authorization_user'], + tacacs_creds['tacacs_authorization_user_passwd'] + ) # Verify local admin account can run command if have permission in local. 
dutip = duthost.mgmt_ip - local_user_client.connect(dutip, username=tacacs_creds['local_user'], - password=tacacs_creds['local_user_passwd'], - allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT) + local_user_client.connect( + dutip, username=tacacs_creds['local_user'], + password=tacacs_creds['local_user_passwd'], + allow_agent=False, look_for_keys=False, auth_timeout=TIMEOUT_LIMIT + ) exit_code, stdout, stderr = ssh_run_command(local_user_client, "show aaa") pytest_assert(exit_code == 0) diff --git a/tests/tacacs/test_ro_disk.py b/tests/tacacs/test_ro_disk.py index 808ace40ce2..4f88db848eb 100644 --- a/tests/tacacs/test_ro_disk.py +++ b/tests/tacacs/test_ro_disk.py @@ -1,6 +1,9 @@ +import os +import time import pytest import logging +from ansible.errors import AnsibleConnectionFailure from tests.common.devices.base import RunAnsibleModuleFail from tests.common.utilities import wait_until from tests.common.utilities import skip_release @@ -15,6 +18,10 @@ logger = logging.getLogger(__name__) +MOUNT_DIR = "/run/mount" +LOG_DIR = os.path.join(MOUNT_DIR, "log") +DATA_DIR = "logs/tacacs" + def check_disk_ro(duthost): try: @@ -41,25 +48,37 @@ def chk_ssh_remote_run(localhost, remote_ip, username, password, cmd): return rc == 0 -def do_reboot(duthost, localhost, dutip, rw_user, rw_pass): +def do_reboot(duthost, localhost, dutip="", rw_user="", rw_pass=""): # occasionally reboot command fails with some kernel error messages # Hence retry if needed. # wait_time = 20 retries = 3 + rebooted = False + for i in range(retries): # Regular reboot command would not work, as it would try to # collect show tech, which will fail in RO state. 
# - chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "sudo /sbin/reboot") try: + if dutip: + chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "sudo /sbin/reboot") + else: + duthost.shell("/sbin/reboot") + localhost.wait_for(host=duthost.mgmt_ip, port=22, state="stopped", delay=5, timeout=60) + rebooted = True break + except AnsibleConnectionFailure as e: + logger.error("DUT not reachable, exception: {} attempt:{}/{}". + format(repr(e), i, retries)) except RunAnsibleModuleFail as e: logger.error("DUT did not go down, exception: {} attempt:{}/{}". format(repr(e), i, retries)) - assert i<3, "Failed to reboot" + wait(wait_time, msg="Wait {} seconds before retry.".format(wait_time)) + + assert rebooted, "Failed to reboot" localhost.wait_for(host=duthost.mgmt_ip, port=22, state="started", delay=10, timeout=300) wait(wait_time, msg="Wait {} seconds for system to be stable.".format(wait_time)) assert wait_until(300, 20, 0, duthost.critical_services_fully_started), \ @@ -78,6 +97,24 @@ def do_setup_tacacs(ptfhost, duthost, tacacs_creds): logger.info('Upon reboot: complete: setup tacacs_creds') +def do_check_clean_state(duthost): + for i in [ "upper", "work", "log" ]: + res = duthost.shell("ls -l {}".format(os.path.join(MOUNT_DIR,i)), module_ignore_errors=True) + if res["rc"] == 0: + # Log current state in-depth + duthost.shell("find {} -ls".format(MOUNT_DIR), module_ignore_errors=True) + return False + return True + + +def fetch_into_file(localhost, remote_ip, rwuser, rwpass, src_file, dst_file): + chk_ssh_remote_run(localhost, remote_ip, rwuser, rwpass, "sudo chmod a+r {}".format(src_file)) + scp_cmd = "scp -o StrictHostKeyChecking=no {}@{}:{} {}".format(rwuser, remote_ip, src_file, dst_file) + cmd = "sshpass -p {} {}".format(rwpass, scp_cmd) + ret = os.system(cmd) + logger.info("ret={} cmd={}".format(ret, scp_cmd)) + + def test_ro_disk(localhost, ptfhost, duthosts, enum_rand_one_per_hwsku_hostname, tacacs_creds, check_tacacs): """test tacacs rw user @@ -93,40 
+130,83 @@ def test_ro_disk(localhost, ptfhost, duthosts, enum_rand_one_per_hwsku_hostname, rw_user = tacacs_creds['tacacs_rw_user'] rw_pass = tacacs_creds['tacacs_rw_user_passwd'] - res = duthost.shell("ls -l /run/mount/*", module_ignore_errors=True) - if res["rc"] == 0: + if not do_check_clean_state(duthost): # System has some partial state left behind from last run. # reboot to clear it # logger.info("PRETEST: reboot {} to restore system state". format(enum_rand_one_per_hwsku_hostname)) - do_reboot(duthost, localhost, dutip, rw_user, rw_pass) - + do_reboot(duthost, localhost) + assert do_check_clean_state(duthost), "state not good even after reboot" do_setup_tacacs(ptfhost, duthost, tacacs_creds) + # just check it out that ro user could indeed login + ret = chk_ssh_remote_run(localhost, dutip, ro_user, ro_pass, "ls") + assert ret, "Failed pre-test ssh login as ro user" + + logger.debug("Delete ro user to simulate new login in RO state.") + duthost.shell("sudo deluser --remove-home {}".format(ro_user)) + logger.info("del user {} done".format(ro_user)) + res = duthost.shell("ls -l /home/{}".format(ro_user), module_ignore_errors=True) - if res["rc"] == 0: - logger.debug("ro user pre-exists; deleting") - try: - duthost.shell("sudo deluser --remove-home {}".format(ro_user), - module_ignore_errors=True) - finally: - # If any failure, it implies user not valid, which is good enough. 
- logger.info("del user {} done".format(ro_user)) + assert res["rc"] != 0, "Failed to remove ro user dir" + + # Ensure rw user can get in, as we need this to be able to reboot + ret = chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "ls") + assert ret, "Failed to ssh as rw user" try: - # Ensure rw user can get in, as we need this to be able to reboot - ret = chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "ls") + # Redirect logs to tmpfs + # + duthost.shell("sudo mkdir {}".format(LOG_DIR)) + + conf_path = os.path.join(os.path.dirname( + os.path.abspath(__file__)), "000-ro_disk.conf") + duthost.copy(src=conf_path, dest="/etc/rsyslog.d/000-ro_disk.conf") + + # To get file in decent size. Force a rotate + duthost.shell("logrotate --force /etc/logrotate.d/rsyslog") - assert ret, "Failed to ssh as rw user" + res = duthost.shell("systemctl restart rsyslog") + assert res["rc"] == 0, "failed to restart rsyslog" + + # Pause 2 seconds to ensure the new .conf is read in by rsyslogd + time.sleep(2) + + # Remove file, so the reboot at the end of test will revert this logs redirect. + duthost.shell("rm /etc/rsyslog.d/000-ro_disk.conf") # Set disk in RO state simulate_ro(duthost) logger.debug("user={}".format(ro_user)) - assert wait_until(600, 20, 0, chk_ssh_remote_run, localhost, dutip, - ro_user, ro_pass, "cat /etc/passwd"), "Failed to ssh as ro user" + # Wait for 15 minutes + # Reason: + # Monit does not start upon boot for 5 minutes. + # Note: Monit invokes disk check every 5 cycles/minutes + # We need to wait solid +10mins before concluding. 
+ # + res = wait_until(900, 20, 0, chk_ssh_remote_run, localhost, dutip, + ro_user, ro_pass, "cat /etc/passwd") + logger.info("res={}".format(res)) + + chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "sudo find {} -ls".format(MOUNT_DIR)) + chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "systemctl status monit") + + chk_ssh_remote_run(localhost, dutip, rw_user, rw_pass, "sudo find /home -ls") + + if not os.path.exists(DATA_DIR): + os.makedirs(DATA_DIR) + + + # Fetch files of interest + # + for f in [ "/etc/passwd", os.path.join(LOG_DIR, "auth.log"), + os.path.join(LOG_DIR, "syslog")]: + fetch_into_file(localhost, dutip, rw_user, rw_pass, f, + os.path.join(DATA_DIR, os.path.basename(f))) + assert res, "Failed to ssh as ro user" finally: logger.debug("START: reboot {} to restore disk RW state". diff --git a/tests/tacacs/test_ro_user.py b/tests/tacacs/test_ro_user.py index d439f6ff826..a8a511d9eae 100644 --- a/tests/tacacs/test_ro_user.py +++ b/tests/tacacs/test_ro_user.py @@ -96,7 +96,7 @@ def test_ro_user_allowed_command(localhost, duthosts, enum_rand_one_per_hwsku_ho # Run as RO and use the commands allowed by the sudoers file commands = { - "cat": ["sudo cat /var/log/syslog", "sudo cat /var/log/syslog.1", "sudo cat /var/log/syslog.2.gz"], + "cat": ["sudo cat /var/log/syslog", "sudo cat /var/log/syslog.1"], "brctl": ["sudo brctl show"], "docker": [ "sudo docker exec snmp cat /etc/snmp/snmpd.conf", diff --git a/tests/tacacs/utils.py b/tests/tacacs/utils.py index 9298072c77a..8524b680c7c 100644 --- a/tests/tacacs/utils.py +++ b/tests/tacacs/utils.py @@ -1,37 +1,47 @@ import crypt import logging -import os import re from tests.common.errors import RunAnsibleModuleFail -from tests.common.utilities import wait_until, check_skip_release +from tests.common.utilities import wait_until, check_skip_release, delete_running_config from tests.common.helpers.assertions import pytest_assert -from tests.common.errors import RunAnsibleModuleFail logger = 
logging.getLogger(__name__) -# per-command authorization and accounting feature not avaliable in following versions + +# per-command authorization and accounting feature not available in following versions per_command_check_skip_versions = ["201811", "201911", "202012", "202106"] + def check_output(output, exp_val1, exp_val2): pytest_assert(not output['failed'], output['stderr']) - for l in output['stdout_lines']: - fds = l.split(':') + for line in output['stdout_lines']: + fds = line.split(':') if fds[0] == exp_val1: pytest_assert(fds[4] == exp_val2) + def check_all_services_status(ptfhost): res = ptfhost.command("service --status-all") logger.info(res["stdout_lines"]) def start_tacacs_server(ptfhost): + def tacacs_running(ptfhost): + out = ptfhost.command("service tacacs_plus status", module_ignore_errors=True)["stdout"] + return "tacacs+ running" in out + ptfhost.command("service tacacs_plus restart", module_ignore_errors=True) - return "tacacs+ running" in ptfhost.command("service tacacs_plus status", module_ignore_errors=True)["stdout_lines"] + return wait_until(5, 1, 0, tacacs_running, ptfhost) + def stop_tacacs_server(ptfhost): - ptfhost.service(name="tacacs_plus", state="stopped") - check_all_services_status(ptfhost) + def tacacs_not_running(ptfhost): + out = ptfhost.command("service tacacs_plus status", module_ignore_errors=True)["stdout"] + return "tacacs+ apparently not running" in out + ptfhost.shell("service tacacs_plus stop") + return wait_until(5, 1, 0, tacacs_not_running, ptfhost) + def setup_local_user(duthost, tacacs_creds): try: @@ -40,18 +50,21 @@ def setup_local_user(duthost, tacacs_creds): logger.info("local user not exist") duthost.shell("sudo useradd {}".format(tacacs_creds['local_user'])) - duthost.shell('sudo echo "{}:{}" | chpasswd'.format(tacacs_creds['local_user'],tacacs_creds['local_user_passwd'])) + duthost.shell('sudo echo "{}:{}" | chpasswd'.format(tacacs_creds['local_user'], tacacs_creds['local_user_passwd'])) + def 
setup_tacacs_client(duthost, tacacs_creds, tacacs_server_ip): """setup tacacs client""" # configure tacacs client + default_tacacs_servers = [] duthost.shell("sudo config tacacs passkey %s" % tacacs_creds[duthost.hostname]['tacacs_passkey']) # get default tacacs servers config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] for tacacs_server in config_facts.get('TACPLUS_SERVER', {}): duthost.shell("sudo config tacacs delete %s" % tacacs_server) + default_tacacs_servers.append(tacacs_server) duthost.shell("sudo config tacacs add %s" % tacacs_server_ip) duthost.shell("sudo config tacacs authtype login") @@ -65,19 +78,95 @@ def setup_tacacs_client(duthost, tacacs_creds, tacacs_server_ip): # setup local user setup_local_user(duthost, tacacs_creds) + return default_tacacs_servers + + +def restore_tacacs_servers(duthost): + # Restore the TACACS plus server in config_db.json + config_facts = duthost.config_facts(host=duthost.hostname, source="persistent")["ansible_facts"] + for tacacs_server in config_facts.get("TACPLUS_SERVER", {}): + duthost.shell("sudo config tacacs add %s" % tacacs_server) + + cmds = [] + aaa_config = config_facts.get("AAA", {}) + if aaa_config: + cfg = aaa_config.get("authentication", {}).get("login", "") + if cfg: + cmds.append("config aaa authentication login %s" % cfg) + + cfg = aaa_config.get("authentication", {}).get("failthrough", "") + if cfg.lower() == "true": + cmds.append("config aaa authentication failthrough enable") + elif cfg.lower() == "false": + cmds.append("config aaa authentication failthrough disable") + + cfg = aaa_config.get("authorization", {}).get("login", "") + if cfg: + cmds.append("config aaa authorization %s" % cfg) + + cfg = aaa_config.get("accounting", {}).get("login", "") + if cfg: + cmds.append("config aaa accounting %s" % cfg) + + tacplus_config = config_facts.get("TACPLUS", {}) + if tacplus_config: + cfg = tacplus_config.get("global", {}).get("auth_type", "") + if cfg: + 
cmds.append("config tacacs authtype %s" % cfg) + + cfg = tacplus_config.get("global", {}).get("passkey", "") + if cfg: + cmds.append("config tacacs passkey %s" % cfg) -def fix_symbolic_link_in_config(duthost, ptfhost, symbolic_link_path): + cfg = tacplus_config.get("global", {}).get("timeout", "") + if cfg: + cmds.append("config tacacs timeout %s" % cfg) + + # Cleanup AAA and TACPLUS config + delete_tacacs_json = [{"AAA": {}}, {"TACPLUS": {}}] + delete_running_config(delete_tacacs_json, duthost) + + # Restore AAA and TACPLUS config + duthost.shell_cmds(cmds=cmds) + + +def fix_symbolic_link_in_config(duthost, ptfhost, symbolic_link_path, path_to_be_fix=None): """ Fix symbolic link in tacacs config - Because tac_plus server not support regex in command name, and SONiC will send full path to tacacs server side for authorization, so the 'python' and 'ld' path in tac_plus config file need fix. + Because tac_plus server not support regex in command name, and SONiC will send full path to tacacs server side + for authorization, so the 'python' and 'ld' path in tac_plus config file need fix. """ read_link_command = "readlink -f {0}".format(symbolic_link_path) target_path = duthost.shell(read_link_command)['stdout'] # Escape path string, will use it as regex in sed command. + link_path_regex = re.escape(symbolic_link_path) + if path_to_be_fix is not None: + link_path_regex = re.escape(path_to_be_fix) + target_path_regex = re.escape(target_path) ptfhost.shell("sed -i 's/{0}/{1}/g' /etc/tacacs+/tac_plus.conf".format(link_path_regex, target_path_regex)) + +def get_ld_path(duthost): + """ + Fix symbolic link in tacacs config + Because tac_plus server not support regex in command name, and SONiC will send full path to tacacs server side + for authorization, so the 'python' and 'ld' path in tac_plus config file need fix. 
+ """ + find_ld_command = "find /lib/ -type f,l -regex '\/lib\/.*-linux-.*/ld-linux-.*\.so\.[0-9]*'" # noqa W605 + return duthost.shell(find_ld_command)['stdout'] + + +def fix_ld_path_in_config(duthost, ptfhost): + """ + Fix ld path in tacacs config + """ + ld_symbolic_link_path = get_ld_path(duthost) + if not ld_symbolic_link_path: + fix_symbolic_link_in_config(duthost, ptfhost, ld_symbolic_link_path, "/lib/arch-linux-abi/ld-linux-arch.so") + + def setup_tacacs_server(ptfhost, tacacs_creds, duthost): """setup tacacs server""" @@ -88,7 +177,9 @@ def setup_tacacs_server(ptfhost, tacacs_creds, duthost): 'tacacs_ro_user': tacacs_creds['tacacs_ro_user'], 'tacacs_ro_user_passwd': crypt.crypt(tacacs_creds['tacacs_ro_user_passwd'], 'abc'), 'tacacs_authorization_user': tacacs_creds['tacacs_authorization_user'], - 'tacacs_authorization_user_passwd': crypt.crypt(tacacs_creds['tacacs_authorization_user_passwd'], 'abc'), + 'tacacs_authorization_user_passwd': crypt.crypt( + tacacs_creds['tacacs_authorization_user_passwd'], + 'abc'), 'tacacs_jit_user': tacacs_creds['tacacs_jit_user'], 'tacacs_jit_user_passwd': crypt.crypt(tacacs_creds['tacacs_jit_user_passwd'], 'abc'), 'tacacs_jit_user_membership': tacacs_creds['tacacs_jit_user_membership']} @@ -100,15 +191,13 @@ def setup_tacacs_server(ptfhost, tacacs_creds, duthost): fix_symbolic_link_in_config(duthost, ptfhost, "/usr/bin/python") # Find ld lib symbolic link target, and fix the tac_plus config file - ld_path_x86 = "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" - if os.path.isfile(ld_path_x86): - fix_symbolic_link_in_config(duthost, ptfhost, ld_path_x86) - - ld_path_arm = "/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3" - if os.path.isfile(ld_path_arm): - fix_symbolic_link_in_config(duthost, ptfhost, ld_path_arm) + fix_ld_path_in_config(duthost, ptfhost) - ptfhost.lineinfile(path="/etc/default/tacacs+", line="DAEMON_OPTS=\"-d 10 -l /var/log/tac_plus.log -C /etc/tacacs+/tac_plus.conf\"", regexp='^DAEMON_OPTS=.*') + 
ptfhost.lineinfile( + path="/etc/default/tacacs+", + line="DAEMON_OPTS=\"-d 10 -l /var/log/tac_plus.log -C /etc/tacacs+/tac_plus.conf\"", + regexp='^DAEMON_OPTS=.*' + ) check_all_services_status(ptfhost) # FIXME: This is a short term mitigation, we need to figure out why \nthe tacacs+ server does not start @@ -123,24 +212,33 @@ def cleanup_tacacs(ptfhost, tacacs_creds, duthost): # reset tacacs client configuration remove_all_tacacs_server(duthost) - duthost.shell("sudo config tacacs default passkey") - duthost.shell("sudo config aaa authentication login default") - duthost.shell("sudo config aaa authentication failthrough default") + cmds = [ + "config tacacs default passkey", + "config aaa authentication login default", + "config aaa authentication failthrough default" + ] + duthost.shell_cmds(cmds=cmds) (skip, _) = check_skip_release(duthost, per_command_check_skip_versions) if not skip: duthost.shell("sudo config aaa authorization local") duthost.shell("sudo config aaa accounting disable") - duthost.user(name=tacacs_creds['tacacs_ro_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True) - duthost.user(name=tacacs_creds['tacacs_rw_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True) - duthost.user(name=tacacs_creds['tacacs_jit_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True) + duthost.user( + name=tacacs_creds['tacacs_ro_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True + ) + duthost.user( + name=tacacs_creds['tacacs_rw_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True + ) + duthost.user( + name=tacacs_creds['tacacs_jit_user'], state='absent', remove='yes', force='yes', module_ignore_errors=True + ) def remove_all_tacacs_server(duthost): # use grep command to extract tacacs server address from tacacs config - find_server_command = 'show tacacs | grep -Po "TACPLUS_SERVER address \K.*"' - server_list = 
duthost.shell(find_server_command)['stdout'] + find_server_command = 'show tacacs | grep -Po "TACPLUS_SERVER address \K.*"' # noqa W605 + server_list = duthost.shell(find_server_command, module_ignore_errors=True)['stdout_lines'] for tacacs_server in server_list: tacacs_server = tacacs_server.rstrip() if tacacs_server: diff --git a/tests/telemetry/test_telemetry.py b/tests/telemetry/test_telemetry.py index d5e2b2fc54e..3a44f00708f 100644 --- a/tests/telemetry/test_telemetry.py +++ b/tests/telemetry/test_telemetry.py @@ -6,6 +6,7 @@ from pkg_resources import parse_version from tests.common.helpers.assertions import pytest_assert +from tests.common.errors import RunAnsibleModuleFail from tests.common.utilities import wait_until, wait_tcp_connection pytestmark = [ @@ -51,16 +52,19 @@ def setup_telemetry_forpyclient(duthost): """ client_auth_out = duthost.shell('sonic-db-cli CONFIG_DB HGET "TELEMETRY|gnmi" "client_auth"', module_ignore_errors=False)['stdout_lines'] client_auth = str(client_auth_out[0]) - if client_auth == "true": - duthost.shell('sonic-db-cli CONFIG_DB HSET "TELEMETRY|gnmi" "client_auth" "false"', module_ignore_errors=False) + return client_auth + +def restore_telemetry_forpyclient(duthost, default_client_auth): + client_auth_out = duthost.shell('sonic-db-cli CONFIG_DB HGET "TELEMETRY|gnmi" "client_auth"', module_ignore_errors=False)['stdout_lines'] + client_auth = str(client_auth_out[0]) + if client_auth != default_client_auth: + duthost.shell('sonic-db-cli CONFIG_DB HSET "TELEMETRY|gnmi" "client_auth" {}'.format(default_client_auth), module_ignore_errors=False) duthost.service(name="telemetry", state="restarted") - else: - logger.info('client auth is false. 
No need to restart telemetry') -def generate_client_cli(duthost, method=METHOD_GET, xpath="COUNTERS/Ethernet0", target="COUNTERS_DB", subscribe_mode=SUBSCRIBE_MODE_STREAM, submode=SUBMODE_SAMPLE, intervalms=0, update_count=3): +def generate_client_cli(duthost, gnxi_path, method=METHOD_GET, xpath="COUNTERS/Ethernet0", target="COUNTERS_DB", subscribe_mode=SUBSCRIBE_MODE_STREAM, submode=SUBMODE_SAMPLE, intervalms=0, update_count=3): """Generate the py_gnmicli command line based on the given params. """ - cmdFormat = 'python /gnxi/gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m {2} -x {3} -xt {4} -o {5}' + cmdFormat = 'python '+ gnxi_path + 'gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m {2} -x {3} -xt {4} -o {5}' cmd = cmdFormat.format(duthost.mgmt_ip, TELEMETRY_PORT, method, xpath, target, "ndastreamingservertest") if method == METHOD_SUBSCRIBE: @@ -72,6 +76,24 @@ def assert_equal(actual, expected, message): """ pytest_assert(actual == expected, "{0}. Expected {1} vs actual {2}".format(message, expected, actual)) +@pytest.fixture(scope="module") +def gnxi_path(ptfhost): + """ + gnxi's location is updated from /gnxi to /root/gnxi + in RP https://github.com/Azure/sonic-buildimage/pull/10599. + But old docker-ptf images don't have this update, + test case will fail for these docker-ptf images, + because it should still call /gnxi files. + For avoiding this conflict, check gnxi path before test and set GNXI_PATH to correct value. + Add a new gnxi_path module fixture to make sure to set GNXI_PATH before test. 
+ """ + path_exists = ptfhost.stat(path="/root/gnxi/") + if path_exists["stat"]["exists"] and path_exists["stat"]["isdir"]: + gnxipath = "/root/gnxi/" + else: + gnxipath = "/gnxi/" + return gnxipath + @pytest.fixture(scope="module", autouse=True) def verify_telemetry_dockerimage(duthosts, rand_one_dut_hostname): """If telemetry docker is available in image then return true @@ -84,25 +106,39 @@ def verify_telemetry_dockerimage(duthosts, rand_one_dut_hostname): if not (len(matching) > 0): pytest.skip("docker-sonic-telemetry is not part of the image") -@pytest.fixture -def setup_streaming_telemetry(duthosts, rand_one_dut_hostname, localhost, ptfhost): +@pytest.fixture(scope="module") +def setup_streaming_telemetry(duthosts, rand_one_dut_hostname, localhost, ptfhost, gnxi_path): """ @summary: Post setting up the streaming telemetry before running the test. """ - duthost = duthosts[rand_one_dut_hostname] - setup_telemetry_forpyclient(duthost) - - # Wait until telemetry was restarted - pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, "telemetry"), "TELEMETRY not started.") - logger.info("telemetry process restarted. Now run pyclient on ptfdocker") - - # Wait until the TCP port was opened - dut_ip = duthost.mgmt_ip - wait_tcp_connection(localhost, dut_ip, TELEMETRY_PORT, timeout_s=60) - - # pyclient should be available on ptfhost. If it was not available, then fail pytest. - file_exists = ptfhost.stat(path="/gnxi/gnmi_cli_py/py_gnmicli.py") - pytest_assert(file_exists["stat"]["exists"] is True) + try: + duthost = duthosts[rand_one_dut_hostname] + default_client_auth = setup_telemetry_forpyclient(duthost) + + if default_client_auth == "true": + duthost.shell('sonic-db-cli CONFIG_DB HSET "TELEMETRY|gnmi" "client_auth" "false"', module_ignore_errors=False) + duthost.service(name="telemetry", state="restarted") + else: + logger.info('client auth is false. 
No need to restart telemetry') + + # Wait until telemetry was restarted + pytest_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, "telemetry"), "TELEMETRY not started.") + logger.info("telemetry process restarted. Now run pyclient on ptfdocker") + + # Wait until the TCP port was opened + dut_ip = duthost.mgmt_ip + wait_tcp_connection(localhost, dut_ip, TELEMETRY_PORT, timeout_s=60) + + # pyclient should be available on ptfhost. If it was not available, then fail pytest. + file_exists = ptfhost.stat(path=gnxi_path + "gnmi_cli_py/py_gnmicli.py") + pytest_assert(file_exists["stat"]["exists"] is True) + except RunAnsibleModuleFail as e: + logger.info("Error happens in the setup period of setup_streaming_telemetry, recover the telemetry.") + restore_telemetry_forpyclient(duthost, default_client_auth) + raise e + + yield + restore_telemetry_forpyclient(duthost, default_client_auth) def skip_201911_and_older(duthost): """ Skip the current test if the DUT version is 201911 or older. @@ -153,14 +189,14 @@ def test_telemetry_enabledbydefault(duthosts, rand_one_dut_hostname): status_expected = "enabled"; pytest_assert(str(v) == status_expected, "Telemetry feature is not enabled") -def test_telemetry_ouput(duthosts, rand_one_dut_hostname, ptfhost, setup_streaming_telemetry, localhost): +def test_telemetry_ouput(duthosts, rand_one_dut_hostname, ptfhost, setup_streaming_telemetry, localhost, gnxi_path): """Run pyclient from ptfdocker and show gnmi server outputself. 
""" duthost = duthosts[rand_one_dut_hostname] logger.info('start telemetry output testing') dut_ip = duthost.mgmt_ip - cmd = 'python /gnxi/gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m get -x COUNTERS/Ethernet0 -xt COUNTERS_DB \ + cmd = 'python ' + gnxi_path + 'gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m get -x COUNTERS/Ethernet0 -xt COUNTERS_DB \ -o "ndastreamingservertest"'.format(dut_ip, TELEMETRY_PORT) show_gnmi_out = ptfhost.shell(cmd)['stdout'] logger.info("GNMI Server output") @@ -169,19 +205,19 @@ def test_telemetry_ouput(duthosts, rand_one_dut_hostname, ptfhost, setup_streami inerrors_match = re.search("SAI_PORT_STAT_IF_IN_ERRORS", result) pytest_assert(inerrors_match is not None, "SAI_PORT_STAT_IF_IN_ERRORS not found in gnmi_output") -def test_osbuild_version(duthosts, rand_one_dut_hostname, ptfhost, localhost): +def test_osbuild_version(duthosts, rand_one_dut_hostname, ptfhost, localhost, gnxi_path): """ Test osbuild/version query. """ duthost = duthosts[rand_one_dut_hostname] skip_201911_and_older(duthost) - cmd = generate_client_cli(duthost=duthost, method=METHOD_GET, target="OTHERS", xpath="osversion/build") + cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET, target="OTHERS", xpath="osversion/build") show_gnmi_out = ptfhost.shell(cmd)['stdout'] result = str(show_gnmi_out) assert_equal(len(re.findall('"build_version": "sonic\.', result)), 1, "build_version value at {0}".format(result)) assert_equal(len(re.findall('sonic\.NA', result, flags=re.IGNORECASE)), 0, "invalid build_version value at {0}".format(result)) -def test_sysuptime(duthosts, rand_one_dut_hostname, ptfhost, setup_streaming_telemetry, localhost): +def test_sysuptime(duthosts, rand_one_dut_hostname, ptfhost, localhost, gnxi_path): """ @summary: Run pyclient from ptfdocker and test the dataset 'system uptime' to check whether the value of 'system uptime' was float number and whether the value was @@ -191,7 +227,7 @@ def test_sysuptime(duthosts, 
rand_one_dut_hostname, ptfhost, setup_streaming_tel duthost = duthosts[rand_one_dut_hostname] skip_201911_and_older(duthost) dut_ip = duthost.mgmt_ip - cmd = 'python /gnxi/gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m get -x proc/uptime -xt OTHERS \ + cmd = 'python '+ gnxi_path + 'gnmi_cli_py/py_gnmicli.py -g -t {0} -p {1} -m get -x proc/uptime -xt OTHERS \ -o "ndastreamingservertest"'.format(dut_ip, TELEMETRY_PORT) system_uptime_info = ptfhost.shell(cmd)["stdout_lines"] system_uptime_1st = 0 @@ -226,14 +262,14 @@ def test_sysuptime(duthosts, rand_one_dut_hostname, ptfhost, setup_streaming_tel if system_uptime_2nd - system_uptime_1st < 10: pytest.fail("The value of system uptime was not updated correctly.") -def test_virtualdb_table_streaming(duthosts, rand_one_dut_hostname, ptfhost, localhost): +def test_virtualdb_table_streaming(duthosts, rand_one_dut_hostname, ptfhost, localhost, gnxi_path): """Run pyclient from ptfdocker to stream a virtual-db query multiple times. """ logger.info('start virtual db sample streaming testing') duthost = duthosts[rand_one_dut_hostname] skip_201911_and_older(duthost) - cmd = generate_client_cli(duthost=duthost, method=METHOD_SUBSCRIBE, update_count = 3) + cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_SUBSCRIBE, update_count = 3) show_gnmi_out = ptfhost.shell(cmd)['stdout'] result = str(show_gnmi_out) diff --git a/tests/templates/bfd_responder.conf.j2 b/tests/templates/bfd_responder.conf.j2 new file mode 100644 index 00000000000..d7e8b813c9e --- /dev/null +++ b/tests/templates/bfd_responder.conf.j2 @@ -0,0 +1,10 @@ +[program:bfd_responder] +command=/usr/bin/python /opt/bfd_responder.py {{ bfd_responder_args }} +process_name=bfd_responder +stdout_logfile=/tmp/bfd_responder.out.log +stderr_logfile=/tmp/bfd_responder.err.log +redirect_stderr=false +autostart=false +autorestart=true +startsecs=1 +numprocs=1 diff --git a/tests/test_posttest.py b/tests/test_posttest.py index baf6190e105..3154124f975 100644 
--- a/tests/test_posttest.py +++ b/tests/test_posttest.py @@ -7,13 +7,17 @@ pytestmark = [ pytest.mark.posttest, - pytest.mark.topology('util'), + pytest.mark.topology('util', 'any'), pytest.mark.sanity_check(skip_sanity=True), - pytest.mark.disable_loganalyzer + pytest.mark.disable_loganalyzer, + pytest.mark.skip_check_dut_health ] -def test_collect_techsupport(duthosts, enum_dut_hostname): +def test_collect_techsupport(request, duthosts, enum_dut_hostname): + since = request.config.getoption("--posttest_show_tech_since") + if since == '': + since = 'yesterday' duthost = duthosts[enum_dut_hostname] """ A util for collecting techsupport after tests. @@ -21,12 +25,12 @@ def test_collect_techsupport(duthosts, enum_dut_hostname): Since nightly test on Jenkins will do a cleanup at the beginning of tests, we need a method to save history logs and dumps. This util does the job. """ - logger.info("Collecting techsupport since yesterday") + logger.info("Collecting techsupport since {}".format(since)) # Because Jenkins is configured to save artifacts from tests/logs, # and this util is mainly designed for running on Jenkins, # save path is fixed to logs for now. 
TECHSUPPORT_SAVE_PATH = 'logs/' - out = duthost.command("generate_dump -s yesterday", module_ignore_errors=True) + out = duthost.command("show techsupport --since {}".format(since), module_ignore_errors=True) if out['rc'] == 0: tar_file = out['stdout_lines'][-1] duthost.fetch(src=tar_file, dest=TECHSUPPORT_SAVE_PATH, flat=True) diff --git a/tests/test_pretest.py b/tests/test_pretest.py index df32a75c2bf..75e935d5b29 100644 --- a/tests/test_pretest.py +++ b/tests/test_pretest.py @@ -21,8 +21,9 @@ pytestmark = [ pytest.mark.pretest, - pytest.mark.topology('util'), - pytest.mark.disable_loganalyzer + pytest.mark.topology('util', 'any'), + pytest.mark.disable_loganalyzer, + pytest.mark.skip_check_dut_health ] @@ -199,6 +200,31 @@ def collect_dut_lossy_prio(dut): all_prio = collect_dut_all_prio(dut) return [p for p in all_prio if p not in lossless_prio] + +def collect_dut_pfc_pause_delay_params(dut): + """ + Retrieves a dictionary of pfc pause delay values for the headroom test + Args: + dut (Ansible host instance): device under test + Returns: + pfc_pause_delay_test_params: Mapped from pfc pause quanta to whether + the headroom test will fail or not + E.g. 
{1:True, 2:False, 3:False} + """ + platform = dut.facts['platform'] + pfc_pause_delay_test_params = {} + if 'cisco' in platform.lower() and '8102' in platform.lower(): + pfc_pause_delay_test_params[0] = True + pfc_pause_delay_test_params[1023] = True + elif 'arista' in platform.lower() and '7050cx3' in platform.lower(): + pfc_pause_delay_test_params[0] = True + pfc_pause_delay_test_params[1023] = True + else: + pfc_pause_delay_test_params = None + + return pfc_pause_delay_test_params + + def test_collect_testbed_prio(duthosts, tbinfo): all_prio = {} lossless_prio = {} @@ -226,6 +252,34 @@ def test_collect_testbed_prio(duthosts, tbinfo): except IOError as e: logger.warning('Unable to create file {}: {}'.format(filepath, e)) + +def test_collect_pfc_pause_delay_params(duthosts, tbinfo): + pfc_pause_delay_params = {} + + tbname = tbinfo['conf-name'] + pytest_require(tbname, "skip test due to lack of testbed name.") + + for dut in duthosts: + pfc_pause_delay_params_dut = collect_dut_pfc_pause_delay_params(dut) + if pfc_pause_delay_params_dut is None: + continue + else: + pfc_pause_delay_params[dut.hostname] = pfc_pause_delay_params_dut + + file_name = tbname + '.json' + folder = 'pfc_headroom_test_params' + + + filepath = os.path.join(folder, file_name) + try: + if not os.path.exists(folder): + os.mkdir(folder) + with open(filepath, 'w') as yf: + json.dump({ tbname : pfc_pause_delay_params}, yf, indent=4) + except IOError as e: + logger.warning('Unable to create file {}: {}'.format(filepath, e)) + + def test_update_saithrift_ptf(request, ptfhost): ''' Install the correct python saithrift package on the ptf @@ -235,22 +289,19 @@ def test_update_saithrift_ptf(request, ptfhost): pytest.skip("No URL specified for python saithrift package") pkg_name = py_saithrift_url.split("/")[-1] ptfhost.shell("rm -f {}".format(pkg_name)) - result = ptfhost.get_url(url=py_saithrift_url, dest="/root", module_ignore_errors=True) - if result["failed"] != False or "OK" not in result["msg"]: + result = 
ptfhost.get_url(url=py_saithrift_url, dest="/root", module_ignore_errors=True, timeout=60) + if result["failed"] or "OK" not in result["msg"]: pytest.skip("Download failed/error while installing python saithrift package") ptfhost.shell("dpkg -i {}".format(os.path.join("/root", pkg_name))) logging.info("Python saithrift package installed successfully") -def test_stop_pfcwd(duthosts, enum_dut_hostname, tbinfo): - ''' - Stop pfcwd on dual tor testbeds - ''' - if 'dualtor' not in tbinfo['topo']['name']: - pytest.skip("Skip this test on non dualTOR testbeds") - - dut = duthosts[enum_dut_hostname] - dut.command('pfcwd stop') +def test_generate_running_golden_config(duthosts): + """ + Generate running golden config after pre test. + """ + for duthost in duthosts: + duthost.shell("sonic-cfggen -d --print-data > /etc/sonic/running_golden_config.json") """ Separator for internal pretests. diff --git a/tests/testbed_setup/test_add_property_spytest_junit_xml.py b/tests/testbed_setup/test_add_property_spytest_junit_xml.py index f420087d014..22c00dde271 100644 --- a/tests/testbed_setup/test_add_property_spytest_junit_xml.py +++ b/tests/testbed_setup/test_add_property_spytest_junit_xml.py @@ -5,6 +5,10 @@ import xml.etree.ElementTree as ET +pytestmark = [ + pytest.mark.topology('util') +] + def build_properties(properties_dict): """Build global properties from passed key, value pairs.""" diff --git a/tests/testbed_setup/test_populate_fdb.py b/tests/testbed_setup/test_populate_fdb.py index 491d53e682d..58d6b308ead 100644 --- a/tests/testbed_setup/test_populate_fdb.py +++ b/tests/testbed_setup/test_populate_fdb.py @@ -3,7 +3,7 @@ from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] pytestmark = [ - pytest.mark.topology('t0') + pytest.mark.topology('t0', 'm0') ] def test_populate_fdb(populate_fdb): diff --git a/tests/upgrade_path/test_upgrade_path.py b/tests/upgrade_path/test_upgrade_path.py index e55f5fa9fea..e9d3374fc9a 100644 --- 
a/tests/upgrade_path/test_upgrade_path.py +++ b/tests/upgrade_path/test_upgrade_path.py @@ -22,7 +22,8 @@ pytestmark = [ pytest.mark.topology('any'), pytest.mark.sanity_check(skip_sanity=True), - pytest.mark.disable_loganalyzer + pytest.mark.disable_loganalyzer, + pytest.mark.skip_check_dut_health ] logger = logging.getLogger(__name__) diff --git a/tests/vlan/test_host_vlan.py b/tests/vlan/test_host_vlan.py index b9a28321313..43db57bb022 100644 --- a/tests/vlan/test_host_vlan.py +++ b/tests/vlan/test_host_vlan.py @@ -3,6 +3,7 @@ import random import time import tempfile +import json from scapy.all import sniff from ptf import testutils @@ -10,12 +11,12 @@ from tests.common.dualtor.mux_simulator_control import mux_server_url # lgtm[py/unused-import] from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_rand_selected_tor # lgtm[py/unused-import] from tests.common.utilities import is_ipv4_address -from tests.common.utilities import wait_until +from tests.common.utilities import wait_until, delete_running_config from tests.common.utilities import skip_release - +from tests.common.helpers.assertions import pytest_assert pytestmark = [ - pytest.mark.topology("t0") + pytest.mark.topology("t0", "m0") ] DUT_VLAN_INTF_MAC = "00:00:11:22:33:44" @@ -65,17 +66,32 @@ def verify_host_port_vlan_membership(duthosts, rand_one_dut_hostname, testbed_pa @pytest.fixture(scope="module") -def setup_host_vlan_intf_mac(duthosts, rand_one_dut_hostname, testbed_params, verify_host_port_vlan_membership): +def setup_host_vlan_intf_mac(duthosts, rand_one_dut_hostname, testbed_params, verify_host_port_vlan_membership, tbinfo): vlan_intf, _ = testbed_params duthost = duthosts[rand_one_dut_hostname] dut_vlan_mac = duthost.get_dut_iface_mac('%s' % vlan_intf["attachto"]) duthost.shell('redis-cli -n 4 hmset "VLAN|%s" mac %s' % (vlan_intf["attachto"], DUT_VLAN_INTF_MAC)) - wait_until(10, 2, 2, lambda: duthost.get_dut_iface_mac(vlan_intf["attachto"]) == DUT_VLAN_INTF_MAC) 
+ pytest_assert(wait_until(10, 2, 2, lambda: duthost.get_dut_iface_mac(vlan_intf["attachto"]) == DUT_VLAN_INTF_MAC), + "Failed to set mac address for vlan interface %s" % vlan_intf["attachto"]) yield - + + # Restore the original mac address of the vlan interface duthost.shell('redis-cli -n 4 hmset "VLAN|%s" mac %s' % (vlan_intf["attachto"], dut_vlan_mac)) - wait_until(10, 2, 2, lambda: duthost.get_dut_iface_mac(vlan_intf["attachto"]) == dut_vlan_mac) + pytest_assert(wait_until(10, 2, 2, lambda: duthost.get_dut_iface_mac(vlan_intf["attachto"]) == dut_vlan_mac), + "Failed to restore mac address for vlan interface %s" % vlan_intf["attachto"]) + + if "dualtor" not in tbinfo["topo"]["name"]: + del_vlan_json = json.loads(""" + [{ + "VLAN":{ + "%s":{ + "mac": "%s" + } + } + }] + """ % (vlan_intf["attachto"], dut_vlan_mac)) + delete_running_config(del_vlan_json, duthost) def test_host_vlan_no_floodling( diff --git a/tests/vlan/test_vlan.py b/tests/vlan/test_vlan.py index 45aa598f2e5..6310b42193c 100644 --- a/tests/vlan/test_vlan.py +++ b/tests/vlan/test_vlan.py @@ -16,6 +16,7 @@ from tests.common.fixtures.duthost_utils import utils_create_test_vlans from tests.common.fixtures.duthost_utils import utils_vlan_intfs_dict_orig from tests.common.fixtures.duthost_utils import utils_vlan_intfs_dict_add +from tests.common.helpers.backend_acl import apply_acl_rules, bind_acl_table logger = logging.getLogger(__name__) @@ -44,14 +45,14 @@ def vlan_intfs_dict(tbinfo, utils_vlan_intfs_dict_orig): # Below ip prefix overlaps with 192.168.0.1/21, and need to skip: # 192.168.0.1/24, 192.168.1.1/24, 192.168.2.1/24, 192.168.3.1/24, # 192.168.4.1/24, 192.168.5.1/24, 192.168.6.1/24, 192.168.7.1/24 - if tbinfo['topo']['name'] != 't0-56-po2vlan': + if tbinfo['topo']['name'] not in ('t0-54-po2vlan', 't0-56-po2vlan'): vlan_intfs_dict = utils_vlan_intfs_dict_add(vlan_intfs_dict, 2) return vlan_intfs_dict @pytest.fixture(scope="module") def work_vlan_ports_list(rand_selected_dut, tbinfo, cfg_facts, 
ports_list, utils_vlan_ports_list, vlan_intfs_dict, pc_num=PORTCHANNELS_TEST_NUM): - if tbinfo['topo']['name'] == 't0-56-po2vlan': + if tbinfo['topo']['name'] in ('t0-54-po2vlan', 't0-56-po2vlan'): return utils_vlan_ports_list mg_facts = rand_selected_dut.get_extended_minigraph_facts(tbinfo) @@ -96,6 +97,27 @@ def work_vlan_ports_list(rand_selected_dut, tbinfo, cfg_facts, ports_list, utils return work_vlan_ports_list +@pytest.fixture(scope="module") +def acl_rule_cleanup(duthost, tbinfo): + """Cleanup all the existing DATAACL rules""" + # remove all rules under the ACL_RULE table + if "t0-backend" in tbinfo["topo"]["name"]: + duthost.shell('acl-loader delete') + + yield + +@pytest.fixture(scope="module") +def setup_acl_table(duthost, tbinfo, acl_rule_cleanup): + """ Remove the DATAACL table prior to the test and recreate it at the end""" + if "t0-backend" in tbinfo["topo"]["name"]: + duthost.command('config acl remove table DATAACL') + + yield + + if "t0-backend" in tbinfo["topo"]["name"]: + duthost.command('config acl remove table DATAACL') + # rebind with new set of ports + bind_acl_table(duthost, tbinfo) def shutdown_portchannels(duthost, portchannel_interfaces, pc_num=PORTCHANNELS_TEST_NUM): cmds = [] @@ -152,11 +174,11 @@ def startup_portchannels(duthost, portchannel_interfaces, pc_num=PORTCHANNELS_TE @pytest.fixture(scope="module", autouse=True) -def setup_vlan(duthosts, rand_one_dut_hostname, ptfadapter, tbinfo, work_vlan_ports_list, vlan_intfs_dict, cfg_facts): +def setup_vlan(duthosts, rand_one_dut_hostname, ptfadapter, tbinfo, work_vlan_ports_list, vlan_intfs_dict, cfg_facts, setup_acl_table): duthost = duthosts[rand_one_dut_hostname] # --------------------- Setup ----------------------- try: - if tbinfo['topo']['name'] != 't0-56-po2vlan': + if tbinfo['topo']['name'] not in ('t0-54-po2vlan', 't0-56-po2vlan'): portchannel_interfaces = cfg_facts.get('PORTCHANNEL_INTERFACE', {}) shutdown_portchannels(duthost, portchannel_interfaces) @@ -175,6 +197,8 @@ def 
setup_vlan(duthosts, rand_one_dut_hostname, ptfadapter, tbinfo, work_vlan_po logger.info('"show int portchannel" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) populate_fdb(ptfadapter, work_vlan_ports_list, vlan_intfs_dict) + bind_acl_table(duthost, tbinfo) + apply_acl_rules(duthost, tbinfo) # --------------------- Testing ----------------------- yield # --------------------- Teardown ----------------------- @@ -186,10 +210,29 @@ def tearDown(duthost, tbinfo): logger.info("VLAN test ending ...") - if tbinfo['topo']['name'] != 't0-56-po2vlan': + if tbinfo['topo']['name'] not in ('t0-54-po2vlan', 't0-56-po2vlan'): config_reload(duthost) +@pytest.fixture(autouse=True) +def ignore_expected_loganalyzer_exceptions(duthosts, rand_one_dut_hostname, loganalyzer): + """ + Ignore expected errors in logs during test execution + + Args: + loganalyzer: Loganalyzer utility fixture + duthost: DUT host object + """ + duthost = duthosts[rand_one_dut_hostname] + if loganalyzer: + loganalyzer_ignore_regex = [ + ".*ERR swss#orchagent: :- update: Failed to get port by bridge port ID.*", + ] + loganalyzer[duthost.hostname].ignore_regex.extend(loganalyzer_ignore_regex) + + yield + + def build_icmp_packet(vlan_id, src_mac="00:22:00:00:00:02", dst_mac="ff:ff:ff:ff:ff:ff", src_ip="192.168.0.1", dst_ip="192.168.0.2", ttl=64): @@ -218,7 +261,7 @@ def build_qinq_packet(outer_vlan_id, vlan_id, return pkt -def verify_packets_with_portchannel(test, pkt, ports=[], portchannel_ports=[], device_number=0, timeout=1): +def verify_packets_with_portchannel(test, pkt, ports=[], portchannel_ports=[], device_number=0, timeout=5): for port in ports: result = testutils.dp_poll(test, device_number=device_number, port_number=port, timeout=timeout, exp_pkt=pkt) @@ -237,7 +280,7 @@ def verify_packets_with_portchannel(test, pkt, ports=[], portchannel_ports=[], d % (device_number, str(port_group))) -def verify_icmp_packets(ptfadapter, work_vlan_ports_list, vlan_port, vlan_id): +def 
verify_icmp_packets(ptfadapter, send_pkt, work_vlan_ports_list, vlan_port, vlan_id): untagged_pkt = build_icmp_packet(0) tagged_pkt = build_icmp_packet(vlan_id) untagged_dst_ports = [] @@ -264,6 +307,8 @@ def verify_icmp_packets(ptfadapter, work_vlan_ports_list, vlan_port, vlan_id): else: tagged_dst_ports += port["port_index"] + ptfadapter.dataplane.flush() + testutils.send(ptfadapter, vlan_port["port_index"][0], send_pkt) verify_packets_with_portchannel(test=ptfadapter, pkt=untagged_pkt, ports=untagged_dst_ports, @@ -275,6 +320,7 @@ def verify_icmp_packets(ptfadapter, work_vlan_ports_list, vlan_port, vlan_id): def verify_unicast_packets(ptfadapter, send_pkt, exp_pkt, src_port, dst_ports): + ptfadapter.dataplane.flush() testutils.send(ptfadapter, src_port, send_pkt) try: testutils.verify_packets_any(ptfadapter, exp_pkt, ports=dst_ports) @@ -311,8 +357,7 @@ def test_vlan_tc1_send_untagged(ptfadapter, work_vlan_ports_list, toggle_all_sim logger.info("Send untagged packet from {} ...".format(vlan_port["port_index"][0])) logger.info(pkt.sprintf("%Ether.src% %IP.src% -> %Ether.dst% %IP.dst%")) if vlan_port['pvid'] != 0: - testutils.send(ptfadapter, vlan_port["port_index"][0], pkt) - verify_icmp_packets(ptfadapter, work_vlan_ports_list, vlan_port, vlan_port["pvid"]) + verify_icmp_packets(ptfadapter, pkt, work_vlan_ports_list, vlan_port, vlan_port["pvid"]) else: exp_pkt = Mask(pkt) exp_pkt.set_do_not_care_scapy(scapy.Dot1Q, "vlan") @@ -340,8 +385,8 @@ def test_vlan_tc2_send_tagged(ptfadapter, work_vlan_ports_list, toggle_all_simul pkt = build_icmp_packet(permit_vlanid) logger.info("Send tagged({}) packet from {} ...".format(permit_vlanid, vlan_port["port_index"][0])) logger.info(pkt.sprintf("%Ether.src% %IP.src% -> %Ether.dst% %IP.dst%")) - testutils.send(ptfadapter, vlan_port["port_index"][0], pkt) - verify_icmp_packets(ptfadapter, work_vlan_ports_list, vlan_port, permit_vlanid) + + verify_icmp_packets(ptfadapter, pkt, work_vlan_ports_list, vlan_port, permit_vlanid) 
@pytest.mark.bsl diff --git a/tests/vlan/test_vlan_ping.py b/tests/vlan/test_vlan_ping.py new file mode 100644 index 00000000000..4117495b2b4 --- /dev/null +++ b/tests/vlan/test_vlan_ping.py @@ -0,0 +1,178 @@ +import random +import pytest +import ipaddress +import logging +import ptf.testutils as testutils +from tests.common.helpers.assertions import pytest_assert as py_assert +from tests.common.plugins import ptfadapter + +logger = logging.getLogger(__name__) + +pytestmark = [ + pytest.mark.topology('t0', 't0-52', 'm0') +] + + +def static_neighbor_entry(duthost, dic, oper, ip_version="both"): + """ + Performs addition or deletion of static entries of ipv4 and v6 neighbors in DUT based on 'oper' parameter + """ + for member in dic.itervalues(): + if ip_version == "4" or "both": + if oper == "add": + logger.debug("adding ipv4 static arp entry for ip %s on DUT" % (member['ipv4'])) + duthost.shell("sudo arp -s {0} {1}".format(member['ipv4'], member['mac'])) + + elif oper == "del": + logger.debug("deleting ipv4 static arp entry for ip %s on DUT" % (member['ipv4'])) + duthost.shell("sudo arp -d {0}".format(member['ipv4'])) + else: + logger.debug("unknown operation") + + elif ip_version == "6" or "both": + if oper == "add": + logger.debug("adding ipv6 static arp entry for ip %s on DUT" % (member['ipv6'])) + duthost.shell( + "sudo ip -6 neigh add {0} lladdr {1} dev Vlan{2}".format(member['ipv6'], member['mac'], + member['Vlanid'])) + elif oper == "del": + logger.debug("deleting ipv6 static arp entry for ip %s on DUT" % (member['ipv6'])) + duthost.shell("sudo ip -6 neigh del {0} lladdr {1} dev Vlan{2}".format(member['ipv6'], member['mac'], + member['Vlanid'])) + else: + logger.debug("unknown operation") + + else: + logger.debug("unknown IP version") + + +@pytest.fixture(scope='module') +def vlan_ping_setup(duthosts, rand_one_dut_hostname, ptfhost, nbrhosts, tbinfo): + """ + Setup: adds ipv4 and ipv6 address on ptf hosts and routes for VM + Teardown: deletes ipv4 and ipv6 
address on ptf hosts and removes routes to VM. Also removes residual static arp entries from tests + """ + vm_host_info = {} + + vm_name, vm_info = None, None + topo_name = tbinfo["topo"]["name"] + for nbr_name, nbr_info in nbrhosts.items(): + if topo_name != "m0" or (topo_name == "m0" and "M1" in nbr_name): + vm_name = nbr_name + vm_info = nbr_info + break + + py_assert(vm_name is not None, "Can't get neighbor vm") + vm_ip_with_prefix = (vm_info['conf']['interfaces']['Port-Channel1']['ipv4']).decode('utf-8') + output = vm_info['host'].command("ip addr show dev po1") + vm_host_info["mac"] = output['stdout_lines'][1].split()[1] + vm_ip_intf = ipaddress.IPv4Interface(vm_ip_with_prefix).ip + vm_host_info["ipv4"] = vm_ip_intf + duthost = duthosts[rand_one_dut_hostname] + mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + my_cfg_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts'] + ptfhost_info = {} + for a_bgp_nbr in mg_facts['minigraph_bgp']: + # Get the bgp neighbor connected to the selected VM + if a_bgp_nbr['name'] == vm_name and a_bgp_nbr['addr'] == str(vm_host_info['ipv4']): + # Find the port channel that connects to the selected VM + for intf in mg_facts['minigraph_portchannel_interfaces']: + if intf['peer_addr'] == str(vm_host_info['ipv4']): + portchannel = intf['attachto'] + vm_host_info['port_index'] = mg_facts['minigraph_ptf_indices'][mg_facts['minigraph_portchannels'][portchannel]['members'][0]] + break + break + + # getting the ipv4, ipv6 and vlan id of a vlan in DUT with 2 or more vlan members + for k, v in my_cfg_facts['VLAN'].items(): + vlanid = v['vlanid'] + if len(my_cfg_facts['VLAN_MEMBER']['Vlan' + vlanid]) >= 2: + for addr in my_cfg_facts['VLAN_INTERFACE']['Vlan' + vlanid]: + if addr.find(':') == -1: + ip4 = addr + else: + ip6 = addr + break # need only 1 vlan details + else: + continue + + # ip prefixes of the vlan + vlan_ip_address_v4 = ipaddress.IPv4Interface(ip4).ip + vlan_ip_network_v4 = 
ipaddress.IPv4Interface(ip4).network + + # selecting 2 random vlan members of DUT + # Remove portchannel in vlan member list + filter_vlan_member_list = [member for member in my_cfg_facts['VLAN_MEMBER']['Vlan' + vlanid].keys() if member in mg_facts['minigraph_ptf_indices']] + rand_vlan_member_list = random.sample(filter_vlan_member_list, 2) + exclude_ip = [] + exclude_ip.extend( + [ipaddress.IPv4Interface(ip4).network.network_address, ipaddress.IPv4Interface(ip4).network.broadcast_address, + vlan_ip_address_v4] + ) + + # getting port index, mac, ipv4 and ipv6 of ptf ports into a dict + ips_in_vlan = [x for x in vlan_ip_network_v4 if x not in exclude_ip] + for member in rand_vlan_member_list: + # Get first and last ip in vlan for two vlan members + ip_in_vlan = ips_in_vlan[0 if len(ptfhost_info.keys()) == 0 else -1] + ptfhost_info[member] = {} + ptfhost_info[member]["Vlanid"] = vlanid + ptfhost_info[member]["port_index"] = mg_facts['minigraph_ptf_indices'][member] + ptfhost_info[member]["mac"] = (ptfhost.shell( + "ifconfig eth%d | grep -o -E '([[:xdigit:]]{1,2}:){5}[[:xdigit:]]{1,2}'" % ptfhost_info[member][ + "port_index"]))['stdout'] + ptfhost_info[member]["ipv4"] = str(ip_in_vlan) + ptfhost_info[member]["ipv6"] = str( + ipaddress.IPv6Interface(ip6).network[ptfhost_info[member]["port_index"]]) + + return vm_host_info, ptfhost_info + + +def verify_icmp_packet(dut_mac, src_port, dst_port, ptfadapter): + pkt = testutils.simple_icmp_packet(eth_src=str(src_port['mac']), + eth_dst=str(dut_mac), + ip_src=str(src_port['ipv4']), + ip_dst=str(dst_port['ipv4']), ip_ttl=64) + exptd_pkt = testutils.simple_icmp_packet(eth_src=str(dut_mac), + eth_dst=str(dst_port['mac']), + ip_src=str(src_port['ipv4']), + ip_dst=str(dst_port['ipv4']), ip_ttl=63) + for i in range(5): + testutils.send_packet(ptfadapter, src_port['port_index'], pkt) + testutils.verify_packet(ptfadapter, exptd_pkt, dst_port['port_index']) + + +def test_vlan_ping(vlan_ping_setup, duthosts, rand_one_dut_hostname, 
ptfhost, nbrhosts, ptfadapter): + """ + test for checking connectivity of statically added ipv4 and ipv6 arp entries + """ + duthost = duthosts[rand_one_dut_hostname] + vmhost_info, ptfhost_info = vlan_ping_setup + device2 = dict(list(ptfhost_info.items())[1:]) + device1 = dict(list(ptfhost_info.items())[:1]) + + # initial setup and checking connectivity, try to break in more chunks + logger.info("initializing setup for ipv4 and ipv6") + static_neighbor_entry(duthost, ptfhost_info, "add") + logger.info("Checking connectivity to ptf ports") + for member in ptfhost_info: + verify_icmp_packet(duthost.facts['router_mac'], vmhost_info, ptfhost_info[member], ptfadapter) + verify_icmp_packet(duthost.facts['router_mac'], ptfhost_info[member], vmhost_info, ptfadapter) + + # flushing and re-adding ipv6 static arp entry + static_neighbor_entry(duthost, ptfhost_info, "del", "6") + static_neighbor_entry(duthost, dict(reversed(ptfhost_info.items())), "add", "6") + + # flushing and re-adding ipv4 static arp entry for 2nd ptf host + static_neighbor_entry(duthost, device2, "del", "4") + static_neighbor_entry(duthost, device2, "add", "4") + + # flushing and re-adding ipv4 static arp entry for 1st ptf host + static_neighbor_entry(duthost, device1, "del", "4") + static_neighbor_entry(duthost, device1, "add", "4") + + # Checking for connectivity + logger.info("Check connectivity to both ptfhost") + for member in ptfhost_info: + verify_icmp_packet(duthost.facts['router_mac'], vmhost_info, ptfhost_info[member], ptfadapter) + verify_icmp_packet(duthost.facts['router_mac'], ptfhost_info[member], vmhost_info, ptfadapter) diff --git a/tests/voq/voq_helpers.py b/tests/voq/voq_helpers.py index eefde2f378a..5bdd58a96db 100644 --- a/tests/voq/voq_helpers.py +++ b/tests/voq/voq_helpers.py @@ -185,7 +185,8 @@ def check_no_routes_from_nexthop(asic, nexthop): ver = '-6' else: ver = '-4' - cmd = "ip {} route show | grep -w {} | wc -l".format(ver, nexthop) + special_nexthop = nexthop.replace('.', 
'\\\.') + cmd = "ip {} route show | grep -w {} | wc -l".format(ver, special_nexthop) if asic.namespace is not None: fullcmd = "sudo ip netns exec {} {}".format(asic.namespace, cmd) output = asic.sonichost.shell(fullcmd) diff --git a/tests/vtestbed.yaml b/tests/vtestbed.yaml new file mode 120000 index 00000000000..ac97f82fca2 --- /dev/null +++ b/tests/vtestbed.yaml @@ -0,0 +1 @@ +../ansible/vtestbed.yaml \ No newline at end of file diff --git a/tests/vxlan/conftest.py b/tests/vxlan/conftest.py index c319667b973..f32b28d9a7c 100644 --- a/tests/vxlan/conftest.py +++ b/tests/vxlan/conftest.py @@ -1,3 +1,40 @@ +import argparse +from os.path import join +import pytest +import logging +import yaml +from tests.vxlan.vnet_utils import ( + safe_open_template, + combine_dicts + ) +from tests.vxlan.vnet_constants import ( + NUM_VNET_KEY, + NUM_ROUTES_KEY, + NUM_ENDPOINTS_KEY, + VXLAN_UDP_SPORT_KEY, + VXLAN_UDP_SPORT_MASK_KEY, + VXLAN_RANGE_ENABLE_KEY, + IPV6_VXLAN_TEST_KEY, + CLEANUP_KEY, + APPLY_NEW_CONFIG_KEY, + NUM_INTF_PER_VNET_KEY, + TEMPLATE_DIR +) + +logger = logging.getLogger(__name__) + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def pytest_addoption(parser): """ Adds pytest options that are used by VxLAN tests @@ -10,7 +47,8 @@ def pytest_addoption(parser): action="store", default=4789, type=int, - help="The UDP port to use for VxLAN. It must be a viable UDP port - not one of the already used standard protocol ports" + help="The UDP port to use for VxLAN. 
It must be a viable UDP port " + "- not one of the already used standard protocol ports" ) vxlan_group.addoption( @@ -45,10 +83,44 @@ def pytest_addoption(parser): help="number of VLAN interfaces per VNET" ) + vxlan_group.addoption( + "--ipv4_in_ipv4", + action="store", + default=True, + type=str2bool, + help="Test IPv4 in IPv4" + ) + vxlan_group.addoption( "--ipv6_vxlan_test", - action="store_true", - help="Use IPV6 for VxLAN test" + action="store", + default=False, + type=str2bool, + help="Test IPV6 encap" + ) + + vxlan_group.addoption( + "--ipv6_in_ipv4", + action="store", + default=True, + type=str2bool, + help="Test IPV6 in IPv4" + ) + + vxlan_group.addoption( + "--ipv4_in_ipv6", + action="store", + default=True, + type=str2bool, + help="Test IPv4 in IPv6" + ) + + vxlan_group.addoption( + "--ipv6_in_ipv6", + action="store", + type=str2bool, + default=True, + help="Test IPV6 in IPv6" ) vxlan_group.addoption( @@ -77,6 +149,15 @@ def pytest_addoption(parser): help="Expected base VXLAN UDP src port mask" ) + # BFD options + vxlan_group.addoption( + "--bfd", + action="store", + default=True, + type=bool, + help="BFD Status" + ) + # ECMP options vxlan_group.addoption( "--total_number_of_endpoints", @@ -91,13 +172,16 @@ def pytest_addoption(parser): action="store", default=1, type=int, - help="ECMP: Number of tunnel endpoints to provide for each tunnel destination" + help="ECMP: Number of tunnel endpoints to provide for each tunnel" + " destination" ) vxlan_group.addoption( "--debug_enabled", action="store_true", - help="Enable debugging the script. The config file names will *not* be time-stamped, every run of the script will over-write the previously created config files." + help="Enable debugging the script. The config file names will " + "*not* be time-stamped. Every run of the script will over-write " + "the previously created config files." 
) vxlan_group.addoption( @@ -110,14 +194,156 @@ def pytest_addoption(parser): "--dut_hostid", default=1, type=int, - help="This is the host part of the IP addresses for interfaces in the DUT to be used in this script." + help="This is the host part of the IP addresses for interfaces in " + "the DUT to be used in this script." ) # This will decide the number of destinations. vxlan_group.addoption( "--total_number_of_nexthops", action="store", - default=2, # Max: 32k, 64K, or 128 K + default=2, # Max: 32k, 64K, or 128 K + type=int, + help="ECMP: Number of tunnel nexthops to be tested. (number of " + "nhs_per_destination X number_of_destinations)" + ) + + vxlan_group.addoption( + "--include_long_tests", + action="store", + default=False, + type=bool, + help="Run the long-running testcases." + ) + + vxlan_group.addoption( + "--crm_num_nexthops", + action="store", + default=4096, type=int, - help="ECMP: Number of tunnel nexthops to be tested. (number of nhs_per_destination X number_of_destinations)" + help="CRM:Number of available pool of nexthops." ) + + vxlan_group.addoption( + "--crm_num_nexthop_groups", + action="store", + default=512, + type=int, + help="CRM:Number of Vnet nexthop groups." + ) + + vxlan_group.addoption( + "--crm_num_nexthop_group_members", + action="store", + default=1024, + type=int, + help="CRM:Number of Vnet nexthop group members" + "(number of repeated addresses to use across all the routes)." + ) + + +@pytest.fixture(scope="module") +def scaled_vnet_params(request): + """ + Fixture to get CLI parameters for scaled vnet testing + Args: + request: Pytest fixture containing parsed CLI parameters + Returns: + A dictionary holding each scaled vnet parameter with the parameter + name as the key. 
+ * num_vnet + * num_routes + * num_endpoints + """ + + params = {} + params[NUM_VNET_KEY] = request.config.option.num_vnet + params[NUM_ROUTES_KEY] = request.config.option.num_routes + params[NUM_ENDPOINTS_KEY] = request.config.option.num_endpoints + return params + + +@pytest.fixture(scope="module") +def vnet_test_params(duthost, request): + """ + Fixture to get CLI parameters for vnet testing + Args: + request: Pytest fixture containing parsed CLI parameters + Returns: + A dictionary holding each parameter with the parameter name as the key + * ipv6_vxlan_test - whether to include ipv6 functionality + in testing + * cleanup - whether to remove test data/configs after test is + finished + * apply_new_config - whether to apply new configurations that were + pushed to the DUT + """ + + params = {} + params[VXLAN_UDP_SPORT_KEY] = 0 + params[VXLAN_UDP_SPORT_MASK_KEY] = 0 + + vxlan_range_enable = duthost.shell( + 'redis-cli -n 4 hget "DEVICE_METADATA|localhost" \ + vxlan_port_range')['stdout'] == "enable" + + if request.config.option.udp_src_port is not None or \ + request.config.option.udp_src_port_mask is not None: + vxlan_range_enable = True + + if request.config.option.udp_src_port: + params[VXLAN_UDP_SPORT_KEY] = request.config.option.udp_src_port + + if request.config.option.udp_src_port_mask: + params[VXLAN_UDP_SPORT_MASK_KEY] = \ + request.config.option.udp_src_port_mask + + params[VXLAN_RANGE_ENABLE_KEY] = vxlan_range_enable + params[IPV6_VXLAN_TEST_KEY] = request.config.option.ipv6_vxlan_test + params[CLEANUP_KEY] = not request.config.option.skip_cleanup + params[APPLY_NEW_CONFIG_KEY] = not request.config.option.skip_apply_config + params[NUM_INTF_PER_VNET_KEY] = request.config.option.num_intf_per_vnet + return params + + +@pytest.fixture(scope="module") +def minigraph_facts(duthosts, rand_one_dut_hostname, tbinfo): + """ + Fixture to get minigraph facts + Args: + duthost: DUT host object + Returns: + Dictionary containing minigraph information + """ + 
duthost = duthosts[rand_one_dut_hostname] + + return duthost.get_extended_minigraph_facts(tbinfo) + + +@pytest.fixture(scope="module") +def vnet_config(minigraph_facts, vnet_test_params, scaled_vnet_params): + """ + Fixture to generate vnet configuration from templates/vnet_config.j2 + Args: + minigraph_facts: minigraph information/facts + vnet_test_params: Dictionary holding vnet test parameters + scaled_vnet_params: Dictionary holding scaled vnet testing parameters + Returns: + A dictionary containing the generated vnet configuration information + """ + + num_rifs = vnet_test_params[NUM_INTF_PER_VNET_KEY] * \ + scaled_vnet_params[NUM_VNET_KEY] + + if num_rifs > 128: + logger.warning( + "Total number of configured interfaces will be greater" + "than 128. This is not a supported test scenario") + + combined_args = combine_dicts( + minigraph_facts, + vnet_test_params, + scaled_vnet_params) + return yaml.safe_load( + safe_open_template( + join(TEMPLATE_DIR, "vnet_config.j2")).render(combined_args)) diff --git a/tests/vxlan/test_vnet_vxlan.py b/tests/vxlan/test_vnet_vxlan.py index 129f3a696fc..d7186a4c63e 100644 --- a/tests/vxlan/test_vnet_vxlan.py +++ b/tests/vxlan/test_vnet_vxlan.py @@ -1,10 +1,13 @@ import json import logging +import re import pytest from datetime import datetime +from tests.common.helpers.assertions import pytest_assert +from tests.common.utilities import wait_until from tests.ptf_runner import ptf_runner -from vnet_constants import CLEANUP_KEY, VXLAN_UDP_SPORT_KEY, VXLAN_UDP_SPORT_MASK_KEY, VXLAN_RANGE_ENABLE_KEY +from vnet_constants import CLEANUP_KEY, VXLAN_UDP_SPORT_KEY, VXLAN_UDP_SPORT_MASK_KEY, VXLAN_RANGE_ENABLE_KEY, DUT_VNET_NBR_JSON from vnet_utils import generate_dut_config_files, safe_open_template, \ apply_dut_config_files, cleanup_dut_vnets, cleanup_vxlan_tunnels, cleanup_vnet_routes @@ -14,16 +17,25 @@ import tests.arp.test_wr_arp as test_wr_arp +from tests.common.config_reload import config_reload + logger = 
logging.getLogger(__name__) pytestmark = [ pytest.mark.topology("t0"), - pytest.mark.sanity_check(post_check=True), - pytest.mark.asic("mellanox") + pytest.mark.disable_loganalyzer ] vlan_tagging_mode = "" +@pytest.fixture(scope='module', autouse=True) +def load_minigraph_after_test(rand_selected_dut): + """ + Restore config_db as vnet with wram-reboot will write testing config into + config_db.json + """ + yield + config_reload(rand_selected_dut, config_source='minigraph') def prepare_ptf(ptfhost, mg_facts, dut_facts, vnet_config): """ @@ -88,7 +100,7 @@ def setup(duthosts, rand_one_dut_hostname, ptfhost, minigraph_facts, vnet_config return minigraph_facts @pytest.fixture(params=["Disabled", "Enabled", "WR_ARP", "Cleanup"]) -def vxlan_status(setup, request, duthosts, rand_one_dut_hostname, ptfhost, vnet_test_params, vnet_config, creds): +def vxlan_status(setup, request, duthosts, rand_one_dut_hostname, ptfhost, vnet_test_params, vnet_config, creds, tbinfo): """ Paramterized fixture that tests the Disabled, Enabled, and Cleanup configs for VxLAN @@ -117,7 +129,8 @@ def vxlan_status(setup, request, duthosts, rand_one_dut_hostname, ptfhost, vnet_ duthost.shell("redis-cli -n 4 del \"VLAN_MEMBER|{}|{}\"".format(attached_vlan, vlan_member)) apply_dut_config_files(duthost, vnet_test_params) - + # Check arp table status in a loop with delay. 
+ pytest_assert(wait_until(120, 20, 10, is_neigh_reachable, duthost, vnet_config), "Neighbor is unreachable") vxlan_enabled = True elif request.param == "Cleanup" and vnet_test_params[CLEANUP_KEY]: if vlan_tagging_mode != "": @@ -129,10 +142,33 @@ def vxlan_status(setup, request, duthosts, rand_one_dut_hostname, ptfhost, vnet_ cleanup_vxlan_tunnels(duthost, vnet_test_params) elif request.param == "WR_ARP": testWrArp = test_wr_arp.TestWrArp() - test_wr_arp.TestWrArp.testWrArp(testWrArp, request, duthost, ptfhost, creds) + testWrArp.Setup(duthost, ptfhost, tbinfo) + try: + test_wr_arp.TestWrArp.testWrArp(testWrArp, request, duthost, ptfhost, creds) + finally: + testWrArp.Teardown(duthost) return vxlan_enabled, request.param + +def is_neigh_reachable(duthost, vnet_config): + expected_neigh_list = vnet_config["vnet_nbr_list"] + ip_neigh_cmd_output = duthost.shell("sudo ip -4 neigh")['stdout'] + for exp_neigh in expected_neigh_list: + if exp_neigh["ifname"].startswith("Vlan"): + regexp = '{}.*{}.*?REACHABLE'.format(exp_neigh["ip"], exp_neigh["ifname"]) + if re.search(regexp, ip_neigh_cmd_output): + logger.info('Neigh {} {} is reachable'.format(exp_neigh["ip"], exp_neigh["ifname"])) + else: + logger.error('Neigh {} {} is not reachable'.format(exp_neigh["ip"], exp_neigh["ifname"])) + logger.info("Reapplying config {}".format(DUT_VNET_NBR_JSON)) + duthost.shell("sudo config load {} -y".format(DUT_VNET_NBR_JSON)) + return False + else: + logger.warning('Neighbor expected but not found: {} {}'.format(exp_neigh["ip"], exp_neigh["ifname"])) + return True + + def test_vnet_vxlan(setup, vxlan_status, duthosts, rand_one_dut_hostname, ptfhost, vnet_test_params, creds): """ Test case for VNET VxLAN diff --git a/tests/vxlan/test_vxlan_crm.py b/tests/vxlan/test_vxlan_crm.py new file mode 100644 index 00000000000..771f479acff --- /dev/null +++ b/tests/vxlan/test_vxlan_crm.py @@ -0,0 +1,220 @@ +import logging +import pytest +import ipaddress +from functools import reduce + +from 
tests.common.helpers.assertions import pytest_assert +from tests.common.fixtures.ptfhost_utils \ + import copy_ptftests_directory # noqa: F401 +from tests.vxlan.vxlan_ecmp_utils import Ecmp_Utils +from tests.vxlan.test_vxlan_ecmp import ( # noqa: F401 + Test_VxLAN, + fixture_setUp, + fixture_encap_type) + +Logger = logging.getLogger(__name__) +ecmp_utils = Ecmp_Utils() + + +def uniq(lst): + last = object() + for item in sorted(lst): + if item == last: + continue + yield item + last = item + + +def sort_and_deduplicate(list_of_entries): + return list(uniq(sorted(list_of_entries, reverse=True))) + + +def unique_in_list(list1): + return (reduce(lambda re, x: re+[x] if x not in re else re, list1, [])) + + +@pytest.fixture(name="argument_setup", scope="module") +def _fixture_argument_setup(request): + + request.config.option.total_number_of_endpoints =\ + request.config.option.crm_num_nexthops + + request.config.option.total_number_of_nexthops =\ + request.config.option.crm_num_nexthop_group_members + + request.config.option.ecmp_nhs_per_destination =\ + (request.config.option.crm_num_nexthop_group_members / + request.config.option.crm_num_nexthop_groups) + + if request.config.option.ecmp_nhs_per_destination <= 1: + raise RuntimeError( + "This config will not raise the number of ECMP groups," + " pls change the commandline arguments." 
+ "crm_num_nexthop_group_members/crm_num_nexthop_groups " + "must be more than 1") + + +@pytest.fixture(name="setup_neighbors", scope="module") +def fixture_setup_neighbors(setUp, encap_type, minigraph_facts): + duthost = setUp['duthost'] + a_family = Ecmp_Utils.get_outer_layer_version(encap_type) + t2_neighbors = Ecmp_Utils.get_all_interfaces_running_bgp( + duthost, + minigraph_facts, + "T2") + + IP_TYPE = { + 'v4': ipaddress.IPv4Address, + 'v6': ipaddress.IPv6Address + } + intf = None + for addr in t2_neighbors.keys(): + if isinstance(ipaddress.ip_address(addr), IP_TYPE[a_family]): + intf = t2_neighbors[addr].keys()[0] + break + if not intf: + raise RuntimeError( + "Couldn't find an interface to use " + "for encap_type:{}".format(encap_type)) + + if a_family == "v4": + duthost.shell( + "sudo config interface ip add {} 200.0.0.1/16".format(intf)) + for count in range(200): + duthost.shell( + "sudo arp -s 200.0.{}.2 0a:bb:cc:dd:ee:ff".format(count)) + else: + duthost.shell( + "sudo config interface ip add {} DDDD::200:0:0:1/64".format(intf)) + for count in range(200): + duthost.shell( + "sudo ip -6 neigh add DDDD::200:0:{}:2 " + "lladdr 00:11:22:33:44:55 dev {}".format(count, intf)) + + # We have setup 201 neighbors so far. + yield 201 + + if a_family == "v4": + for count in range(200): + duthost.shell("sudo arp -d 200.0.{}.2".format(count)) + duthost.shell( + "sudo config interface ip remove {} 200.0.0.1/16".format(intf)) + else: + for count in range(200): + duthost.shell( + "sudo ip -6 neigh del DDDD::200:0:{}:2 " + "lladdr 00:11:22:33:44:55 dev {}".format(count, intf)) + duthost.shell( + "sudo config interface ip remove {} DDDD::200:0:0:1/64".format( + intf)) + + +class Test_VxLAN_Crm(Test_VxLAN): + ''' + Class for all testcases that verify Critical Resource Monitoring + counters. 
+ ''' + # CRM tolerance + tolerance = 0.90 + + def crm_assert(self, crm_output, resource_name, required_increase): + ''' + Helper function to verify the usage went up as per + requirement. + ''' + pytest_assert( + crm_output[resource_name]['used'] >= + self.setup['crm'][resource_name]['used'] + + self.tolerance * required_increase, + "CRM:{} usage didn't increase as needed:old:{}, " + "new:{}, diff:{}, expected_diff:{}".format( + resource_name, + self.setup['crm'][resource_name]['used'], + crm_output[resource_name]['used'], + (self.setup['crm'][resource_name]['used'] - + crm_output[resource_name]['used']), + required_increase)) + + def test_crm_16k_routes(self, setUp, encap_type, setup_neighbors): + ''' + Verify that the CRM counter values for ipv4_route, ipv4_nexthop, + ipv6_route and ipv6_nexthop are updated as per the vxlan route + configs. + ''' + self.setup = setUp + outer_layer_version = ecmp_utils.get_outer_layer_version(encap_type) + + number_of_routes_configured = 0 + set_of_unique_endpoints = set() + + for vnet in self.setup[encap_type]['dest_to_nh_map'].keys(): + number_of_routes_configured += \ + len(self.setup[encap_type]['dest_to_nh_map'][vnet].keys()) + + dest_to_nh_map = self.setup[encap_type]['dest_to_nh_map'][vnet] + for _, nexthops in dest_to_nh_map.items(): + set_of_unique_endpoints = \ + set_of_unique_endpoints | set(nexthops) + + crm_output = \ + self.setup['duthost'].get_crm_resources()['main_resources'] + + self.crm_assert( + crm_output, + 'ip{}_route'.format(outer_layer_version), + number_of_routes_configured) + self.crm_assert( + crm_output, + 'ip{}_nexthop'.format(outer_layer_version), + setup_neighbors) + + def nexthop_group_helper(self, encap_type): + # number of nexthop groups configured: + # = number of unique-looking list of nexthops. + # if destA:[nhA,nhB], and destB:[nhB,nhA], we have 1 nexthop group. 
+ list_of_nexthop_groups = set() + for vnet in self.setup[encap_type]['dest_to_nh_map'].keys(): + dest_to_nh_map = self.setup[encap_type]['dest_to_nh_map'][vnet] + list_of_nexthop_groups = list_of_nexthop_groups | \ + set(tuple(i) for i in unique_in_list( + sort_and_deduplicate(dest_to_nh_map.values()))) + + number_of_nh_groups = 0 + number_of_nh_group_members = 0 + for nhg in list_of_nexthop_groups: + if len(nhg) > 1: + number_of_nh_groups += 1 + number_of_nh_group_members += len(nhg) + return (number_of_nh_groups, number_of_nh_group_members) + + def test_crm_512_nexthop_groups(self, setUp, encap_type): + ''' + Verify that the CRM counter values for nexthop_group is updated as + per the vxlan route configs. + ''' + self.setup = setUp + Logger.info("Verifying encap_type:%s", encap_type) + crm_output = \ + self.setup['duthost'].get_crm_resources()['main_resources'] + (number_of_nh_groups, number_of_group_members) = \ + self.nexthop_group_helper(encap_type) + self.crm_assert( + crm_output, + 'nexthop_group', + number_of_nh_groups) + + def test_crm_128_group_members(self, setUp, encap_type): + ''' + Verify that the CRM counter values for nexthop_group_member + is updated as per the vxlan route configs. 
+ ''' + self.setup = setUp + Logger.info("Verifying encap_type:%s", encap_type) + crm_output = \ + self.setup['duthost'].get_crm_resources()['main_resources'] + (number_of_nh_groups, number_of_group_members) = \ + self.nexthop_group_helper(encap_type) + self.crm_assert( + crm_output, + 'nexthop_group_member', + number_of_group_members) diff --git a/tests/vxlan/test_vxlan_decap.py b/tests/vxlan/test_vxlan_decap.py index e299111a979..a7cc06e3cb3 100644 --- a/tests/vxlan/test_vxlan_decap.py +++ b/tests/vxlan/test_vxlan_decap.py @@ -137,8 +137,6 @@ def setup(duthosts, rand_one_dut_hostname, ptfhost, tbinfo): @pytest.fixture(params=["NoVxLAN", "Enabled", "Removed"]) def vxlan_status(setup, request, duthosts, rand_one_dut_hostname): duthost = duthosts[rand_one_dut_hostname] - #clear FDB and arp cache on DUT - duthost.shell('sonic-clear arp; fdbclear') if request.param == "Enabled": duthost.shell("sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db") duthost.shell("sonic-cfggen -j /tmp/vxlan_db.maps.json --write-to-db") @@ -149,6 +147,8 @@ def vxlan_status(setup, request, duthosts, rand_one_dut_hostname): duthost.shell('docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan"') return False, request.param else: + #clear FDB and arp cache on DUT + duthost.shell('sonic-clear arp; fdbclear') return False, request.param diff --git a/tests/vxlan/test_vxlan_ecmp.py b/tests/vxlan/test_vxlan_ecmp.py index de5fc1f53b9..537592d5fb1 100644 --- a/tests/vxlan/test_vxlan_ecmp.py +++ b/tests/vxlan/test_vxlan_ecmp.py @@ -2,509 +2,197 @@ ''' Script to automate the cases listed in VxLAN HLD document: - https://github.com/Azure/SONiC/blob/8ca1ac93c8912fda7b09de9bfd51498e5038c292/doc/vxlan/Overlay%20ECMP%20with%20BFD.md#test-cases + https://github.com/sonic-net/SONiC/blob/8ca1ac93c8912fda7b09de9bfd51498e5038c292/doc/vxlan/Overlay%20ECMP%20with%20BFD.md#test-cases To test functionality: - ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e 
-s -e --disable_loganalyzer -m individual -p /home/vxr/vxlan/logs/ -c 'vxlan/test_vxlan_ecmp.py' + ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s \ + -m individual -p /home/vxr/vxlan/logs/ -c 'vxlan/test_vxlan_ecmp.py' To test ECMP with 2 paths per destination: - ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -e --disable_loganalyzer -m individual -p /home/vxr/vxlan/logs/ -c 'vxlan/test_vxlan_ecmp.py' -e '--nhs_per_destination=2' - - To test ECMP+Scale: - ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -e --disable_loganalyzer -m individual -p /home/vxr/vxlan/logs/ -c 'vxlan/test_vxlan_ecmp.py' \ - -e '--ecmp_nhs_per_destination=128' -e '--total_number_of_nexthops=128000' + ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -m individual \ + -p /home/vxr/vxlan/logs/ -c 'vxlan/test_vxlan_ecmp.py' \ + -e '--nhs_per_destination=2' + + To test ECMP+Scale(for all 4 types of encap): + ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -m individual \ + -p /home/vxr/vxlan/logs/ \ + -c 'vxlan/test_vxlan_ecmp.py::Test_VxLAN_route_tests::\ + test_vxlan_single_endpoint' \ + -e '--ecmp_nhs_per_destination=128 --total_number_of_nexthops=32000' \ + -e '--total_number_of_endpoints=1024' To keep the temporary config files created in the DUT: - ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -e --keep_temp_files -c 'vxlan/test_vxlan_ecmp.py' + ./run_tests.sh -n ucs-m5-2 -d mth64-m5-2 -O -u -e -s -e --keep_temp_files \ + -c 'vxlan/test_vxlan_ecmp.py' Other options: - keep_temp_files : Keep the temporary files created in the DUT. Default: False - debug_enabled : Enable debug mode, for debugging script. The temp files will not have timestamped names. Default: False - dut_hostid : An integer in the range of 1 - 100 to be used as the host part of the IP address for DUT. Default: 1 + keep_temp_files : Keep the temporary files created in the + DUT. Default: False + debug_enabled : Enable debug mode, for debugging + script. 
The temp files will + not have timestamped names. + Default: False + dut_hostid : An integer in the range of 1 - 100 to be + used as the host + part of the IP address for DUT. Default:1 ecmp_nhs_per_destination : Number of ECMP next-hops per destination. - total_number_of_endpoints : Number of Endpoints (a pool of this number of ip addresses will used for next-hops). - total_number_of_nexthops : Maximum number of all nexthops for every destination combined(per encap_type). - vxlan_port : Global vxlan port (UDP port) to be used for the DUT. Default: 4789 + total_number_of_endpoints : Number of Endpoints (a pool of this + number of ip addresses will used for + next-hops). Default:2 + total_number_of_nexthops : Maximum number of all nexthops for every + destination combined(per encap_type). + vxlan_port : Global vxlan port (UDP port) to be used + for the DUT. Default: 4789 + bfd : Set it to True if you want to run all + VXLAN cases with BFD Default: False + include_long_tests : Include the entropy, random-hash + testcases, that take longer time. + Default: False ''' import time -import re -import ipaddress -import json import logging from datetime import datetime -from sys import getsizeof - +import json +import re import pytest +import copy -from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # lgtm[py/unused-import] +from tests.common.helpers.assertions import pytest_assert +from tests.common.fixtures.ptfhost_utils \ + import copy_ptftests_directory # noqa: F401 +from tests.common.utilities import wait_until from tests.ptf_runner import ptf_runner +from tests.vxlan.vxlan_ecmp_utils import Ecmp_Utils Logger = logging.getLogger(__name__) - -# Some of the Constants used in this script. -Constants = {} - -# Mapping the version to the python module. -IP_TYPE = { - 'v4' : ipaddress.IPv4Address, - 'v6' : ipaddress.IPv6Address -} - -# This is the mask values to use for destination -# in the vnet routes. 
-HOST_MASK = {'v4' : 32, 'v6' : 128} +ecmp_utils = Ecmp_Utils() # This is the list of encapsulations that will be tested in this script. # v6_in_v4 means: V6 payload is encapsulated inside v4 outer layer. # This list is used in many locations in the script. SUPPORTED_ENCAP_TYPES = ['v4_in_v4', 'v4_in_v6', 'v6_in_v4', 'v6_in_v6'] +# Starting prefixes to be used for the destinations and End points. +DESTINATION_PREFIX = 150 +NEXTHOP_PREFIX = 100 pytestmark = [ # This script supports any T1 topology: t1, t1-64-lag, t1-lag. - pytest.mark.topology("t1", "t1-64-lag", "t1-lag"), - pytest.mark.sanity_check(post_check=True) + pytest.mark.topology("t1", "t1-64-lag", "t1-lag") ] -def create_vxlan_tunnel(duthost, minigraph_data, af, tunnel_name=None, src_ip=None): - ''' - Function to create a vxlan tunnel. The arguments: - duthost : the DUT ansible host object. - minigraph_data: minigraph facts from the dut host. - tunnel_name : A name for the Tunnel, default: tunnel_ - src_ip : Source ip address of the tunnel. It has to be a local ip address in the DUT. Default: Loopback ip address. - af : Address family : v4 or v6. - ''' - if tunnel_name is None: - tunnel_name = "tunnel_{}".format(af) - - if src_ip is None: - src_ip = get_dut_loopback_address(duthost, minigraph_data, af) - - config = '''{{ - "VXLAN_TUNNEL": {{ - "{}": {{ - "src_ip": "{}" - }} - }} - }}'''.format(tunnel_name, src_ip) - - apply_config_in_dut(duthost, config, name="vxlan_tunnel_"+ af) - return tunnel_name - -def apply_config_in_dut(duthost, config, name="vxlan"): - ''' - The given json(config) will be copied to the DUT and loaded up. 
- ''' - if Constants['DEBUG']: - filename = "/tmp/" + name + ".json" - else: - filename = "/tmp/" + name + "-" + str(time.time()) + ".json" - duthost.copy(content=config, dest=filename) - duthost.shell("sudo config load {} -y".format(filename)) - time.sleep(1) - if not Constants['KEEP_TEMP_FILES']: - duthost.shell("rm {}".format(filename)) - -def get_dut_loopback_address(duthost, minigraph_data, af): - ''' - Returns the IP address of the Loopback interface in DUT, from minigraph. - Arguments: - duthost : DUT Ansible Host object. - minigraph_data: Minigraph facts from the DUT. - af : Address Family(v4 or v6). - ''' - lo_ip = minigraph_data['minigraph_lo_interfaces'] - for intf in lo_ip: - if isinstance(ipaddress.ip_address(intf['addr']), IP_TYPE[af]): - return intf['addr'] - - raise RuntimeError("Couldnot find the {} loopback address for the DUT:{} from minigraph.".format(af, duthost.hostname)) -def select_required_interfaces(duthost, number_of_required_interfaces, minigraph_data, af): - ''' - Pick the required number of interfaces to use for tests. - These interfaces will be selected based on if they are currently running a established BGP. - The interfaces will be picked from the T1 facing side. 
- ''' - bgp_interfaces = get_all_interfaces_running_bgp(duthost, minigraph_data) - interface_ip_table = minigraph_data['minigraph_interfaces'] - if interface_ip_table: - available_interfaces = interface_ip_table - elif minigraph_data['minigraph_portchannels']: - available_interfaces = minigraph_data['minigraph_portchannel_interfaces'] - else: - raise RuntimeError("Couldn't find a viable interface: No Ethernet, No PortChannels in the minigraph file.") - - # Randomly pick the interface from the above list - list_of_bgp_ips = [] - for neigh_ip_address in bgp_interfaces.keys(): - if isinstance(ipaddress.ip_address(neigh_ip_address), IP_TYPE[af]): - list_of_bgp_ips.append(neigh_ip_address) - - ret_interface_list = [] - available_number = len(list_of_bgp_ips) - # Confirm there are enough interfaces (basicaly more than or equal to the number of vnets). - if available_number <= number_of_required_interfaces+1: - raise RuntimeError('''There are not enough interfaces needed to perform the test. - We need atleast {} interfaces, but only {} are available.'''.format(number_of_required_interfaces+1, available_number)) - for index in range(number_of_required_interfaces): - neigh_ip_address = list_of_bgp_ips[index] - current_interface_name = bgp_interfaces[neigh_ip_address].keys()[0] - ret_interface_list.append(current_interface_name) - - if ret_interface_list: - return ret_interface_list - else: - raise RuntimeError("There is no Ethernet interface running BGP. Pls run this test on any T1 topology.") - -def get_portchannels_to_neighbors(duthost, neighbor_type, minigraph_data): - ''' - A function to get the list of portchannels connected to BGP neighbors of given type(T0 or T2). - It returns a list of portchannels+minigraph_lag_facts_of_that portchannel. - Arguments: - duthost : DUT Ansible Host object - localhost : Localhost Ansible Host object. - neighbor_type: T0 or T2. 
- ''' - lag_facts = duthost.lag_facts(host=duthost.sonichost.mgmt_ip) - names = lag_facts['ansible_facts']['lag_facts']['names'] - lags = lag_facts['ansible_facts']['lag_facts']['lags'] - - return_list = {} - pattern = re.compile("{}$".format(neighbor_type)) - for pc_name in names: - port_struct = lags[pc_name]['po_config']['ports'] - if lags[pc_name]['po_intf_stat'] == "Up": - intf = port_struct.keys()[0] - neighbor = minigraph_data['minigraph_neighbors'][intf]['name'] - match = pattern.search(neighbor) - if match: - # We found an interface that has a given neighbor_type. Let us use this. - return_list[pc_name] = port_struct - - return return_list - -def get_ethernet_to_neighbors(neighbor_type, minigraph_data): +@pytest.fixture( + name="encap_type", + scope="module", + params=SUPPORTED_ENCAP_TYPES) +def fixture_encap_type(request): ''' - A function to get the list of Ethernet interfaces connected to BGP neighbors of given type(T0 or T2). - It returns a list of ports. - Arguments: - duthost : DUT Ansible Host object - neighbor_type: T0 or T2. + This fixture forces the script to perform one encap_type at a time. + So this script doesn't support multiple encap types at the same. 
''' + return request.param + + +@pytest.fixture(autouse=True) +def _ignore_route_sync_errlogs(rand_one_dut_hostname, loganalyzer): + """Ignore expected failures logs during test execution.""" + if loganalyzer: + loganalyzer[rand_one_dut_hostname].ignore_regex.extend( + [ + ".*Unaccounted_ROUTE_ENTRY_TABLE_entries.*", + ".*missed_in_asic_db_routes.*", + ".*Look at reported mismatches above.*", + ".*Unaccounted_ROUTE_ENTRY_TABLE_entries.*", + ".*'vnetRouteCheck' status failed.*", + ".*Vnet Route Mismatch reported.*", + ".*_M_construct null not valid.*", + ]) + return + + +def setup_crm_interval(duthost, interval): + crm_stdout = duthost.shell("crm show summary")['stdout_lines'] + match = re.search("Polling Interval: ([0-9]*) second", "".join(crm_stdout)) - pattern = re.compile("{}$".format(neighbor_type)) - ret_list = [] - - for intf in minigraph_data['minigraph_neighbors']: - if pattern.search(minigraph_data['minigraph_neighbors'][intf]['name']): - ret_list.append(intf) - - return ret_list - -def assign_intf_ip_address(selected_interfaces, af): - intf_ip_map = {} - for intf in selected_interfaces: - ip = get_ip_address(af=af, hostid=Constants['DUT_HOSTID'], netid=201) - intf_ip_map[intf] = ip - return intf_ip_map - -def get_all_interfaces_running_bgp(duthost, minigraph_data): - bgp_neigh_list = duthost.bgp_facts()['ansible_facts']['bgp_neighbors'] - minigraph_ip_interfaces = minigraph_data['minigraph_interfaces'] + minigraph_data['minigraph_portchannel_interfaces'] - peer_addr_map = {} - for x in minigraph_ip_interfaces: - peer_addr_map[x['peer_addr']] = {x['attachto'] : x['addr']} - - ret_list = {} - for x, entry in peer_addr_map.iteritems(): - if bgp_neigh_list[x]['state'] == 'established': - ret_list[x] = entry - - return ret_list - -def configure_vnet_neighbors(duthost, intf_to_ip_map, minigraph_data, af): - ''' - setup the vnet neighbor ip addresses. 
- ''' - family = "IPv4" - if af == "v6": - family = "IPv6" - - return_dict = {} - - config_list = [] - for intf, addr in intf_to_ip_map.iteritems(): - # If the given address is "net.1", the return address is "net.101" - # THE ASSUMPTION HERE IS THAT THE DUT ADDRESSES ARE ENDING IN ".1". - ptf_ip = str(ipaddress.ip_address(unicode(addr))+100) - - if "Ethernet" in intf: - return_dict[intf] = ptf_ip - elif "PortChannel" in intf: - for member in get_ethernet_ports([intf], minigraph_data): - return_dict[member] = ptf_ip - - config_list.append('''"{}|{}": {{ - "family": "{}" - }}'''.format(intf, ptf_ip, family)) - - full_config = '''{ - "NEIGH" : { - ''' + ",\n".join(config_list) + '''\n}\n}''' - - apply_config_in_dut(duthost, full_config, name="vnet_nbr_"+af) - - return return_dict - -def create_vnets(duthost, tunnel_name, vnet_count=1, scope=None, vni_base=10000, vnet_name_prefix="Vnet"): - return_dict = {} - scope_entry = "" - if scope: - scope_entry = '''"scope": "{}",'''.format(scope) - config_list = [] - for i in range(vnet_count): - name = vnet_name_prefix + "-" + str(i) - vni = vni_base+i - return_dict[name] = vni - config_list.append('''"{}": {{ - "vxlan_tunnel": "{}", - {}"vni": "{}", - "peer_list": "" - }}'''.format(name, tunnel_name, scope_entry, vni)) - - full_config = '{\n"VNET": {' + ",\n".join(config_list) + '\n}\n}' - - apply_config_in_dut(duthost, full_config, "vnets_"+tunnel_name) - return return_dict - -def setup_vnet_intf(duthost, selected_interfaces, vnet_list, minigraph_data): - if len(selected_interfaces) != len(vnet_list): - raise RuntimeError("Different number of interfaces and vnets, not supported yet") - - ret_list = {} - intf_config_list = [] - po_config_list = [] - for count in range(len(selected_interfaces)): - intf = selected_interfaces[count] - config = (''' - "{}" : {{ - "vnet_name": "{}" - }} - '''.format(intf, vnet_list[count])) - - if "Ethernet" in intf: - intf_config_list.append(config) - ret_list[intf] = vnet_list[count] - elif 
"PortChannel" in intf: - po_config_list.append(config) - for member in get_ethernet_ports([intf], minigraph_data): - ret_list[member] = vnet_list[count] - - full_config_list = [] - if intf_config_list: - full_config_list.append( - '''"INTERFACE": {\n''' + ",\n".join(intf_config_list) + '''}''') - if po_config_list: - full_config_list.append( - '''"PORTCHANNEL_INTERFACE": {\n''' + ",\n".join(po_config_list) + '''}''') - - full_config = '''{\n''' + ",\n".join(full_config_list) + '''}''' - apply_config_in_dut(duthost, full_config, "vnet_intf") - return ret_list - -def configure_vxlan_switch(duthost, vxlan_port=4789, dutmac=None): - if dutmac == None: - #dutmac = duthost.facts['router_mac'] - dutmac = "aa:bb:cc:dd:ee:ff" - - switch_config = ''' -[ - {{ - "SWITCH_TABLE:switch": {{ - "vxlan_port": "{}", - "vxlan_router_mac": "{}" - }}, - "OP": "SET" - }} -] -'''.format(vxlan_port, dutmac) - apply_config_in_swss(duthost, switch_config, "vnet_switch") - -def apply_config_in_swss(duthost, config, name="swss_"): - if Constants['DEBUG']: - filename = name + ".json" - else: - filename = name + "-" + str(time.time()) + ".json" - - duthost.copy(content=config, dest="/tmp/{}".format(filename)) - duthost.shell('docker exec -i swss swssconfig /dev/stdin < /tmp/{}'.format(filename)) - if not Constants['KEEP_TEMP_FILES']: - duthost.shell("rm /tmp/{}".format(filename)) - time.sleep(1) - -def get_list_of_nexthops(number, af, prefix=100): - nexthop_list = [] - for i in range(number): - nexthop_list.append(get_ip_address(af=af, netid=prefix, hostid=10)) - return nexthop_list - -def create_vnet_routes(duthost, vnet_list, dest_af, nh_af, nhs_per_destination=1, number_of_available_nexthops=100, number_of_ecmp_nhs=1000, dest_net_prefix=150, nexthop_prefix=100): - ''' - This configures the VNET_TUNNEL_ROUTES structure. It precalculates the required number of - destinations based on the given "number_of_ecmp_nhs" and the "nhs_per_destination". 
- - inputs: - number_of_available_nexthops : Total number of unique NextHops available for use. - nhs_per_destination : Number of ECMP nexthops to use per destination. - number_of_ecmp_nhs : Maximum number of all NextHops put together(for all destinations). - ''' - available_nexthops = get_list_of_nexthops(number=number_of_available_nexthops, af=nh_af, prefix=nexthop_prefix) - - number_of_destinations = int(number_of_ecmp_nhs / nhs_per_destination) - no_of_dests_per_vnet = int(number_of_destinations / len(vnet_list)) - available_nexthop_count = 0 - dest_to_nh_map = {} - for vnet in vnet_list: - for i in range(no_of_dests_per_vnet): - dest = get_ip_address(af=dest_af, netid=dest_net_prefix) - my_nhs = [] - for j in range(nhs_per_destination): - my_nhs.append(available_nexthops[available_nexthop_count % number_of_available_nexthops]) - available_nexthop_count = available_nexthop_count + 1 - if available_nexthop_count > number_of_ecmp_nhs: - break - - try: - dest_to_nh_map[vnet] - except KeyError: - dest_to_nh_map[vnet] = {} - dest_to_nh_map[vnet][dest] = my_nhs - - set_routes_in_dut(duthost, dest_to_nh_map, dest_af, "SET") - return dest_to_nh_map - -def get_outer_layer_version(encap_type): - match = re.search("in_(v[46])", encap_type) - if match: - return match.group(1) - else: - raise RuntimeError("Invalid format for encap_type:{}".format(encap_type)) - -def get_payload_version(encap_type): - match = re.search("(v[46])_in_v", encap_type) if match: - return match.group(1) - else: - raise RuntimeError("Invalid format for encap_type:{}".format(encap_type)) - -def create_single_route(vnet, dest, mask, nhs, op): - ''' - Create a single route entry for vnet, for the given dest, through the endpoints:nhs, op:SET/DEL - ''' - return '''{{ - "VNET_ROUTE_TUNNEL_TABLE:{}:{}/{}": {{ - "endpoint": "{}" - }}, - "OP": "{}" - }}'''.format(vnet, dest, mask, ",".join(nhs), op) - -Address_Count = 0 -def get_ip_address(af, hostid=1, netid=100): - global Address_Count - third_octet = 
Address_Count % 255 - second_octet = (Address_Count / 255) % 255 - first_octet = netid + (Address_Count / 65025) - Address_Count = Address_Count + 1 - if af == 'v4': - return "{}.{}.{}.{}".format(first_octet, second_octet, third_octet, hostid) - if af == 'v6': - # :0: gets removed in the IPv6 addresses. Adding a to octets, to avoid it. - return "fddd:a{}:a{}::a{}:{}".format(first_octet, second_octet, third_octet, hostid) - -def set_routes_in_dut(duthost, dest_to_nh_map, dest_af, op): - config_list = [] - for vnet in dest_to_nh_map.keys(): - for dest in dest_to_nh_map[vnet].keys(): - config_list.append(create_single_route(vnet, dest, HOST_MASK[dest_af], dest_to_nh_map[vnet][dest], op)) - - full_config = '[' + "\n,".join(config_list) + '\n]' - apply_config_in_swss(duthost, full_config, "set_routes") - -def get_t2_ports(duthost, minigraph_data): - ''' - In T1 topology, any port connected to the T2 BGP neighbors are needed. - In T0, any port connected to the T1 BGP neighbors are needed. - ''' - list_of_portchannels_to_T2 = get_portchannels_to_neighbors(duthost, "T2", minigraph_data) - list_of_interfaces = [] - if list_of_portchannels_to_T2: - for pc_name in list_of_portchannels_to_T2: - list_of_interfaces.extend(list_of_portchannels_to_T2[pc_name]) + current_polling_seconds = match.group(1) else: - list_of_interfaces = get_ethernet_to_neighbors("T2", minigraph_data) - - ret_list = [] - for iface in list_of_interfaces: - ret_list.append(minigraph_data["minigraph_ptf_indices"][iface]) - return ret_list - -def bgp_established(duthost): - bgp_facts = duthost.bgp_facts()['ansible_facts'] - for k, v in bgp_facts['bgp_neighbors'].items(): - if v['state'] != 'established': - Logger.info("Neighbor %s not established yet: %s", k, v['state']) - return False - return True - -def get_ethernet_ports(intf_list, minigraph_data): + raise RuntimeError( + "Couldn't parse the crm polling " + "interval. 
output:{}".format(crm_stdout)) + duthost.shell("crm config polling interval {}".format(interval)) + return current_polling_seconds + + +@pytest.fixture(name="setUp", scope="module") +def fixture_setUp(duthosts, + ptfhost, + request, + rand_one_dut_hostname, + minigraph_facts, + tbinfo, + encap_type): ''' - The given interface list can be either Ethernet or Portchannel. - This function will return a flat list of Ethernet ports corresponding to - the given intf_list itself, or members of Portchannels. + Setup for the entire script. + The basic steps in VxLAN configs are: + 1. Configure VxLAN tunnel. + 2. Configure Vnet and its VNI. + 3. Attach the Vnet to an interface(optional). + 4. Configure routes for the Vnet. The setup does all the above. + + The testcases are focused on the "configure routes" step. They add, + delete, modify, the routes. Some cases modify the underlay itself, + by add/delete bgp, or shut/start interfaces etc. ''' - ret_list = [] - for intf in intf_list: - if "Ethernet" in intf: - ret_list.append(intf) - elif "PortChannel" in intf: - ret_list.extend(minigraph_data['minigraph_portchannels'][intf]['members']) - - return ret_list -@pytest.fixture(scope="module") -def setUp(duthosts, ptfhost, request, rand_one_dut_hostname, minigraph_facts, - tbinfo): + data = {} + asic_type = duthosts[rand_one_dut_hostname].facts["asic_type"] + if asic_type == "cisco-8000": + data['tolerance'] = 0.03 + else: + raise RuntimeError("Pls update this script for your platform.") - global Constants # Should I keep the temporary files copied to DUT? - Constants['KEEP_TEMP_FILES'] = request.config.option.keep_temp_files + ecmp_utils.Constants['KEEP_TEMP_FILES'] = \ + request.config.option.keep_temp_files # Is debugging going on, or is it a production run? If it is a # production run, use time-stamped file names for temp files. 
- Constants['DEBUG'] = request.config.option.debug_enabled + ecmp_utils.Constants['DEBUG'] = request.config.option.debug_enabled # The host id in the ip addresses for DUT. It can be anything, # but helps to keep as a single number that is easy to identify # as DUT. - Constants['DUT_HOSTID'] = request.config.option.dut_hostid + ecmp_utils.Constants['DUT_HOSTID'] = request.config.option.dut_hostid - Logger.info("Constants to be used in the script:%s", Constants) + Logger.info("Constants to be used in the script:%s", ecmp_utils.Constants) - data = {} + data['enable_bfd'] = request.config.option.bfd + data['include_long_tests'] = request.config.option.include_long_tests + data['monitor_file'] = '/tmp/bfd_responder_monitor_file.txt' data['ptfhost'] = ptfhost data['tbinfo'] = tbinfo data['duthost'] = duthosts[rand_one_dut_hostname] - data['minigraph_facts'] = data['duthost'].get_extended_minigraph_facts(tbinfo) + data['minigraph_facts'] = \ + data['duthost'].get_extended_minigraph_facts(tbinfo) data['dut_mac'] = data['duthost'].facts['router_mac'] data['vxlan_port'] = request.config.option.vxlan_port - configure_vxlan_switch(data['duthost'], vxlan_port=data['vxlan_port'], dutmac=data['dut_mac']) - - selected_interfaces = {} - for encap_type in SUPPORTED_ENCAP_TYPES: - outer_layer_version = get_outer_layer_version(encap_type) - selected_interfaces[encap_type] = select_required_interfaces( + data['original_crm_interval'] = setup_crm_interval(data['duthost'], + interval=3) + time.sleep(4) + data['crm'] = data['duthost'].get_crm_resources()['main_resources'] + ecmp_utils.configure_vxlan_switch( + data['duthost'], + vxlan_port=data['vxlan_port'], + dutmac=data['dut_mac']) + data['list_of_bfd_monitors'] = set() + data['list_of_downed_endpoints'] = set() + + outer_layer_version = ecmp_utils.get_outer_layer_version(encap_type) + encap_type_data = {} + encap_type_data['selected_interfaces'] = \ + ecmp_utils.select_required_interfaces( data['duthost'], 
number_of_required_interfaces=1, minigraph_data=minigraph_facts, @@ -514,120 +202,2239 @@ def setUp(duthosts, ptfhost, request, rand_one_dut_hostname, minigraph_facts, tunnel_names = {} # To track the vnets for every outer_layer_version. vnet_af_map = {} - for encap_type in SUPPORTED_ENCAP_TYPES: - outer_layer_version = get_outer_layer_version(encap_type) - try: - tunnel_names[outer_layer_version] - except KeyError: - tunnel_names[outer_layer_version] = create_vxlan_tunnel(data['duthost'], minigraph_data=minigraph_facts, af=outer_layer_version) + outer_layer_version = ecmp_utils.get_outer_layer_version(encap_type) + try: + tunnel_names[outer_layer_version] + except KeyError: + tunnel_names[outer_layer_version] = ecmp_utils.create_vxlan_tunnel( + data['duthost'], + minigraph_data=minigraph_facts, + af=outer_layer_version) - payload_version = get_payload_version(encap_type) - encap_type = "{}_in_{}".format(payload_version, outer_layer_version) - encap_type_data = {} - encap_type_data['selected_interfaces'] = selected_interfaces[encap_type] + payload_version = ecmp_utils.get_payload_version(encap_type) + encap_type = "{}_in_{}".format(payload_version, outer_layer_version) - try: - encap_type_data['vnet_vni_map'] = vnet_af_map[outer_layer_version] - except KeyError: - vnet_af_map[outer_layer_version] = create_vnets(data['duthost'], - tunnel_name=tunnel_names[outer_layer_version], - vnet_count=1, # default scope can take only one vnet. 
- vnet_name_prefix="Vnet_" + encap_type, - scope="default", - vni_base=10000) - encap_type_data['vnet_vni_map'] = vnet_af_map[outer_layer_version] - - encap_type_data['vnet_intf_map'] = setup_vnet_intf(data['duthost'], - selected_interfaces=encap_type_data['selected_interfaces'], - vnet_list=encap_type_data['vnet_vni_map'].keys(), - minigraph_data=minigraph_facts) - encap_type_data['intf_to_ip_map'] = assign_intf_ip_address(selected_interfaces=encap_type_data['selected_interfaces'], af=payload_version) - encap_type_data['t2_ports'] = get_t2_ports(data['duthost'], minigraph_facts) - encap_type_data['neighbor_config'] = configure_vnet_neighbors(data['duthost'], encap_type_data['intf_to_ip_map'], minigraph_data=minigraph_facts, af=payload_version) - encap_type_data['dest_to_nh_map'] = create_vnet_routes(data['duthost'], encap_type_data['vnet_vni_map'].keys(), - nhs_per_destination=request.config.option.ecmp_nhs_per_destination, - number_of_available_nexthops=request.config.option.total_number_of_endpoints, - number_of_ecmp_nhs=request.config.option.total_number_of_nexthops, - dest_af=payload_version, - dest_net_prefix=150, # Hardcoded to avoid conflicts with topology networks. - nexthop_prefix=100, # Hardcoded to avoid conflicts with topology networks. - nh_af=outer_layer_version) - - data[encap_type] = encap_type_data + try: + encap_type_data['vnet_vni_map'] = vnet_af_map[outer_layer_version] + except KeyError: + vnet_af_map[outer_layer_version] = ecmp_utils.create_vnets( + data['duthost'], + tunnel_name=tunnel_names[outer_layer_version], + vnet_count=1, # default scope can take only one vnet. 
+ vnet_name_prefix="Vnet_" + encap_type, + scope="default", + vni_base=10000) + encap_type_data['vnet_vni_map'] = vnet_af_map[outer_layer_version] + + encap_type_data['vnet_intf_map'] = ecmp_utils.setup_vnet_intf( + selected_interfaces=encap_type_data['selected_interfaces'], + vnet_list=encap_type_data['vnet_vni_map'].keys(), + minigraph_data=minigraph_facts) + encap_type_data['intf_to_ip_map'] = ecmp_utils.assign_intf_ip_address( + selected_interfaces=encap_type_data['selected_interfaces'], + af=payload_version) + encap_type_data['t2_ports'] = ecmp_utils.get_t2_ports( + data['duthost'], + minigraph_facts) + encap_type_data['neighbor_config'] = ecmp_utils.configure_vnet_neighbors( + data['duthost'], + encap_type_data['intf_to_ip_map'], + minigraph_data=minigraph_facts, + af=payload_version) + encap_type_data['dest_to_nh_map'] = ecmp_utils.create_vnet_routes( + data['duthost'], encap_type_data['vnet_vni_map'].keys(), + nhs_per_destination=request.config.option.ecmp_nhs_per_destination, + number_of_available_nexthops=request.config.option. + total_number_of_endpoints, + number_of_ecmp_nhs=request.config.option.total_number_of_nexthops, + dest_af=payload_version, + dest_net_prefix=DESTINATION_PREFIX, + nexthop_prefix=NEXTHOP_PREFIX, + nh_af=outer_layer_version, + bfd=request.config.option.bfd) + + data[encap_type] = encap_type_data + for vnet in encap_type_data['dest_to_nh_map'].keys(): + for dest in encap_type_data['dest_to_nh_map'][vnet].keys(): + data['list_of_bfd_monitors'] = data['list_of_bfd_monitors'] |\ + set(encap_type_data['dest_to_nh_map'][vnet][dest]) + + # Setting up bfd responder is needed only once per script run. + loopback_addresses = \ + [str(x['addr']) for x in minigraph_facts[u'minigraph_lo_interfaces']] + if request.config.option.bfd: + ecmp_utils.start_bfd_responder( + data['ptfhost'], + data['dut_mac'], + loopback_addresses, + monitor_file=data['monitor_file']) + # Add all endpoint_monitors to the bfd responder monitor. 
+ ecmp_utils.update_monitor_file( + data['ptfhost'], + data['monitor_file'], + data[encap_type]['t2_ports'], + list(data['list_of_bfd_monitors'])) # This data doesn't change per testcase, so we copy # it as a seperate file. The test-specific config # data will be copied on testase basis. data['ptfhost'].copy(content=json.dumps( { - 'minigraph_facts': data['minigraph_facts'], - 'tbinfo' : data['tbinfo'] + 'minigraph_facts': data['minigraph_facts'], + 'tbinfo': data['tbinfo'] }, indent=4), dest="/tmp/vxlan_topo_info.json") + data['downed_endpoints'] = [] + data[encap_type]['dest_to_nh_map_orignal'] = copy.deepcopy(data[encap_type]['dest_to_nh_map']) # noqa F821 yield data # Cleanup code. - for encap_type in SUPPORTED_ENCAP_TYPES: - outer_layer_version = get_outer_layer_version(encap_type) - payload_version = get_payload_version(encap_type) - - encap_type = "{}_in_{}".format(payload_version, outer_layer_version) - set_routes_in_dut(data['duthost'], data[encap_type]['dest_to_nh_map'], payload_version, "DEL") - - for intf in data[encap_type]['selected_interfaces']: - redis_string = "INTERFACE" - if "PortChannel" in intf > 0: - redis_string = "PORTCHANNEL_INTERFACE" - data['duthost'].shell("redis-cli -n 4 hdel \"{}|{}\" vnet_name".format(redis_string, intf)) + outer_layer_version = ecmp_utils.get_outer_layer_version(encap_type) + payload_version = ecmp_utils.get_payload_version(encap_type) + + ecmp_utils.set_routes_in_dut( + data['duthost'], + data[encap_type]['dest_to_nh_map'], + payload_version, + "DEL") + + for intf in data[encap_type]['selected_interfaces']: + redis_string = "INTERFACE" + if "PortChannel" in intf: + redis_string = "PORTCHANNEL_INTERFACE" + data['duthost'].shell("redis-cli -n 4 hdel \"{}|{}\"" + "vnet_name".format(redis_string, intf)) + data['duthost'].shell( + "for i in `redis-cli -n 4 --scan --pattern \"NEIGH|{}|*\" `; " + "do redis-cli -n 4 del $i ; done".format(intf)) + + # This script's setup code re-uses same vnets for v4inv4 and v6inv4. 
+ # There will be same vnet in multiple encap types. + # So remove vnets *after* removing the routes first. + for vnet in data[encap_type]['vnet_vni_map'].keys(): + data['duthost'].shell("redis-cli -n 4 del \"VNET|{}\"".format(vnet)) + + time.sleep(5) + for tunnel in tunnel_names.values(): + data['duthost'].shell( + "redis-cli -n 4 del \"VXLAN_TUNNEL|{}\"".format(tunnel)) - for vnet in data[encap_type]['vnet_vni_map'].keys(): - data['duthost'].shell("redis-cli -n 4 del \"VNET|{}\"".format(vnet)) + time.sleep(1) + if request.config.option.bfd: + ecmp_utils.stop_bfd_responder(data['ptfhost']) - for tunnel in tunnel_names.values(): - data['duthost'].shell("redis-cli -n 4 del \"VXLAN_TUNNEL|{}\"".format(tunnel)) + setup_crm_interval(data['duthost'], int(data['original_crm_interval'])) -@pytest.mark.parametrize("encap_type", SUPPORTED_ENCAP_TYPES) -class Test_VxLAN: - def dump_self_info_and_run_ptf(self, tcname, encap_type, expect_encap_success): +class Test_VxLAN(): + ''' + Base class for all VxLAN+BFD tests. + ''' + setup = {} + + def dump_self_info_and_run_ptf(self, + tcname, + encap_type, + expect_encap_success, + packet_count=4, + random_dport=True, + random_sport=False, + random_src_ip=False, + tolerance=None): ''' - Just a wrapper for dump_info_to_ptf to avoid entering 30 lines everytime. + Just a wrapper for dump_info_to_ptf to avoid entering 30 lines + everytime. ''' - if Constants['DEBUG']: + if tolerance is None: + tolerance = self.setup['tolerance'] + if ecmp_utils.Constants['DEBUG']: config_filename = "/tmp/vxlan_configs.json" else: - config_filename = "/tmp/vxlan_configs." + tcname + "-" + encap_type + "-" + str(time.time()) + ".json" + config_filename = "/tmp/vxlan_configs." 
+ tcname +\ + "-" + encap_type + "-" + str(time.time()) + ".json" self.setup['ptfhost'].copy(content=json.dumps( { - 'vnet_vni_map' : self.setup[encap_type]['vnet_vni_map'], - 'vnet_intf_map' : self.setup[encap_type]['vnet_intf_map'], + 'vnet_vni_map': self.setup[encap_type]['vnet_vni_map'], + 'vnet_intf_map': self.setup[encap_type]['vnet_intf_map'], 'dest_to_nh_map': self.setup[encap_type]['dest_to_nh_map'], - 'neighbors' : self.setup[encap_type]['neighbor_config'], + 'neighbors': self.setup[encap_type]['neighbor_config'], 'intf_to_ip_map': self.setup[encap_type]['intf_to_ip_map'], }, indent=4), dest=config_filename) - time.sleep(int(0.00005*getsizeof(self.setup[encap_type]['dest_to_nh_map'])) + 1) + Logger.info("Recording current DUT state.") + cmds = [ + "show vxlan tunnel", + "show vnet route all", + "show ip bgp summary", + "show ipv6 bgp summary"] + if self.setup['enable_bfd']: + cmds.append("show bfd summary") + for cmd in cmds: + self.setup['duthost'].shell(cmd) + + ptf_params = { + "topo_file": "/tmp/vxlan_topo_info.json", + "config_file": config_filename, + "t0_ports": ecmp_utils.get_ethernet_ports( + self.setup[encap_type]['selected_interfaces'], + self.setup['minigraph_facts']), + "t2_ports": self.setup[encap_type]['t2_ports'], + "dut_mac": self.setup['dut_mac'], + "vxlan_port": self.setup['vxlan_port'], + "expect_encap_success": expect_encap_success, + "packet_count": packet_count, + "random_dport": random_dport, + "random_sport": random_sport, + "random_src_ip": random_src_ip, + "tolerance": tolerance, + "downed_endpoints": list(self.setup['list_of_downed_endpoints']) + } + Logger.info("ptf arguments:%s", ptf_params) + Logger.info( + "dest->nh mapping:%s", self.setup[encap_type]['dest_to_nh_map']) + ptf_runner(self.setup['ptfhost'], "ptftests", "vxlan_traffic.VXLAN", platform_dir="ptftests", - params={ - "topo_file": "/tmp/vxlan_topo_info.json", - "config_file": config_filename, - 
"t0_ports":get_ethernet_ports(self.setup[encap_type]['selected_interfaces'], self.setup['minigraph_facts']), - "t2_ports":self.setup[encap_type]['t2_ports'], - "dut_mac":self.setup['dut_mac'], - "vxlan_port": self.setup['vxlan_port'], - "expect_encap_success":expect_encap_success - }, + params=ptf_params, qlen=1000, - log_file="/tmp/vxlan-tests.{}.{}.{}.log".format(tcname, encap_type, datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))) + log_file="/tmp/vxlan-tests.{}.{}.{}.log".format( + tcname, + encap_type, + datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))) + + def update_monitor_list(self, bfd_enable, encap_type, ip_address_list): + ''' + Local function to update the bfd_responder's monitor file that + tracks which interfaces and ip addresses the bfd_responder will + work with. + ''' + if not bfd_enable: + return + if isinstance(ip_address_list, str): + ip_address_list = [ip_address_list] + self.setup['list_of_bfd_monitors'] = \ + self.setup['list_of_bfd_monitors'] | set(ip_address_list) + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + + def update_down_list(self, bfd_enable, encap_type, ip_address_list): + ''' + Local function to keep track of endpoint monitors that are down. + The bfd_responder will not be replying to any packet with these + addresses. + ''' + if not bfd_enable: + return + if isinstance(ip_address_list, str): + ip_address_list = [ip_address_list] + self.setup['list_of_downed_endpoints'] = \ + self.setup['list_of_downed_endpoints'] | set(ip_address_list) + self.setup['list_of_bfd_monitors'] = \ + self.setup['list_of_bfd_monitors'] - set(ip_address_list) + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + class Test_VxLAN_route_tests(Test_VxLAN): + ''' + Common class for the basic route test cases. 
+ ''' def test_vxlan_single_endpoint(self, setUp, encap_type): + ''' + tc1:Create a tunnel route to a single endpoint a. + Send packets to the route prefix dst. + ''' self.setup = setUp - Logger.info("tc1:Create a tunnel route to a single endpoint a. Send packets to the route prefix dst.") self.dump_self_info_and_run_ptf("tc1", encap_type, True) + + def test_vxlan_modify_route_different_endpoint( + self, setUp, request, encap_type): + ''' + tc2: change the route to different endpoint. + Packets are received only at endpoint b.") + ''' + self.setup = setUp + Logger.info("Choose a vnet") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Choose a destination, which is already present.") + tc2_dest = self.setup[encap_type]['dest_to_nh_map'][vnet].keys()[0] + + Logger.info("Create a new endpoint, or endpoint-list.") + tc2_new_end_point_list = [] + for _ in range(int(request.config.option.ecmp_nhs_per_destination)): + tc2_new_end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Map the destination to the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc2_dest] = \ + tc2_new_end_point_list + + Logger.info("Create the json and apply the config in the DUT swss.") + # The config looks like: + # [ + # { + # "VNET_ROUTE_TUNNEL_TABLE:vnet:tc2_dest/32": { + # "endpoint": "{tc2_new_end_point_list}" + # "endpoint_monitor": "{tc2_new_end_point_list}" + # }, + # "OP": "{}" + # } + # ] + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc2_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_new_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc2_new_end_point_list) + + Logger.info( + "Copy the new set of configs to the PTF and run the tests.") + self.dump_self_info_and_run_ptf("tc2", encap_type, True) + + def 
test_vxlan_remove_all_route(self, setUp, encap_type): + ''' + tc3: remove the tunnel route. + Send packets to the route prefix dst. packets should not + be received at any ports with dst ip of b") + ''' + self.setup = setUp + try: + Logger.info("Remove the existing routes in the DUT.") + ecmp_utils.set_routes_in_dut( + self.setup['duthost'], + self.setup[encap_type]['dest_to_nh_map'], + ecmp_utils.get_payload_version(encap_type), + "DEL") + Logger.info("Verify that the traffic is not coming back.") + self.dump_self_info_and_run_ptf("tc3", encap_type, False) + finally: + Logger.info("Restore the routes in the DUT.") + ecmp_utils.set_routes_in_dut( + self.setup['duthost'], + self.setup[encap_type]['dest_to_nh_map'], + ecmp_utils.get_payload_version(encap_type), + "SET", + bfd=self.setup['enable_bfd']) + + +class Test_VxLAN_ecmp_create(Test_VxLAN): + ''' + Class for all the ECMP (multiple nexthops per destination) + create testcases. + ''' + def test_vxlan_configure_route1_ecmp_group_a(self, setUp, encap_type): + ''' + tc4:create tunnel route 1 with two endpoints a = {a1, a2...}. send + packets to the route 1's prefix dst. packets are received at either + a1 or a2. 
+ ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + tc4_end_point_list = [] + for _ in range(2): + tc4_end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc4_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc4_new_dest] = \ + tc4_end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc4_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc4_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], encap_type, tc4_end_point_list) + + Logger.info("Verify that the new config takes effect and run traffic.") + + self.dump_self_info_and_run_ptf("tc4", encap_type, True) + + def test_vxlan_remove_ecmp_route1(self, setUp, encap_type): + ''' + Remove tunnel route 1. Send multiple packets (varying tuple) to the + route 1's prefix dst. 
+ ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + backup_dest = self.setup[encap_type]['dest_to_nh_map'][vnet].copy() + + Logger.info("Create a new list of endpoint(s).") + ecmp_route1_end_point_list = [] + for _ in range(2): + ecmp_route1_end_point_list.append( + ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + ecmp_route1_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][ecmp_route1_new_dest] =\ + ecmp_route1_end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + ecmp_route1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + ecmp_route1_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + ecmp_route1_end_point_list) + + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf("tc5", encap_type, True) + + # Deleting Tunnel route 1 + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + ecmp_route1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + ecmp_route1_end_point_list, + "DEL") + + self.setup[encap_type]['dest_to_nh_map'][vnet] =\ + {ecmp_route1_new_dest: ecmp_route1_end_point_list} + + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf("tc5", encap_type, False) + + # Restoring dest_to_nh_map to old values + self.setup[encap_type]['dest_to_nh_map'][vnet] = backup_dest.copy() + self.dump_self_info_and_run_ptf("tc5", encap_type, True) + + def 
test_vxlan_configure_route1_ecmp_group_b(self, setUp, encap_type): + ''' + tc5: set tunnel route 2 to endpoint group a = {a1, a2}. send + packets to route 2"s prefix dst. packets are received at either a1 + or a2 + ''' + self.setup = setUp + self.setup_route2_ecmp_group_b(encap_type) + Logger.info("Verify the configs work and traffic flows correctly.") + self.dump_self_info_and_run_ptf("tc5", encap_type, True) + + def setup_route2_ecmp_group_b(self, encap_type): + ''' + Function for handling the dependency of tc6 on tc5. This function + is essentially tc5. + ''' + if self.setup[encap_type].get('tc5_dest', None): + return + Logger.info("Choose a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Select an existing endpoint.") + tc5_end_point_list = \ + self.setup[encap_type]['dest_to_nh_map'][vnet].values()[0] + + Logger.info("Create a new destination to use.") + tc5_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination to the endpoint.") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc5_new_dest] = \ + tc5_end_point_list + + Logger.info("Create the new config and apply to the DUT.") + + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc5_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc5_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc5_end_point_list) + + self.setup[encap_type]['tc5_dest'] = tc5_new_dest + + def test_vxlan_configure_route2_ecmp_group_b(self, setUp, encap_type): + ''' + tc6: set tunnel route 2 to endpoint group b = {b1, b2}. send + packets to route 2"s prefix dst. packets are received at either + b1 or b2. 
+ ''' + self.setup = setUp + self.setup_route2_ecmp_group_b(encap_type) + + Logger.info("Choose a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoints.") + tc6_end_point_list = [] + for _ in range(2): + tc6_end_point_list.append( + ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Choose one of the existing destinations.") + tc6_new_dest = self.setup[encap_type]['tc5_dest'] + + Logger.info("Map the destination to the new endpoints.") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc6_new_dest] = \ + tc6_end_point_list + + Logger.info("Create the config and apply on the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc6_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc6_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc6_end_point_list) + Logger.info("Verify that the traffic works.") + + self.dump_self_info_and_run_ptf("tc6", encap_type, True) + + @pytest.mark.skipif( + "config.option.bfd is False", + reason="This test will be run only if '--bfd=True' is provided.") + def test_vxlan_bfd_health_state_change_a2down_a1up( + self, setUp, encap_type): + ''' + Set BFD state for a1' to UP and a2' to Down. Send multiple packets + (varying tuple) to the route 1's prefix dst. Packets are received + only at endpoint a1. Verify advertise table is present. 
+ ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + end_point_list = [] + for _ in range(2): + end_point_list.append( + ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc4_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc4_new_dest] = \ + end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc4_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + # Only a1 is up, bfd-responder will not respond to a2. + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + end_point_list[0]) + self.update_down_list( + self.setup['enable_bfd'], + encap_type, + end_point_list[1]) + + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf("tc_a2down_a1up", encap_type, True) + + @pytest.mark.skipif( + "config.option.bfd is False", + reason="This test will be run only if '--bfd=True' is provided.") + def test_vxlan_bfd_health_state_change_a1a2_down(self, setUp, encap_type): + ''' + Set BFD state for a1' to Down and a2' to Down. Send multiple + packets (varying tuple) to the route 1's prefix dst. Packets + are not received at any ports. Verify advertise table is removed. 
+ ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + end_point_list = [] + for _ in range(2): + end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc4_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc4_new_dest] = \ + end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc4_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + # No adding to the monitor_list. + self.update_down_list( + self.setup['enable_bfd'], + encap_type, + end_point_list) + + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf( + "a1a2_down", + encap_type, + True, + packet_count=4) + + @pytest.mark.skipif( + "config.option.bfd is False", + reason="This test will be run only if '--bfd=True' is provided.") + def test_vxlan_bfd_health_state_change_a2up_a1down( + self, setUp, encap_type): + ''' + Set BFD state for a2' to UP. Send packets to the route 1's prefix + dst. Packets are received only at endpoint a2. 
Verify advertise + table is present + ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + end_point_list = [] + for _ in range(2): + end_point_list.append( + ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc4_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc4_new_dest] = \ + end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc4_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + # Only a2 is up, but a1 is down. + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + end_point_list[1]) + self.update_down_list( + self.setup['enable_bfd'], + encap_type, + end_point_list[0]) + + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf("a2up_a1down", encap_type, True) + + def test_vxlan_bfd_health_state_change_a1a2_up(self, setUp, encap_type): + ''' + Set BFD state for a1' & a2' to UP. Send multiple packets (varying + tuple) to the route 1's prefix dst. Packets are received at both + a1 and a2. 
Verify advertise table is present + ''' + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + end_point_list = [] + for _ in range(2): + end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc4_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc4_new_dest] = \ + end_point_list + + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc4_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + end_point_list) + + Logger.info("Verify that the new config takes effect and run traffic.") + + self.dump_self_info_and_run_ptf("tc4", encap_type, True) + + # perform cleanup by removing all the routes added by this test class. + # reset to add only the routes added in the setup phase. + ecmp_utils.set_routes_in_dut( + self.setup['duthost'], + self.setup[encap_type]['dest_to_nh_map'], + ecmp_utils.get_payload_version(encap_type), + "DEL") + + self.setup[encap_type]['dest_to_nh_map'] = copy.deepcopy(self.setup[encap_type]['dest_to_nh_map_orignal']) # noqa F821 + ecmp_utils.set_routes_in_dut( + self.setup['duthost'], + self.setup[encap_type]['dest_to_nh_map'], + ecmp_utils.get_payload_version(encap_type), + "SET") + + +class Test_VxLAN_NHG_Modify(Test_VxLAN): + ''' + Class for all the next-hop group modification testcases. + ''' + def setup_route2_single_endpoint(self, encap_type): + ''' + Function to handle dependency of tc9 on tc8. 
+ ''' + if self.setup[encap_type].get('tc8_dest', None): + return + + Logger.info("Pick a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info( + "Choose a route 2 destination and a new single endpoint for it.") + tc8_new_dest = self.setup[encap_type]['dest_to_nh_map'][vnet].keys()[0] + tc8_new_nh = ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX) + self.setup[encap_type]['dest_to_nh_map'][vnet][tc8_new_dest] = \ + [tc8_new_nh] + Logger.info( + "Using destinations: dest:%s => nh:%s", + tc8_new_dest, + tc8_new_nh) + + Logger.info("Map the destination and new endpoint.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc8_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + [tc8_new_nh], + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc8_new_nh) + + Logger.info("Apply the new config in the DUT and run traffic test.") + self.setup[encap_type]['tc8_dest'] = tc8_new_dest + + def setup_route2_shared_endpoints(self, encap_type): + ''' + Function to handle dependency of tc10 on tc9 + ''' + if self.setup[encap_type].get('tc9_dest', None): + return + self.setup_route2_single_endpoint(encap_type) + + Logger.info("Choose a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info( + "Select 2 already existing destinations. 
" + "They must have 2 different nexthops.") + tc9_new_dest1 = self.setup[encap_type]['tc8_dest'] + nh1 = self.setup[encap_type]['dest_to_nh_map'][vnet][tc9_new_dest1][0] + + nh2 = None + for dest in self.setup[encap_type]['dest_to_nh_map'][vnet].keys(): + nexthops = self.setup[encap_type]['dest_to_nh_map'][vnet][dest] + for nh in nexthops: + if nh == nh1: + continue + else: + nh2 = nh + break + if nh2: + Logger.info( + "Using destinations: dest:%s, nexthops:%s, %s", + tc9_new_dest1, + nh1, + nh2) + else: + raise RuntimeError( + "Couldnot find different nexthop for this test." + "The current list: {}".format( + self.setup[encap_type]['dest_to_nh_map'])) + + Logger.info( + "Use the selected nexthops(tunnel endpoints)." + "They are guaranteed to be different.") + tc9_new_nhs = [nh1, nh2] + + Logger.info("Map the destination 1 to the combined list.") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc9_new_dest1] = \ + tc9_new_nhs + + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc9_new_dest1, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc9_new_nhs, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc9_new_nhs) + + self.setup[encap_type]['tc9_dest'] = tc9_new_dest1 + + def setup_route2_shared_different_endpoints(self, encap_type): + ''' + Function to handle dependency of tc9.2 on tc9 + ''' + if self.setup[encap_type].get('tc9_dest', None): + return + self.setup_route2_single_endpoint(encap_type) + + Logger.info("Choose a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info( + "Select 2 already existing destinations. 
" + "They must have 2 different nexthops.") + tc9_new_dest1 = self.setup[encap_type]['tc8_dest'] + old_nh = \ + self.setup[encap_type]['dest_to_nh_map'][vnet][tc9_new_dest1][0] + + nh1 = None + nh2 = None + for dest in self.setup[encap_type]['dest_to_nh_map'][vnet].keys(): + nexthops = self.setup[encap_type]['dest_to_nh_map'][vnet][dest] + for nh in nexthops: + if nh == old_nh: + continue + else: + if not nh1: + nh1 = nh + elif not nh2: + if nh != nh1: + nh2 = nh + break + if nh2: + Logger.info( + "Using destinations: dest:%s, nexthops:%s, %s", + tc9_new_dest1, + nh1, + nh2) + else: + raise RuntimeError( + "Couldnot find different nexthop for this test." + "The current list: {}".format( + self.setup[encap_type]['dest_to_nh_map'])) + + Logger.info( + "Use the selected nexthops(tunnel endpoints)." + "They are guaranteed to be different.") + tc9_new_nhs = [nh1, nh2] + + Logger.info("Map the destination 1 to the combined list.") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc9_new_dest1] = \ + tc9_new_nhs + self.setup[encap_type]['tc9_dest'] = tc9_new_dest1 + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc9_new_dest1, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc9_new_nhs, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc9_new_nhs) + + def test_vxlan_remove_route2(self, setUp, encap_type): + ''' + tc7:send packets to route 1's prefix dst. by removing route 2 from + group a, no change expected to route 1. 
+ ''' + self.setup = setUp + + Logger.info("Pick a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info( + "Setup: Create two destinations with the same endpoint group.") + tc7_end_point_list = [] + for _ in range(2): + tc7_end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + tc7_destinations = [] + for _ in range(2): + tc7_destinations.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX)) + dest_nh_map = self.setup[encap_type]['dest_to_nh_map'] + Logger.info("Map the new destinations to the same endpoint list.") + for i in range(2): + dest_nh_map[vnet][tc7_destinations[i]] = \ + tc7_end_point_list + + Logger.info("Apply the setup configs to the DUT.") + payload_af = ecmp_utils.get_payload_version(encap_type) + for i in range(2): + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc7_destinations[i], + ecmp_utils.HOST_MASK[payload_af], + tc7_end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc7_end_point_list) + Logger.info("Verify the setup works.") + self.dump_self_info_and_run_ptf("tc7", encap_type, True) + Logger.info("End of setup.") + + Logger.info("Remove one of the routes.") + Logger.info("Pick one out of the two TC7 destinations.") + tc7_removed_dest = tc7_destinations[0] + tc7_removed_endpoint = \ + self.setup[encap_type]['dest_to_nh_map'][vnet][tc7_removed_dest] + del self.setup[encap_type]['dest_to_nh_map'][vnet][tc7_removed_dest] + + Logger.info("Remove the chosen dest/endpoint from the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc7_removed_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc7_removed_endpoint, + "DEL") + + Logger.info("Verify the rest of the traffic still works.") + 
self.dump_self_info_and_run_ptf("tc7", encap_type, True) + + def test_vxlan_route2_single_nh(self, setUp, encap_type): + ''' + tc8: set tunnel route 2 to single endpoint b1. + Send packets to route 2's prefix dst. + ''' + self.setup = setUp + self.setup_route2_single_endpoint(encap_type) + self.dump_self_info_and_run_ptf("tc8", encap_type, True) + + def test_vxlan_route2_shared_nh(self, setUp, encap_type): + ''' + tc9: set tunnel route 2 to shared endpoints a1 and b1. + Send packets to route 2's + prefix dst. + ''' + self.setup = setUp + self.setup_route2_shared_endpoints(encap_type) + self.dump_self_info_and_run_ptf("tc9", encap_type, True) + + def test_vxlan_route2_shared_different_nh(self, setUp, encap_type): + ''' + tc9.2: set tunnel route 2 to 2 completely different + shared(no-reuse) endpoints a1 and b1. send packets + to route 2's prefix dst. + ''' + self.setup = setUp + self.setup_route2_shared_different_endpoints(encap_type) + self.dump_self_info_and_run_ptf("tc9.2", encap_type, True) + + def test_vxlan_remove_ecmp_route2(self, setUp, encap_type): + ''' + tc10: remove tunnel route 2. send packets to route 2's prefix dst. 
+ ''' + self.setup = setUp + self.setup_route2_shared_endpoints(encap_type) + Logger.info("Backup the current route config.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + full_map = self.setup[encap_type]['dest_to_nh_map'][vnet].copy() + payload_af = ecmp_utils.get_payload_version(encap_type) + + Logger.info( + "This is to keep track if the selected route " + "should be deleted in the end.") + del_needed = False + try: + Logger.info("Choose a vnet for testing.") + + Logger.info("Choose a destination and its nhs to delete.") + tc10_dest = self.setup[encap_type]['tc9_dest'] + tc10_nhs = \ + self.setup[encap_type]['dest_to_nh_map'][vnet][tc10_dest] + Logger.info( + "Using destination: dest:%s, nh:%s", + tc10_dest, + tc10_nhs) + + Logger.info("Delete the dest and nh in the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc10_dest, + ecmp_utils.HOST_MASK[payload_af], + tc10_nhs, + "DEL") + + del_needed = True + + Logger.info( + "We should pass only the deleted entry to the ptf call," + "and expect encap to fail.") + Logger.info( + "Clear out the mappings, and keep only " + "the deleted dest and nhs.") + self.setup[encap_type]['dest_to_nh_map'][vnet] = {} + self.setup[encap_type]['dest_to_nh_map'][vnet][tc10_dest] =\ + tc10_nhs + + Logger.info("The deleted route should fail to receive traffic.") + self.dump_self_info_and_run_ptf("tc10", encap_type, False) + + # all others should be working. 
+ # Housekeeping: + Logger.info("Restore the mapping of dest->nhs.") + self.setup[encap_type]['dest_to_nh_map'][vnet] = full_map.copy() + Logger.info("Remove the deleted entry alone.") + del self.setup[encap_type]['dest_to_nh_map'][vnet][tc10_dest] + del_needed = False + + Logger.info("Check the traffic is working in the other routes.") + self.dump_self_info_and_run_ptf("tc10", encap_type, True) + + except BaseException: + self.setup[encap_type]['dest_to_nh_map'][vnet] = full_map.copy() + Logger.info("Remove the deleted entry alone.") + if del_needed: + del self.setup[encap_type]['dest_to_nh_map'][vnet][tc10_dest] + raise + + +@pytest.mark.skipif( + "config.option.include_long_tests is False", + reason="This test will be run only if " + "'--include_long_tests=True' is provided.") +class Test_VxLAN_ecmp_random_hash(Test_VxLAN): + ''' + Class for testing different tcp ports for payload. + ''' + def test_vxlan_random_hash(self, setUp, encap_type): + ''' + tc11: set tunnel route 3 to endpoint group c = {c1, c2, c3}. + Ensure c1, c2, and c3 matches to underlay default route. + Send 1000 pkt with random hash to route 3's prefix dst. + ''' + self.setup = setUp + + Logger.info("Chose a vnet for testing.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new destination and 3 nhs for it.") + tc11_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + tc11_new_nhs = [] + for _ in range(3): + tc11_new_nhs.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + # the topology always provides the default routes for any ip address. + # so it is already taken care of. 
+ + Logger.info("Map the new dest and nhs.") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc11_new_dest] =\ + tc11_new_nhs + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + tc11_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc11_new_nhs, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + tc11_new_nhs) + + Logger.info( + "Apply the config in the DUT and verify traffic. " + "The random hash and ECMP check is already taken care of in the " + "VxLAN PTF script.") + self.dump_self_info_and_run_ptf( + "tc11", + encap_type, + True, + packet_count=1000) + + +@pytest.mark.skipif( + "config.option.include_long_tests is False", + reason="This test will be run only if " + "'--include_long_tests=True' is provided.") +class Test_VxLAN_underlay_ecmp(Test_VxLAN): + ''' + Class for all test cases that modify the underlay default route. + ''' + @pytest.mark.parametrize("ecmp_path_count", [1, 2]) + def test_vxlan_modify_underlay_default( + self, setUp, minigraph_facts, encap_type, ecmp_path_count): + ''' + tc12: modify the underlay default route nexthop/s. send packets to + route 3's prefix dst. + ''' + self.setup = setUp + ''' + First step: pick one or two of the interfaces connected to t2, and + bring them down. verify that the encap is still working, and ptf + receives the traffic. Bring them back up. + After that, bring down all the other t2 interfaces, other than + the ones used in the first step. This will force a modification + to the underlay default routes nexthops. + ''' + + all_t2_intfs = list(ecmp_utils.get_portchannels_to_neighbors( + self.setup['duthost'], + "T2", + minigraph_facts)) + + if not all_t2_intfs: + all_t2_intfs = ecmp_utils.get_ethernet_to_neighbors( + "T2", + minigraph_facts) + Logger.info("Dumping T2 link info: %s", all_t2_intfs) + if not all_t2_intfs: + raise RuntimeError( + "No interface found connected to t2 neighbors. 
" + "pls check the testbed, aborting.") + + # Keep a copy of the internal housekeeping list of t2 ports. + # This is the full list of DUT ports connected to T2 neighbors. + # It is one of the arguments to the ptf code. + all_t2_ports = list(self.setup[encap_type]['t2_ports']) + + # A distinction in this script between ports and interfaces: + # Ports are physical (Ethernet) only. + # Interfaces have IP address(Ethernet or PortChannel). + try: + selected_intfs = [] + # Choose some intfs based on the parameter ecmp_path_count. + # when ecmp_path_count == 1, it is non-ecmp. The switching + # happens between ecmp and non-ecmp. Otherwise, the switching + # happens within ecmp only. + for i in range(ecmp_path_count): + selected_intfs.append(all_t2_intfs[i]) + + for intf in selected_intfs: + self.setup['duthost'].shell( + "sudo config interface shutdown {}".format(intf)) + downed_ports = ecmp_utils.get_corresponding_ports( + selected_intfs, + minigraph_facts) + self.setup[encap_type]['t2_ports'] = \ + list(set(all_t2_ports) - set(downed_ports)) + downed_bgp_neighbors = ecmp_utils.get_downed_bgp_neighbors( + selected_intfs, minigraph_facts) + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost'], + down_list=downed_bgp_neighbors), + "BGP neighbors didn't come up after all " + "interfaces have been brought up.") + time.sleep(10) + self.dump_self_info_and_run_ptf( + "tc12", + encap_type, + True, + packet_count=1000) + + Logger.info( + "Reverse the action: bring up the selected_intfs" + " and shutdown others.") + for intf in selected_intfs: + self.setup['duthost'].shell( + "sudo config interface startup {}".format(intf)) + Logger.info("Shutdown other interfaces.") + remaining_interfaces = list( + set(all_t2_intfs) - set(selected_intfs)) + for intf in remaining_interfaces: + self.setup['duthost'].shell( + "sudo config interface shutdown {}".format(intf)) + downed_bgp_neighbors = ecmp_utils.get_downed_bgp_neighbors( + 
remaining_interfaces, + minigraph_facts) + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost'], + down_list=downed_bgp_neighbors), + "BGP neighbors didn't come up after all interfaces have been" + "brought up.") + self.setup[encap_type]['t2_ports'] = \ + ecmp_utils.get_corresponding_ports( + selected_intfs, + minigraph_facts) + + ''' + Need to update the bfd_responder to listen only on the sub-set of + T2 ports that are active. If we still receive packets on the + downed ports, we have a problem! + ''' + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + time.sleep(10) + self.dump_self_info_and_run_ptf( + "tc12", + encap_type, + True, + packet_count=1000) + + Logger.info("Recovery. Bring all up, and verify traffic works.") + for intf in all_t2_intfs: + self.setup['duthost'].shell( + "sudo config interface startup {}".format(intf)) + Logger.info("Wait for all bgp is up.") + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost']), + "BGP neighbors didn't come up after " + "all interfaces have been brought up.") + Logger.info("Verify traffic flows after recovery.") + self.setup[encap_type]['t2_ports'] = all_t2_ports + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + time.sleep(10) + self.dump_self_info_and_run_ptf( + "tc12", + encap_type, + True, + packet_count=1000) + + except Exception: + # If anything goes wrong in the try block, atleast bring the intf + # back up. 
+ self.setup[encap_type]['t2_ports'] = all_t2_ports + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + for intf in all_t2_intfs: + self.setup['duthost'].shell( + "sudo config interface startup {}".format(intf)) + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost']), + "BGP neighbors didn't come up after all interfaces " + "have been brought up.") + raise + + def test_vxlan_remove_add_underlay_default(self, + setUp, + minigraph_facts, + encap_type): + ''' + tc13: remove the underlay default route. + tc14: add the underlay default route. + ''' + self.setup = setUp + Logger.info( + "Find all the underlay default routes' interfaces. This means all " + "T2 interfaces.") + all_t2_intfs = list(ecmp_utils.get_portchannels_to_neighbors( + self.setup['duthost'], + "T2", + minigraph_facts)) + if not all_t2_intfs: + all_t2_intfs = ecmp_utils.get_ethernet_to_neighbors( + "T2", + minigraph_facts) + Logger.info("Dumping T2 link info: %s", all_t2_intfs) + if not all_t2_intfs: + raise RuntimeError( + "No interface found connected to t2 neighbors." + "Pls check the testbed, aborting.") + try: + Logger.info("Bring down the T2 interfaces.") + for intf in all_t2_intfs: + self.setup['duthost'].shell( + "sudo config interface shutdown {}".format(intf)) + downed_bgp_neighbors = ecmp_utils.get_downed_bgp_neighbors( + all_t2_intfs, + minigraph_facts) + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost'], + down_list=downed_bgp_neighbors), + "BGP neighbors have not reached the required state after " + "T2 intf are shutdown.") + Logger.info("Verify that traffic is not flowing through.") + self.dump_self_info_and_run_ptf("tc13", encap_type, False) + + # tc14: Re-add the underlay default route. 
+ Logger.info("Bring up the T2 interfaces.") + for intf in all_t2_intfs: + self.setup['duthost'].shell( + "sudo config interface startup {}".format(intf)) + Logger.info("Wait for all bgp is up.") + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost']), + "BGP neighbors didn't come up after all interfaces" + " have been brought up.") + Logger.info("Verify the traffic is flowing through, again.") + self.dump_self_info_and_run_ptf( + "tc14", + encap_type, + True, + packet_count=1000) + except Exception: + Logger.info( + "If anything goes wrong in the try block," + " atleast bring the intf back up.") + for intf in all_t2_intfs: + self.setup['duthost'].shell( + "sudo config interface startup {}".format(intf)) + pytest_assert( + wait_until( + 300, + 30, + 0, + ecmp_utils.bgp_established, + self.setup['duthost']), + "BGP neighbors didn't come up after all" + " interfaces have been brought up.") + raise + + def test_underlay_specific_route(self, setUp, minigraph_facts, encap_type): + ''' + Create a more specific underlay route to c1. + Verify c1 packets are received only on the c1's nexthop interface + ''' + self.setup = setUp + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + endpoint_nhmap = self.setup[encap_type]['dest_to_nh_map'][vnet] + backup_t2_ports = self.setup[encap_type]['t2_ports'] + # Gathering all T2 Neighbors + all_t2_neighbors = ecmp_utils.get_all_bgp_neighbors( + minigraph_facts, + "T2") + + # Choosing a specific T2 Neighbor to add static route + t2_neighbor = all_t2_neighbors.keys()[0] + + # Gathering PTF indices corresponding to specific T2 Neighbor + ret_list = ecmp_utils.gather_ptf_indices_t2_neighbor( + minigraph_facts, + all_t2_neighbors, + t2_neighbor, + encap_type) + outer_layer_version = ecmp_utils.get_outer_layer_version(encap_type) + ''' + Addition & Modification of static routes - endpoint_nhmap will be + prefix to endpoint mapping. 
Static routes are added towards + endpoint with T2 VM's ip as nexthop + ''' + gateway = all_t2_neighbors[t2_neighbor][outer_layer_version].lower() + for _, nexthops in endpoint_nhmap.items(): + for nexthop in nexthops: + if outer_layer_version == "v6": + vtysh_config_commands = [] + vtysh_config_commands.append("ipv6 route {}/{} {}".format( + nexthop, + "64", + gateway)) + vtysh_config_commands.append("ipv6 route {}/{} {}".format( + nexthop, + "68", + gateway)) + self.setup['duthost'].copy( + content="\n".join(vtysh_config_commands), + dest="/tmp/specific_route_v6.txt") + self.setup['duthost'].command( + "docker cp /tmp/specific_route_v6.txt bgp:/") + self.setup['duthost'].command( + "vtysh -f /specific_route_v6.txt") + elif outer_layer_version == "v4": + static_route = [] + static_route.append( + "sudo config route add prefix {}/{} nexthop {}".format( + ".".join(nexthop.split(".")[:-1])+".0", "24", + gateway)) + static_route.append( + "sudo config route add prefix {}/{} nexthop {}".format( + nexthop, + ecmp_utils.HOST_MASK[outer_layer_version], + gateway)) + + self.setup['duthost'].shell_cmds(cmds=static_route) + self.setup[encap_type]['t2_ports'] = ret_list + + ''' + Traffic verification to see if specific route is preferred before + deletion of static route + ''' + self.dump_self_info_and_run_ptf( + "underlay_specific_route", + encap_type, + True) + # Deletion of all static routes + gateway = all_t2_neighbors[t2_neighbor][outer_layer_version].lower() + for _, nexthops in endpoint_nhmap.items(): + for nexthop in nexthops: + if ecmp_utils.get_outer_layer_version(encap_type) == "v6": + vtysh_config_commands = [] + vtysh_config_commands.append( + "no ipv6 route {}/{} {}".format( + nexthop, "64", gateway)) + vtysh_config_commands.append( + "no ipv6 route {}/{} {}".format( + nexthop, "68", gateway)) + self.setup['duthost'].copy( + content="\n".join(vtysh_config_commands), + dest="/tmp/specific_route_v6.txt") + self.setup['duthost'].command( + "docker cp 
/tmp/specific_route_v6.txt bgp:/") + self.setup['duthost'].command( + "vtysh -f /specific_route_v6.txt") + + elif ecmp_utils.get_outer_layer_version(encap_type) == "v4": + static_route = [] + static_route.append( + "sudo config route del prefix {}/{} nexthop {}".format( + ".".join( + nexthop.split(".")[:-1])+".0", "24", gateway)) + static_route.append( + "sudo config route del prefix {}/{} nexthop {}".format( + nexthop, + ecmp_utils.HOST_MASK[outer_layer_version], + gateway)) + + self.setup['duthost'].shell_cmds(cmds=static_route) + self.setup[encap_type]['t2_ports'] = backup_t2_ports + + Logger.info( + "Allow some time for recovery of default route" + " after deleting the specific route.") + time.sleep(10) + + ''' + Traffic verification to see if default route is preferred after + deletion of static route + ''' + self.dump_self_info_and_run_ptf( + "underlay_specific_route", + encap_type, + True) + + def test_underlay_portchannel_shutdown(self, + setUp, + minigraph_facts, + encap_type): + ''' + Bring down one of the port-channels. 
Packets are equally received at c1, c2 or c3
self.setup[encap_type]['t2_ports'] = all_t2_ports + ecmp_utils.update_monitor_file( + self.setup['ptfhost'], + self.setup['monitor_file'], + self.setup[encap_type]['t2_ports'], + list(self.setup['list_of_bfd_monitors'])) + raise + + +@pytest.mark.skipif( + "config.option.include_long_tests is False", + reason="This test will be run only if" + "'--include_long_tests=True' is provided.") +class Test_VxLAN_entropy(Test_VxLAN): + ''' + Class for all test cases that modify the payload traffic + properties - tcp source port, destination port and source IP address. + ''' + def verify_entropy( + self, + encap_type, + random_sport=False, + random_dport=True, + random_src_ip=False, + tolerance=None): + ''' + Function to be reused by the entropy testcases. Sets up a couple of + endpoints on the top of the existing ones, and performs the traffic + test, with different payload variants. + ''' + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['dest_to_nh_map'].keys()[0] + Logger.info("Create a new list of endpoint(s).") + end_point_list = [] + for _ in range(2): + end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + Logger.info("Create a new destination") + new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][new_dest] = \ + end_point_list + Logger.info("Create a new config and Copy to the DUT.") + ecmp_utils.create_and_apply_config( + self.setup['duthost'], + vnet, + new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + end_point_list, + "SET", + bfd=self.setup['enable_bfd']) + self.update_monitor_list( + self.setup['enable_bfd'], + encap_type, + end_point_list) + Logger.info("Verify that the new config takes effect and run traffic.") + self.dump_self_info_and_run_ptf( + "entropy", + 
encap_type, + True, + random_sport=random_sport, + random_dport=random_dport, + random_src_ip=random_src_ip, + packet_count=1000, + tolerance=tolerance) + + def test_verify_entropy(self, setUp, encap_type): + ''' + Verification of entropy - Create tunnel route 4 to endpoint group A. + Send packets (fixed tuple) to route 4's prefix dst + ''' + self.setup = setUp + self.verify_entropy( + encap_type, + random_dport=True, + random_sport=True, + random_src_ip=True, + tolerance=0.75) # More tolerance since this varies entropy a lot. + + def test_vxlan_random_dst_port(self, setUp, encap_type): + ''' + Verification of entropy - Change the udp dst port of original packet to + route 4's prefix dst + ''' + self.setup = setUp + self.verify_entropy(encap_type, tolerance=0.03) + + def test_vxlan_random_src_port(self, setUp, encap_type): + ''' + Verification of entropy - Change the udp src port of original packet + to route 4's prefix dst + ''' + self.setup = setUp + self.verify_entropy( + encap_type, + random_dport=False, + random_sport=True, + tolerance=0.03) + + def test_vxlan_varying_src_ip(self, setUp, encap_type): + ''' + Verification of entropy - Change the udp src ip of original packet to + route 4's prefix dst + ''' + self.setup = setUp + self.verify_entropy( + encap_type, + random_dport=False, + random_src_ip=True, + tolerance=0.03) + + +class Test_VxLAN_ECMP_Priority_endpoints(Test_VxLAN): + ''' + Class for all the Vxlan tunnel cases where primary and secondary next hops are configured. + ''' + def test_vxlan_priority_single_pri_sec_switchover(self, setUp, encap_type): + ''' + tc4:create tunnel route 1 with two endpoints a = {a1, b1}. a1 is primary, b1 is secondary. + 1) both a1,b1 are UP. + 2) send packets to the route 1's prefix dst. packets are received at a1. + 3) bring a1 down. + 4) send packets to the route 1's prefix dst. packets are received at b1. + 5) bring both a1 and b1 down. + 6) No traffic is forwarded. 
+ ''' + if encap_type in ['v4_in_v6', 'v6_in_v6']: + pytest.skip("Skipping test. v6 underlay is not supported in priority tunnels.") + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + tc1_end_point_list = [] + for _ in range(2): + tc1_end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc1_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc1_new_dest] = \ + tc1_end_point_list + + Logger.info("Create a new priority endpoint config and Copy to the DUT.") + + Logger.info("Create the json and apply the config in the DUT swss.") + # The config looks like: + # [ + # { + # "VNET_ROUTE_TUNNEL_TABLE:vnet:tcx_new_dest/32": { + # "endpoint": "{tcx_end_point_list}" + # "endpoint_monitor": "{tcx_end_point_list}", + # "primary" : "{}", + # "adv_prefix" : "{}/{}", + # }, + # "OP": "{}" + # } + # ] + try: + ecmp_utils.create_and_apply_priority_config( + self.setup['duthost'], + vnet, + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list, + [tc1_end_point_list[0]], + "SET") + # both primary secondary are up. + # only primary should recieve traffic. + time.sleep(2) + down_list = tc1_end_point_list[1] + if isinstance(down_list, str): + down_list = [down_list] + self.setup['list_of_downed_endpoints'] = set(down_list) + # setting both primary and secondary as up. only primary will recieve traffic. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[1], "up") + time.sleep(10) + # verifying overlay_dmac + result = \ + self.setup['duthost'].shell("sonic-db-cli APPL_DB HGET 'VNET_MONITOR_TABLE:{}:{}/{}' 'overlay_dmac'" + .format( + tc1_end_point_list[0], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)])) + assert str(result['stdout']) == ecmp_utils.OVERLAY_DMAC + + self.dump_self_info_and_run_ptf("test1", encap_type, True) + + # Single primary-secondary switchover. + # Endpoint list = [A, A`], Primary[A] | Active NH=[A] | + # Action: A went Down | Result NH=[A`] + # NH has a single primary endpoint which upon failing is replaced by the single Backup endpoint + Logger.info("Single primary-secondary switchover.") + time.sleep(2) + down_list = tc1_end_point_list[0] + if isinstance(down_list, str): + down_list = [down_list] + self.setup['list_of_downed_endpoints'] = set(down_list) + # setting primary down. only secondary will recieve traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[0], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test1", encap_type, True) + + # Single primary recovery. + # Endpoint list = [A, A`], Primary[A] | Active NH=[A`] | + # Action: A is back up | ResultNH=[A] + # NH has a single backup endpoint which upon recovery of primary is replaced. + Logger.info("Single primary recovery.") + time.sleep(2) + down_list = tc1_end_point_list[1] + if isinstance(down_list, str): + down_list = [down_list] + self.setup['list_of_downed_endpoints'] = set(down_list) + # setting primary up. 
only primary will recieve traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[0], "up") + time.sleep(10) + self.dump_self_info_and_run_ptf("test1", encap_type, True) + + # Single primary backup Failure. + # Endpoint list = [A, A`]. Primary[A]| Active NH=[A`] A is DOWN | + # Action: A` goes Down | result NH=[] + # No active Endpoint results in route being removed. + Logger.info("Single primary & backup Failure.") + down_list = tc1_end_point_list + if isinstance(down_list, str): + down_list = [down_list] + self.setup['list_of_downed_endpoints'] = set(down_list) + # setting both down. no traffic is recieved. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[1], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list[0], "down") + + time.sleep(10) + self.dump_self_info_and_run_ptf("test1", encap_type, True) + ecmp_utils.create_and_apply_priority_config( + self.setup['duthost'], + vnet, + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list, + [tc1_end_point_list[0]], + "DEL") + self.setup[encap_type]['dest_to_nh_map'] = copy.deepcopy(self.setup[encap_type]['dest_to_nh_map_orignal']) # noqa F821 + + except Exception: + ecmp_utils.create_and_apply_priority_config( + self.setup['duthost'], + vnet, + tc1_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc1_end_point_list, + [tc1_end_point_list[0]], + "DEL") + + def test_vxlan_priority_multi_pri_sec_switchover(self, setUp, encap_type): + ''' + tc2:create tunnel route 1 with 6 endpoints a = {A, B, A`, B`}. A,B + are primary, A`,B` are secondary. 
+ 1) All eps are up.A,B,A`,B` + 2) send packets to the route 1's prefix dst. packets are received at A,B. + 3) bring A down. + 4) send packets to the route 1's prefix dst. packets are received at B. + 5) bring B down. + 6) send packets to the route 1's prefix dst. packets are recieved at A`,B`. + 7) bring B` down. + 8) send packets to the route 1's prefix dst. packets are recieved at A`. + 9) bring A up. + 10) send packets to the route 1's prefix dst. packets are recieved at A. + 11) bring B, A`, B` up. + 12) send packets to the route 1's prefix dst. packets are recieved at A,B. + 13) Bring all endpoints down. + 14) no traffic being passed. + 15) bring A, B, A`,B` up. + 16) send packets to the route 1's prefix dst. packets are recieved at A,B. + 17) Bring all endpoints down. + 18) no traffic being passed. + 19) bring A`,B` up. + 20) send packets to the route 1's prefix dst. packets are recieved at A`,B`. + 21) bring A,B up. + 22) send packets to the route 1's prefix dst. packets are recieved at A, B. + ''' + if encap_type in ['v4_in_v6', 'v6_in_v6']: + pytest.skip("Skipping test. 
v6 underlay is not supported in priority tunnels.") + self.setup = setUp + + Logger.info("Choose a vnet.") + vnet = self.setup[encap_type]['vnet_vni_map'].keys()[0] + + Logger.info("Create a new list of endpoint(s).") + tc2_end_point_list = [] + for _ in range(4): + tc2_end_point_list.append(ecmp_utils.get_ip_address( + af=ecmp_utils.get_outer_layer_version(encap_type), + netid=NEXTHOP_PREFIX)) + + Logger.info("Create a new destination") + tc2_new_dest = ecmp_utils.get_ip_address( + af=ecmp_utils.get_payload_version(encap_type), + netid=DESTINATION_PREFIX) + + Logger.info("Map the new destination and the new endpoint(s).") + self.setup[encap_type]['dest_to_nh_map'][vnet][tc2_new_dest] = \ + tc2_end_point_list + + Logger.info("Create a new priority endpoint config and Copy to the DUT.") + + Logger.info("Create the json and apply the config in the DUT swss.") + # The config looks like: + # [ + # { + # "VNET_ROUTE_TUNNEL_TABLE:vnet:tcx_new_dest/32": { + # "endpoint": "{tcx_end_point_list}" + # "endpoint_monitor": "{tcx_end_point_list}", + # "primary" : "{tcx_end_point_list[0:len/2]}", + # "adv_prefix" : "{tcx_new_dest}/{32}", + # }, + # "OP": "{}" + # } + # ] + try: + primary_nhg = tc2_end_point_list[0:2] + secondary_nhg = tc2_end_point_list[2:4] + + ecmp_utils.create_and_apply_priority_config( + self.setup['duthost'], + vnet, + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list, + primary_nhg, + "SET") + + time.sleep(5) + # Bringing all endpoints UP. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list[1], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list[2], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list[3], "up") + + # check all primary Eps are operational + inactive_list = list(secondary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + time.sleep(10) + # ensure that the traffic is distributed to all 3 primary Endpoints. + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Single primary failure. + # Endpoint list = [A, B, A`, B`], Primary = [A, B] | active NH = [A, B] | + # Action: A goes Down | Result NH=[B] + # One of the primaries goes down. The others stay active. + time.sleep(2) + inactive_list = list(secondary_nhg) + inactive_list.append(primary_nhg[0]) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting A down. B,C getting traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. All primary failure. + # Endpoint list = [A, B, A`, B`] Primary = [A, B] | A is Down. active NH = [B] | + # Action: B goes Down. 
| Result: NH=[A`, B`] + # All the primaries are down. The backup endpoints are added to the NH group. + time.sleep(2) + inactive_list = list(primary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting C down, now all backups are up and recieving traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Backup Failure. + # Endpoint list = [A, B, A`, B`] Primary = [A, B] | + # A, B already Down. Active NH = [A`, B`] | + # Action: B` goes Down. | Result: NH=[A`] + # All the primaries are down. Failure of a backup endpoint shall result in its removal from NH. + time.sleep(2) + inactive_list = list(primary_nhg) + inactive_list.append(secondary_nhg[1]) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting C` down, now A` and B` are up and recieving traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Single primary recovery. + # Endpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [A`] | + # Action: A is Up. B still Down | Result: NH=[A] + # Primary takes precedence and is added to the NH. All the backups are removed. + time.sleep(2) + inactive_list = list([primary_nhg[1]]) + inactive_list += secondary_nhg + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting A up. only A will recieve traffic. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "up") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup recovery. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [A] | + # Action: A is Up. B also come up along with A` and B` | Result: NH=[A, B] + # Primary endpoints take precedence and are added to the NH. + time.sleep(2) + inactive_list = list(secondary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "up") + + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup all failure. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [A,B] | + # Action: All A, B, A`, B`, go down. | Result: NH=[] + # Route is removed, No traffic forwarded. + time.sleep(2) + inactive_list = list(tc2_end_point_list) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[0], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup recovery. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [] | + # Action: A, B come up along with A` and B` | Result: NH=[A, B] + # Primary endpoints take precedence and are added to the NH. + time.sleep(2) + inactive_list = list(secondary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "up") + + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup all failure 2. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [A,B] | + # Action: All A, B, A`, B`, go down. | Result: NH=[] + # Route is removed, No traffic forwarded. + time.sleep(2) + inactive_list = list(tc2_end_point_list) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. 
+ ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[0], "down") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "down") + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup recovery of secondary. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [] | + # Action: bring up A` and B` | Result: NH=[A`, B`] + # Primary endpoints take precedence and are added to the NH. + time.sleep(2) + inactive_list = list(primary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + secondary_nhg[1], "up") + + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + # Multiple primary backups. Multiple primary & backup recovery of primary after secondary. + # Edpoint list = [A, B, A`, B`] Primary = [A, B] | Active NH = [A`, B`] | + # Action: bring up A and B | Result: NH=[A, B] + # Primary endpoints take precedence and are added to the NH. 
+ time.sleep(2) + inactive_list = list(secondary_nhg) + if isinstance(inactive_list, str): + inactive_list = [inactive_list] + self.setup['list_of_downed_endpoints'] = set(inactive_list) + # setting B, C and C` up. only A,B,C will recieve traffic. + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[0], "up") + ecmp_utils.set_vnet_monitor_state(self.setup['duthost'], + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + primary_nhg[1], "up") + + time.sleep(10) + self.dump_self_info_and_run_ptf("test2", encap_type, True) + + except Exception: + ecmp_utils.create_and_apply_priority_config( + self.setup['duthost'], + vnet, + tc2_new_dest, + ecmp_utils.HOST_MASK[ecmp_utils.get_payload_version(encap_type)], + tc2_end_point_list, + primary_nhg, + "DEL") diff --git a/tests/vxlan/vxlan_ecmp_utils.py b/tests/vxlan/vxlan_ecmp_utils.py new file mode 100644 index 00000000000..3f7f01b466b --- /dev/null +++ b/tests/vxlan/vxlan_ecmp_utils.py @@ -0,0 +1,922 @@ +''' + The functions used by test_vxlan_ecmp.py. Since there are plans to + seperate the test script to multiple files, we need a common location + for these functions. + Usage: + from tests.vxlan.ecmp_utils import Ecmp_Utils + my_own_ecmp_utils = Ecmp_Utils() + my_own_ecmp_utils.create_vxlan_tunnel(...) +''' + +from sys import getsizeof +import re +import time +import logging +from ipaddress import ip_address, IPv4Address, IPv6Address + +Logger = logging.getLogger(__name__) + + +class Ecmp_Utils(object): + ''' + Collection of functions that are used by the VxLAN scripts. + ''' + Address_Count = 0 + + # Some of the self.Constants used in this script. + Constants = {} + + # Mapping the version to the python module. + IP_TYPE = { + 'v4': IPv4Address, + 'v6': IPv6Address + } + + # Starting prefixes to be used for the destinations and End points. 
class Ecmp_Utils(object):
    '''
    Collection of functions that are used by the VxLAN scripts.
    '''

    # Running counter used by get_ip_address() so that every generated
    # address is unique within one instance of this class.
    Address_Count = 0

    # Test-wide constants (e.g. DEBUG, KEEP_TEMP_FILES, DUT_HOSTID).
    # NOTE(review): expected to be populated by the test script before
    # any method that reads self.Constants is called — confirm in callers.
    Constants = {}

    # Mapping the version to the python ipaddress class.
    IP_TYPE = {
        'v4': IPv4Address,
        'v6': IPv6Address
    }

    # Starting prefixes to be used for the destinations and End points.
    DESTINATION_PREFIX = 150
    NEXTHOP_PREFIX = 100

    # Scale values for CRM test cases
    NHS_PER_DESTINATION = 8
    NUMBER_OF_AVAILABLE_NEXTHOPS = 4000
    NUMBER_OF_ECMP_NHS = 128000

    # This is the list of encapsulations that will be tested in this script.
    # v6_in_v4 means: V6 payload is encapsulated inside v4 outer layer.
    # This list is used in many locations in the script.
    SUPPORTED_ENCAP_TYPES = ['v4_in_v4', 'v4_in_v6', 'v6_in_v4', 'v6_in_v6']

    # This is the mask values to use for destination
    # in the vnet routes.
    HOST_MASK = {'v4': 32, 'v6': 128}

    OVERLAY_DMAC = "25:35:45:55:65:75"

    def create_vxlan_tunnel(self,
                            duthost,
                            minigraph_data,
                            af,
                            tunnel_name=None,
                            src_ip=None):
        '''
        Create a vxlan tunnel in the DUT. Arguments:
        duthost       : The DUT ansible host object.
        minigraph_data: minigraph facts from the dut host.
        af            : Address family : v4 or v6.
        tunnel_name   : A name for the Tunnel, default: tunnel_<af>
        src_ip        : Source ip address of the tunnel. It has to be a
                        local ip address in the DUT. Default: Loopback
                        ip address.
        Returns the name of the tunnel created.
        '''
        if tunnel_name is None:
            tunnel_name = "tunnel_{}".format(af)

        if src_ip is None:
            src_ip = self.get_dut_loopback_address(duthost, minigraph_data, af)

        config = '''{{
            "VXLAN_TUNNEL": {{
                "{}": {{
                    "src_ip": "{}"
                }}
            }}
        }}'''.format(tunnel_name, src_ip)

        self.apply_config_in_dut(duthost, config, name="vxlan_tunnel_" + af)
        return tunnel_name

    def apply_config_in_dut(self, duthost, config, name="vxlan"):
        '''
        Copy the given json(config) to the DUT and load it with
        "config load". The temp file is removed afterwards unless
        Constants['KEEP_TEMP_FILES'] is set.
        '''
        if self.Constants['DEBUG']:
            filename = "/tmp/" + name + ".json"
        else:
            # Timestamped name avoids clashes between parallel runs.
            filename = "/tmp/" + name + "-" + str(time.time()) + ".json"
        duthost.copy(content=config, dest=filename)
        duthost.shell("sudo config load {} -y".format(filename))
        time.sleep(1)
        if not self.Constants['KEEP_TEMP_FILES']:
            duthost.shell("rm {}".format(filename))

    def get_dut_loopback_address(self, duthost, minigraph_data, af):
        '''
        Returns the IP address of the Loopback interface in DUT, from
        minigraph.
        Arguments:
        duthost       : DUT Ansible Host object.
        minigraph_data: Minigraph facts from the DUT.
        af            : Address Family(v4 or v6).
        Raises RuntimeError if no loopback address of that family exists.
        '''
        lo_ip = minigraph_data['minigraph_lo_interfaces']
        for intf in lo_ip:
            if isinstance(ip_address(intf['addr']), self.IP_TYPE[af]):
                return intf['addr']

        raise RuntimeError(
            "Couldnot find the {} loopback address"
            "for the DUT:{} from minigraph.".format(af, duthost.hostname))

    def select_required_interfaces(
            self, duthost, number_of_required_interfaces, minigraph_data, af):
        '''
        Pick the required number of interfaces to use for tests.
        These interfaces will be selected based on if they are currently
        running a established BGP. The interfaces will be picked from the T0
        facing side.
        '''
        bgp_interfaces = self.get_all_interfaces_running_bgp(
            duthost,
            minigraph_data,
            "T0")

        # Filter the BGP neighbors down to the requested address family.
        list_of_bgp_ips = []
        for neigh_ip_address in bgp_interfaces:
            if isinstance(ip_address(neigh_ip_address), self.IP_TYPE[af]):
                list_of_bgp_ips.append(neigh_ip_address)

        ret_interface_list = []
        available_number = len(list_of_bgp_ips)
        # Confirm there are enough interfaces (basicaly more than or equal
        # to the number of vnets).
        if available_number <= number_of_required_interfaces+1:
            raise RuntimeError(
                "There are not enough interfaces needed to perform the test. "
                "We need atleast {} interfaces, but only {} are "
                "available.".format(
                    number_of_required_interfaces+1, available_number))
        for index in range(number_of_required_interfaces):
            neigh_ip_address = list_of_bgp_ips[index]
            # dict views are not indexable in Python 3; wrap in list()
            # (the original .keys()[0] works only on Python 2).
            current_interface_name = \
                list(bgp_interfaces[neigh_ip_address].keys())[0]
            ret_interface_list.append(current_interface_name)

        if ret_interface_list:
            return ret_interface_list
        else:
            raise RuntimeError(
                "There is no Ethernet interface running BGP."
                "Pls run this test on any T1 topology.")

    @classmethod
    def get_portchannels_to_neighbors(cls,
                                      duthost,
                                      neighbor_type,
                                      minigraph_data):
        '''
        A function to get the list of portchannels connected to BGP
        neighbors of given type(T0 or T2). It returns a list of
        portchannels+minigraph_lag_facts_of_that portchannel.
        Arguments:
            duthost : DUT Ansible Host object
            neighbor_type: T0 or T2.
            minigraph_data: Minigraph facts from the DUT.
        '''
        lag_facts = duthost.lag_facts(host=duthost.sonichost.mgmt_ip)
        names = lag_facts['ansible_facts']['lag_facts']['names']
        lags = lag_facts['ansible_facts']['lag_facts']['lags']

        return_list = {}
        pattern = re.compile("{}$".format(neighbor_type))
        for pc_name in names:
            port_struct = lags[pc_name]['po_config']['ports']
            if lags[pc_name]['po_intf_stat'] == "Up":
                # dict views are not indexable in Python 3; wrap in list().
                intf = list(port_struct.keys())[0]
                neighbor = minigraph_data['minigraph_neighbors'][intf]['name']
                match = pattern.search(neighbor)
                if match:
                    # We found an interface that has a given neighbor_type.
                    # Let us use this.
                    return_list[pc_name] = port_struct

        return return_list

    @classmethod
    def get_ethernet_to_neighbors(cls, neighbor_type, minigraph_data):
        '''
        A function to get the list of Ethernet interfaces connected to
        BGP neighbors of given type(T0 or T2). It returns a list of ports.
        Arguments:
            neighbor_type: T0 or T2.
            minigraph_data: Minigraph facts from the DUT.
        '''

        pattern = re.compile("{}$".format(neighbor_type))
        ret_list = []

        for intf in minigraph_data['minigraph_neighbors']:
            if pattern.search(
                    minigraph_data['minigraph_neighbors'][intf]['name']):
                ret_list.append(intf)

        return ret_list

    def assign_intf_ip_address(self, selected_interfaces, af):
        '''
        Calculate an ip address for the selected interfaces. It is just a
        mapping. Nothing is configured.
        '''
        intf_ip_map = {}
        for intf in selected_interfaces:
            address = self.get_ip_address(
                af=af, hostid=self.Constants['DUT_HOSTID'], netid=201)
            intf_ip_map[intf] = address
        return intf_ip_map

    @classmethod
    def get_all_interfaces_running_bgp(cls,
                                       duthost,
                                       minigraph_data,
                                       neighbor_type):
        '''
        Analyze the DUT for bgp and return a structure that maps
        established BGP neighbor ip -> {attached interface: local ip},
        restricted to neighbors matching neighbor_type (T0/T2).
        '''
        bgp_neigh_list = duthost.bgp_facts()['ansible_facts']['bgp_neighbors']
        minigraph_ip_interfaces = minigraph_data['minigraph_interfaces'] +\
            minigraph_data['minigraph_portchannel_interfaces']
        peer_addr_map = {}
        pattern = re.compile("{}$".format(neighbor_type))
        for index in minigraph_ip_interfaces:
            peer_addr_map[index['peer_addr']] =\
                {index['attachto']: index['addr']}

        ret_list = {}
        # iteritems() does not exist in Python 3; items() works in both.
        for index, entry in peer_addr_map.items():
            if bgp_neigh_list[index]['state'] == 'established' and \
                    pattern.search(bgp_neigh_list[index]['description']):
                ret_list[index] = entry

        return ret_list

    def configure_vnet_neighbors(self,
                                 duthost,
                                 intf_to_ip_map,
                                 minigraph_data, af):
        '''
        Setup the vnet neighbor ip addresses.
        Returns a map of PTF-facing interface -> neighbor ip used.
        '''
        family = "IPv4"
        if af == "v6":
            family = "IPv6"

        return_dict = {}

        config_list = []
        # iteritems() does not exist in Python 3; items() works in both.
        for intf, addr in intf_to_ip_map.items():
            # If the given address is "net.1", the return address is "net.101"
            # THE ASSUMPTION HERE IS THAT THE DUT ADDRESSES ARE ENDING IN ".1".
            # addr.decode is only in python2.7
            ptf_ip = ""
            if hasattr(addr, 'decode'):
                # python 2.7
                ptf_ip = str(ip_address(addr.decode())+100)
            else:
                # python 3
                ptf_ip = str(ip_address(addr)+100)

            if "Ethernet" in intf:
                return_dict[intf] = ptf_ip
            elif "PortChannel" in intf:
                for member in self.get_ethernet_ports([intf], minigraph_data):
                    return_dict[member] = ptf_ip

            config_list.append('''"{}|{}": {{
                "family": "{}"
            }}'''.format(intf, ptf_ip, family))

        full_config = '''{
            "NEIGH": {
        ''' + ",\n".join(config_list) + '''\n}\n}'''

        self.apply_config_in_dut(duthost, full_config, name="vnet_nbr_"+af)
        return return_dict

    def create_vnets(
            self,
            duthost,
            tunnel_name,
            vnet_count=1,
            scope=None,
            vni_base=10000,
            vnet_name_prefix="Vnet"):
        '''
        Create the required number of vnets.
        duthost          : AnsibleHost data structure of the DUT.
        tunnel_name      : The VxLAN Tunnel name.
        vnet_count       : Number of vnets to configure.
        scope            : The value for "scope" argument in the config.
                           Only "default" is supported. Or it should not
                           be given at all.
        vni_base         : The starting number for VNI.
        vnet_name_prefix : The prefix for the name of vnets.
        Returns a map of vnet name -> vni.
        '''
        return_dict = {}
        scope_entry = ""
        if scope:
            scope_entry = '''"scope": "{}",\n'''.format(scope)
        config_list = []
        for cnt in range(vnet_count):
            name = vnet_name_prefix + "-" + str(cnt)
            vni = vni_base+cnt
            return_dict[name] = vni
            config_list.append('''"{}": {{
                "vxlan_tunnel": "{}",
                {}"vni": "{}",
                "peer_list": "",
                "overlay_dmac" : "{}"
            }}'''.format(name, tunnel_name, scope_entry, vni,
                         self.OVERLAY_DMAC))

        full_config = '{\n"VNET": {' + ",\n".join(config_list) + '\n}\n}'

        self.apply_config_in_dut(duthost, full_config, "vnets_"+tunnel_name)
        return return_dict

    def setup_vnet_intf(self, selected_interfaces, vnet_list, minigraph_data):
        '''
        Setup the interface(or in other words associate the interface to
        a Vnet. This will remove the ip address from the interfaces.

        selected_interfaces : The list of interfaces we decided to use.
        vnet_list           : The list of vnets to use. The list of vnets
                              and interfaces should be of same length.
        minigraph_data      : The minigraph_facts data from DUT.
        '''
        if len(selected_interfaces) != len(vnet_list):
            raise RuntimeError(
                "Different number of interfaces and vnets, not supported yet")

        ret_list = {}
        # NOTE(review): intf_config_list/po_config_list are built but never
        # applied to the DUT here — presumably the caller pushes the config;
        # verify, otherwise this function only computes the mapping.
        intf_config_list = []
        po_config_list = []
        for count, intf in enumerate(selected_interfaces):
            config = ('''
                "{}": {{
                    "vnet_name": "{}"
                }}
            '''.format(intf, vnet_list[count]))

            if "Ethernet" in intf:
                intf_config_list.append(config)
                ret_list[intf] = vnet_list[count]
            elif "PortChannel" in intf:
                po_config_list.append(config)
                for member in self.get_ethernet_ports([intf], minigraph_data):
                    ret_list[member] = vnet_list[count]

        return ret_list

    def configure_vxlan_switch(self, duthost, vxlan_port=4789, dutmac=None):
        '''
        Configure the VxLAN parameters for the DUT.
        This step is completely optional.

        duthost    : AnsibleHost structure of the DUT.
        vxlan_port : The UDP port to be used for VxLAN traffic.
        dutmac     : The mac address to be configured in the DUT.
        '''
        if dutmac is None:
            dutmac = "aa:bb:cc:dd:ee:ff"

        switch_config = '''
[
        {{
                "SWITCH_TABLE:switch": {{
                        "vxlan_port": "{}",
                        "vxlan_router_mac": "{}"
                }},
                "OP": "SET"
        }}
]
'''.format(vxlan_port, dutmac)
        self.apply_config_in_swss(duthost, switch_config, "vnet_switch")

    def apply_config_in_swss(self, duthost, config, name="swss_"):
        '''
        Apply the given config data in the SWSS container of the DUT.
        duthost: AnsibleHost structure of the DUT.
        config : The config to be applied in the swss container.
        name   : The name of the config file to be created in the DUT.
        '''
        if self.Constants['DEBUG']:
            filename = name + ".json"
        else:
            filename = name + "-" + str(time.time()) + ".json"

        duthost.copy(content=config, dest="/tmp/{}".format(filename))
        duthost.shell(
            'docker exec -i swss swssconfig /dev/stdin < /tmp/{}'.format(
                filename))
        # Larger configs need proportionally longer to take effect.
        Logger.info("Wait for %s seconds for the config to take effect.",
                    0.0005*getsizeof(config) + 1)
        time.sleep(int(0.0005*getsizeof(config)) + 1)
        if not self.Constants['KEEP_TEMP_FILES']:
            duthost.shell("rm /tmp/{}".format(filename))

    def get_list_of_nexthops(self, number, af, prefix=100):
        '''
        Get a list of IP addresses to be used as nexthops. This creates a
        pool of dummy nexthops. The other functions can use this pool to
        assign nexthops to different destinations.
        number : Number of addresses we need.
        af     : Address Family (v4 or v6).
        prefix : The first octet to be used for the addresses.
        '''
        nexthop_list = []
        for _ in range(number):
            nexthop_list.append(
                self.get_ip_address(af=af, netid=prefix, hostid=10))
        return nexthop_list

    def create_vnet_routes(
            self,
            duthost,
            vnet_list,
            dest_af,
            nh_af,
            nhs_per_destination=1,
            number_of_available_nexthops=100,
            number_of_ecmp_nhs=1000,
            dest_net_prefix=150,
            nexthop_prefix=100,
            bfd=False):
        '''
        This configures the VNET_TUNNEL_ROUTES structure. It precalculates
        the required number of destinations based on the given
        "number_of_ecmp_nhs" and the "nhs_per_destination".

        inputs:
            number_of_available_nexthops : Total number of unique
                                           NextHops available for use.
            nhs_per_destination          : Number of ECMP nexthops to use
                                           per destination.
            number_of_ecmp_nhs           : Maximum number of all NextHops
                                           put together(for all
                                           destinations).
        Returns the destination->nexthops map that was configured.
        '''
        if number_of_available_nexthops < nhs_per_destination:
            # The original passed logging-style %s args to RuntimeError,
            # which never formats them; format the message explicitly.
            raise RuntimeError(
                "The number of available nexthops ip addresses is not enough "
                "to cover even one destination. Pls rerun with "
                "total_number_of_endpoints({}) > "
                "ecmp_nhs_per_destination({})".format(
                    number_of_available_nexthops, nhs_per_destination))

        available_nexthops = self.get_list_of_nexthops(
            number=number_of_available_nexthops,
            af=nh_af, prefix=nexthop_prefix)

        number_of_destinations = int(number_of_ecmp_nhs / nhs_per_destination)
        no_of_dests_per_vnet = int(number_of_destinations / len(vnet_list))
        available_nexthop_count = 0
        dest_to_nh_map = {}
        for vnet in vnet_list:
            for _ in range(no_of_dests_per_vnet):
                dest = self.get_ip_address(af=dest_af, netid=dest_net_prefix)
                my_nhs = []
                for _ in range(nhs_per_destination):
                    # Round-robin through the nexthop pool.
                    my_nhs.append(
                        available_nexthops[
                            available_nexthop_count %
                            number_of_available_nexthops])
                    available_nexthop_count = available_nexthop_count + 1
                    if available_nexthop_count > number_of_ecmp_nhs:
                        break

                try:
                    dest_to_nh_map[vnet]
                except KeyError:
                    dest_to_nh_map[vnet] = {}
                dest_to_nh_map[vnet][dest] = my_nhs

        self.set_routes_in_dut(duthost,
                               dest_to_nh_map,
                               dest_af,
                               "SET",
                               bfd=bfd)
        return dest_to_nh_map

    @classmethod
    def get_outer_layer_version(cls, encap_type):
        '''
        Short function to get the outer layer address family from the
        encap type.
        '''
        match = re.search("in_(v[46])", encap_type)
        if match:
            return match.group(1)
        else:
            raise RuntimeError(
                "Invalid format for encap_type:{}".format(encap_type))

    @classmethod
    def get_payload_version(cls, encap_type):
        '''
        Short function to get the inner layer address family from the
        encap type.
        '''
        match = re.search("(v[46])_in_v", encap_type)
        if match:
            return match.group(1)
        else:
            raise RuntimeError(
                "Invalid format for encap_type:{}".format(encap_type))

    def create_and_apply_config(self,
                                duthost,
                                vnet,
                                dest,
                                mask,
                                nhs,
                                op,
                                bfd=False):
        '''
        Create a single destination->endpoint list mapping, and configure
        it in the DUT.
        duthost : AnsibleHost structure for the DUT.
        vnet    : Name of the Vnet.
        dest    : IP(v4/v6) address of the destination.
        mask    : Dest netmask length.
        nhs     : Nexthop list(v4/v6).
        op      : Operation to be done : SET or DEL.
        '''
        config = self.create_single_route(vnet, dest, mask, nhs, op, bfd=bfd)
        str_config = '[\n' + config + '\n]'
        self.apply_config_in_swss(duthost, str_config, op + "_vnet_route")

    @classmethod
    def create_single_route(cls, vnet, dest, mask, nhs, op, bfd=False):
        '''
        Create a single route entry for vnet, for the given dest, through
        the endpoints:nhs, op:SET/DEL
        '''
        if bfd:
            config = '''{{
                "VNET_ROUTE_TUNNEL_TABLE:{}:{}/{}": {{
                    "endpoint": "{}",
                    "endpoint_monitor": "{}"
                }},
                "OP": "{}"
            }}'''.format(vnet, dest, mask, ",".join(nhs), ",".join(nhs), op)

        else:
            config = '''{{
                "VNET_ROUTE_TUNNEL_TABLE:{}:{}/{}": {{
                    "endpoint": "{}"
                }},
                "OP": "{}"
            }}'''.format(vnet, dest, mask, ",".join(nhs), op)

        return config

    def get_ip_address(self, af, hostid=1, netid=100):
        '''
        Calculate an ip address from the given arguments.
        af     : Address Family.
        hostid : The last octet.
        netid  : The first octet.
        '''
        third_octet = self.Address_Count % 255
        # Floor division is required: Python 3 "/" would yield floats and
        # corrupt the generated address strings (e.g. "100.0.0.1" -> "100.0.0.1"
        # only under Python 2).
        second_octet = (self.Address_Count // 255) % 255
        first_octet = netid + (self.Address_Count // 65025)
        self.Address_Count = self.Address_Count + 1
        if af == 'v4':
            return "{}.{}.{}.{}".format(
                first_octet, second_octet, third_octet, hostid)
        if af == 'v6':
            # :0: gets removed in the IPv6 addresses.
            # Adding "a" to octets, to avoid it.
            return "fddd:a{}:a{}::a{}:{}".format(
                first_octet, second_octet, third_octet, hostid)

    def set_routes_in_dut(self,
                          duthost,
                          dest_to_nh_map,
                          dest_af,
                          op,
                          bfd=False):
        '''
        Configure Vnet routes in the DUT.
        duthost        : AnsibleHost structure for the DUT.
        dest_to_nh_map : The full map of the destination->Nexthops
                         dictionary.
        dest_af        : Address family of the destination.
        op             : Operation to be done: SET or DEL.
        bfd            : Enable BFD or not (True/False).
        '''
        config_list = []
        for vnet in dest_to_nh_map:
            for dest in dest_to_nh_map[vnet]:
                config_list.append(self.create_single_route(
                    vnet,
                    dest,
                    self.HOST_MASK[dest_af],
                    dest_to_nh_map[vnet][dest],
                    op,
                    bfd=bfd))

        full_config = '[' + "\n,".join(config_list) + '\n]'
        self.apply_config_in_swss(duthost, full_config, op+"_routes")

    def get_t2_ports(self, duthost, minigraph_data):
        '''
        In T1 topology, any port connected to the T2 BGP neighbors are
        needed. In T0, any port connected to the T1 BGP neighbors are
        needed. Returns the PTF indices of those ports.
        '''
        portchannels_to_t2 = self.get_portchannels_to_neighbors(
            duthost,
            "T2",
            minigraph_data)
        list_of_interfaces = []
        if portchannels_to_t2:
            for pc_name in portchannels_to_t2:
                list_of_interfaces.extend(portchannels_to_t2[pc_name])
        else:
            list_of_interfaces = self.get_ethernet_to_neighbors(
                "T2", minigraph_data)

        ret_list = []
        for iface in list_of_interfaces:
            ret_list.append(minigraph_data["minigraph_ptf_indices"][iface])
        return ret_list

    @classmethod
    def bgp_established(cls, duthost, down_list=None):
        '''
        Verify if the BGP state is as per our requirements.
        The BGP neighbors that are listed in the down_list must be down,
        and the rest should be up. If this condition is met, return True,
        else False.

        duthost   : AnsibleHost structure of the DUT.
        down_list : The BGP neighbors that are expected to be down.
        '''
        bgp_facts = duthost.bgp_facts()['ansible_facts']
        if down_list is None:
            down_list = []
        for addr, value in bgp_facts['bgp_neighbors'].items():
            if value['state'] == 'established':
                if addr in down_list:
                    # The neighbor is supposed to be down, and is actually up.
                    Logger.info(
                        "Neighbor %s is established, but should be down.",
                        addr)
                    return False
                else:
                    # The neighbor is supposed to be up, and is actually up.
                    continue
            else:
                if addr in down_list:
                    # The neighbor is supposed to be down, and is actually
                    # down.
                    continue
                else:
                    # The neighbor is supposed to be up, but is actually down.
                    Logger.info(
                        "Neighbor %s is not yet established, has state: %s",
                        addr,
                        value['state'])
                    return False

        # Now wait for the routes to be updated.
        time.sleep(30)
        return True

    @classmethod
    def get_downed_bgp_neighbors(cls, shut_intf_list, minigraph_data):
        '''
        Get the list of bgp neighbors that should be down,
        based on the interfaces that are shutdown.
        '''
        ret_list = []
        for intf in shut_intf_list:
            for m_intf in minigraph_data['minigraph_portchannel_interfaces'] +\
                    minigraph_data['minigraph_interfaces']:
                if m_intf['attachto'] == intf:
                    ret_list.append(m_intf['peer_addr'])
        return ret_list

    @classmethod
    def get_all_bgp_neighbors(cls, minigraph_facts, role):
        '''
        Get the list of BGP neighbors from the minigraph_facts.
        minigraph_facts : Minigraph data from the DUT.
        role            : The role of the BGP neighbor. T0 or T2.
        Returns a map of neighbor name -> {"v4": addr, "v6": addr}.
        '''
        all_neighbors = {}
        for element in minigraph_facts['minigraph_bgp']:
            if role in element['name']:
                try:
                    all_neighbors[element['name']]
                except KeyError:
                    all_neighbors[element['name']] = {}
                if ip_address(element['addr']).version == 4:
                    all_neighbors[element['name']].update(
                        {"v4": element['addr']})
                elif ip_address(element['addr']).version == 6:
                    all_neighbors[element['name']].update(
                        {"v6": element['addr']})
        return all_neighbors

    def get_corresponding_ports(self, shut_intf_list, minigraph_data):
        '''
        This is for tests that shutdown some of the T2 ports.
        This function will check which ports are to be ignored for the encap
        packets coming back to the PTF. If the encap packet comes in any of
        these ports, it is a bug.
        '''
        eth_ifaces_list = []
        for intf in shut_intf_list:
            if "Ethernet" in intf:
                eth_ifaces_list.append(intf)
            elif "PortChannel" in intf:
                for port in self.get_ethernet_ports([intf], minigraph_data):
                    eth_ifaces_list.append(port)
        return_list = [minigraph_data["minigraph_ptf_indices"][iface]
                       for iface in eth_ifaces_list]
        return return_list

    def get_ethernet_ports(self, intf_list, minigraph_data):
        '''
        The given interface list can be either Ethernet or Portchannel.
        This function will return a flat list of Ethernet ports
        corresponding to the given intf_list itself, or members of
        Portchannels.
        '''
        ret_list = []
        for intf in intf_list:
            if "Ethernet" in intf:
                ret_list.append(intf)
            elif "PortChannel" in intf:
                ret_list.extend(
                    minigraph_data['minigraph_portchannels'][intf]['members'])

        return ret_list

    def gather_ptf_indices_t2_neighbor(
            self,
            minigraph_facts,
            all_t2_neighbors,
            t2_neighbor,
            encap_type):
        '''
        Get the list of PTF port indices for the given t2_neighbor.
        In T1 topology, every DUT port is mapped to a port in the PTF.
        minigraph_facts  : Minigraph data from the Duthost.
        all_t2_neighbors : All T2 neighbors of the DUT.
        t2_neighbor      : The T2 neighbor for which we need the PTF ports.
        encap_type       : Encap type(v4_in_v4/v4_in_v6/v6_in_v4/v6_in_v6)
        '''
        # All T2 Neighbors VM's name to Neighbor IP Mapping
        all_pcs = minigraph_facts['minigraph_portchannel_interfaces']
        # Neighbor IP to Portchannel interfaces mapping
        pc_to_ip_map = {}
        for each_pc in all_pcs:
            pc_to_ip_map[each_pc['peer_addr']] = each_pc['attachto']
        # Finding the portchannel under shutdown T2 Neighbor
        outer_af = self.get_outer_layer_version(encap_type)
        required_pc = \
            pc_to_ip_map[all_t2_neighbors[t2_neighbor][outer_af].lower()]
        # Finding ethernet interfaces under that specific portchannel
        required_ethernet_interfaces = \
            minigraph_facts['minigraph_portchannels'][required_pc]['members']
        # Finding interfaces with PTF indices
        ret_list = []
        for iface in required_ethernet_interfaces:
            ret_list.append(minigraph_facts["minigraph_ptf_indices"][iface])
        return ret_list

    @classmethod
    def start_bfd_responder(cls, ptfhost, dut_mac, dut_loop_ips, monitor_file):
        '''
        Configure the supervisor in the PTF with BFD responder and start
        the BFD responder.
        ptfhost      : AnsibleHost structure of the PTF container.
        dut_mac      : Mac address of the DUT.
        dut_loop_ips : IPv4 and IPv6 addresses of the Loopback interface
                       in the DUT.
        monitor_file : The file to be monitored by the BFD responder.
        '''
        ptfhost.copy(dest=monitor_file, content="\n\n\n")

        extra_vars = {
            "bfd_responder_args":
                'dut_mac=u"{}";dut_loop_ips={};monitor_file="{}"'.format(
                    dut_mac,
                    str(dut_loop_ips).replace('\'', '"'),
                    monitor_file)}
        try:
            # Best-effort stop: the responder may not be running yet.
            ptfhost.command('supervisorctl stop bfd_responder')
        except BaseException:
            pass

        ptfhost.host.options["variable_manager"].extra_vars.update(extra_vars)
        script_args = \
            '''dut_mac=u"{}";dut_loop_ips={};monitor_file="{}"'''.format(
                dut_mac, str(dut_loop_ips).replace('\'', '"'), monitor_file)
        supervisor_conf_content = '''
[program:bfd_responder]
command=ptf --test-dir /root/ptftests bfd_responder.BFD_Responder''' +\
            ' --platform-dir /root/ptftests -t' + \
            ''' '{}' --relax --platform remote
process_name=bfd_responder
stdout_logfile=/tmp/bfd_responder.out.log
stderr_logfile=/tmp/bfd_responder.err.log
redirect_stderr=false
autostart=false
autorestart=true
startsecs=1
numprocs=1
'''.format(script_args)
        ptfhost.copy(
            content=supervisor_conf_content,
            dest='/etc/supervisor/conf.d/bfd_responder.conf')

        ptfhost.command('supervisorctl reread')
        ptfhost.command('supervisorctl update')
        ptfhost.command('supervisorctl start bfd_responder')

    @classmethod
    def stop_bfd_responder(cls, ptfhost):
        '''
        Stop the BFD responder, and clean it up from the supervisor.
        '''
        try:
            ptfhost.command('supervisorctl stop bfd_responder')
        except BaseException:
            pass
        ptfhost.command('supervisorctl remove bfd_responder')

    @classmethod
    def update_monitor_file(cls,
                            ptfhost,
                            monitor_file,
                            intf_list,
                            ip_address_list):
        '''
        Update the BFD responder's list of IP addresses and interfaces to
        respond to. The bfd_responder will keep reading this file every
        second and update itself.
        ptfhost         : AnsibleHost structure of the PTF container.
        monitor_file    : The monitor file of the bfd_responder.
        intf_list       : The list of interface indices in the PTF to work
                          with.
        ip_address_list : The list of IP addresses from the DUT to
                          respond to.
        '''
        ptfhost.copy(
            dest=monitor_file,
            content="{}\n{}\n".format(
                ",".join(map(str, intf_list)),
                ",".join(ip_address_list)))
        time.sleep(3)

    def create_and_apply_priority_config(self,
                                         duthost,
                                         vnet,
                                         dest,
                                         mask,
                                         nhs,
                                         primary,
                                         op):
        '''
        Create a single destination->endpoint list mapping with priority
        (primary/backup) endpoints, and configure it in the DUT.
        duthost : AnsibleHost structure for the DUT.
        vnet    : Name of the Vnet.
        dest    : IP(v4/v6) address of the destination.
        mask    : Dest netmask length.
        nhs     : Nexthop list(v4/v6).
        primary : list of primary endpoints.
        op      : Operation to be done : SET or DEL.
        '''
        config = self.create_single_priority_route(
            vnet, dest, mask, nhs, primary, op)
        str_config = '[\n' + config + '\n]'
        self.apply_config_in_swss(duthost, str_config, op + "_vnet_route")

    @classmethod
    def create_single_priority_route(cls, vnet, dest, mask, nhs, primary, op):
        '''
        Create a single priority route entry for vnet, for the given dest,
        through the endpoints:nhs, with the given primary list, op:SET/DEL
        '''
        config = '''{{
            "VNET_ROUTE_TUNNEL_TABLE:{}:{}/{}": {{
                "endpoint": "{}",
                "endpoint_monitor": "{}",
                "primary" : "{}",
                "monitoring" : "custom",
                "adv_prefix" : "{}/{}"
            }},
            "OP": "{}"
        }}'''.format(vnet, dest, mask, ",".join(nhs), ",".join(nhs),
                     ",".join(primary), dest, mask, op)
        return config

    def set_vnet_monitor_state(self, duthost, dest, mask, nh, state):
        '''
        Set the monitor state (up/down) for one endpoint of a vnet route
        directly in the DUT's STATE_DB.
        '''
        duthost.shell(
            "sonic-db-cli STATE_DB HSET 'VNET_MONITOR_TABLE|{}|{}/{}'"
            " 'state' '{}'".format(nh, dest, mask, state))