Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 41 additions & 31 deletions tests/dualtor/test_tunnel_memory_leak.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
1. On a dual ToR testbed, confirm that the tunnel packet handler service is running
1. On a dual ToR testbed, confirm that the tunnel packet handler service is running
in the SWSS container on active Tor (supervisorctl status tunnel_packet_handler)
2. Send a continuous stream of IPinIP packets similar to those sent from the standby
ToR to the active ToR
Expand All @@ -11,17 +11,17 @@
import time
import contextlib
from ptf import testutils
from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_upper_tor
from tests.common.dualtor.dual_tor_common import cable_type
from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host
from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_upper_tor # noqa: F401
from tests.common.dualtor.dual_tor_common import cable_type # noqa: F401
from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host # noqa: F401
from tests.common.dualtor.server_traffic_utils import ServerTrafficMonitor
from tests.common.helpers.assertions import pytest_assert
from tests.common.dualtor.dual_tor_utils import get_t1_ptf_ports
from tests.common.dualtor.dual_tor_utils import mux_cable_server_ip
from tests.common.dualtor.dual_tor_utils import build_packet_to_server
from tests.common.dualtor.dual_tor_utils import delete_neighbor
from tests.common.helpers.dut_utils import get_program_info
from tests.common.fixtures.ptfhost_utils import run_garp_service, run_icmp_responder # lgtm[py/unused-import]
from tests.common.fixtures.ptfhost_utils import run_garp_service, run_icmp_responder # noqa: F401
from tests.common.utilities import wait_until


Expand All @@ -35,12 +35,13 @@
# set threshold buffer to 0.02%
MEM_THRESHOLD_BUFFER = 0.02


def validate_neighbor_entry_exist(duthost, neighbor_addr):
"""Validate if neighbor entry exist on duthost

Args:
duthost (AnsibleHost): Device Under Test (DUT)
neighbor_addr (str): neighbor's ip address
neighbor_addr (str): neighbor's ip address

Returns:
bool: True if neighbor exists. Otherwise, return False.
Expand All @@ -51,6 +52,7 @@ def validate_neighbor_entry_exist(duthost, neighbor_addr):
return False
return True


def is_tunnel_packet_handler_running(duthost):
"""Check if tunnel_packet_handler is running on duthost

Expand All @@ -63,6 +65,7 @@ def is_tunnel_packet_handler_running(duthost):
status, _ = get_program_info(duthost, "swss", "tunnel_packet_handler")
return status == 'RUNNING'


def check_memory_leak(duthost):
"""Check if it has memory leak on duthost

Expand Down Expand Up @@ -94,20 +97,17 @@ def check_memory_leak(duthost):
if not swss_mem_percent:
# Save swss mem usage at the first time.
swss_mem_percent = mem_percent
logging.info("SWSS container original MEM USAGE:{} original percent: {}%"
.format(mem_usage, swss_mem_percent))
logging.info("SWSS container original MEM USAGE:{} original percent: {}%".format(mem_usage, swss_mem_percent))
return False
elif mem_percent > swss_mem_percent + MEM_THRESHOLD_BUFFER:
logging.error("SWSS container MEM percent is increased. current percent:{}%, original percent: {}%"
.format(mem_percent, swss_mem_percent))
.format(mem_percent, swss_mem_percent))
return True
return False

def test_tunnel_memory_leak(toggle_all_simulator_ports_to_upper_tor,
upper_tor_host, lower_tor_host, ptfhost,
ptfadapter, conn_graph_facts, tbinfo, vmhost,
run_arp_responder
):

def test_tunnel_memory_leak(toggle_all_simulator_ports_to_upper_tor, upper_tor_host, lower_tor_host, # noqa: F811
ptfhost, ptfadapter, conn_graph_facts, tbinfo, vmhost, run_arp_responder): # noqa: F811
"""
Test if there is memory leak for service tunnel_packet_handler.
Send ip packets from standby TOR T1 to Server, standby TOR will
Expand All @@ -126,7 +126,7 @@ def prepare_services(ptfhost):
Temporarily start arp and icmp service. Make sure to stop garp service,
otherwise, it will add neighbor entry back automatically.
It has to stop garp_service for triggering tunnel_packet_handler.
It has to start arp and icmp service for receiving packets at server side.
It has to start arp and icmp service for receiving packets at server side.
"""
ptfhost.shell("supervisorctl stop garp_service")
ptfhost.shell("supervisorctl start arp_responder")
Expand All @@ -135,12 +135,14 @@ def prepare_services(ptfhost):
ptfhost.shell("supervisorctl stop arp_responder")
ptfhost.shell("supervisorctl stop icmp_responder")

pytest_assert(is_tunnel_packet_handler_running(upper_tor_host),
"tunnel_packet_handler is not running in SWSS conainter.")
pytest_assert(is_tunnel_packet_handler_running(upper_tor_host),
"tunnel_packet_handler is not running in SWSS conainter.")

ptf_t1_intf = random.choice(get_t1_ptf_ports(lower_tor_host, tbinfo))

all_servers_ips = mux_cable_server_ip(upper_tor_host)
unexpected_count = 0
expected_count = 0

with prepare_services(ptfhost):
# Get the original memory percent before test
Expand All @@ -152,25 +154,33 @@ def prepare_services(ptfhost):
pkt, exp_pkt = build_packet_to_server(lower_tor_host, ptfadapter, server_ipv4)

pytest_assert(wait_until(10, 1, 0, delete_neighbor, upper_tor_host, server_ipv4),
"server ip {} hasn't been deleted from neighbor table.".format(server_ipv4))
"server ip {} hasn't been deleted from neighbor table.".format(server_ipv4))

server_traffic_monitor = ServerTrafficMonitor(
upper_tor_host, ptfhost, vmhost, tbinfo, iface,
conn_graph_facts, exp_pkt, existing=True, is_mocked=False
)
with server_traffic_monitor:
testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=PACKET_COUNT)
logging.info("Sent {} packets from ptf t1 interface {} on standby TOR {}"
.format(PACKET_COUNT, ptf_t1_intf, lower_tor_host.hostname))
# Check memory usage for every operation, used for debugging if test failed
check_memory_leak(upper_tor_host)
pytest_assert(validate_neighbor_entry_exist(upper_tor_host, server_ipv4),
"The server ip {} doesn't exist in neighbor table on dut {}. \
tunnel_packet_handler isn't triggered.".format(server_ipv4, upper_tor_host.hostname))
pytest_assert(len(server_traffic_monitor.matched_packets) > 0,
"Didn't receive any expected packets for server {}.".format(server_ipv4))
try:
with server_traffic_monitor:
testutils.send(ptfadapter, int(ptf_t1_intf.strip("eth")), pkt, count=PACKET_COUNT)
logging.info("Sent {} packets from ptf t1 interface {} on standby TOR {}"
.format(PACKET_COUNT, ptf_t1_intf, lower_tor_host.hostname))
# Check memory usage for every operation, used for debugging if test failed
check_memory_leak(upper_tor_host)
pytest_assert(validate_neighbor_entry_exist(upper_tor_host, server_ipv4),
"The server ip {} doesn't exist in neighbor table on dut {}. \
tunnel_packet_handler isn't triggered.".format(server_ipv4, upper_tor_host.hostname))
except Exception as e:
logging.error("Capture exception {}, continue the process.".format(repr(e)))
if len(server_traffic_monitor.matched_packets) == 0:
logging.error("Didn't receive any expected packets for server {}.".format(server_ipv4))
unexpected_count += 1
else:
expected_count += 1
logging.info("The amount of expected scenarios: {}, the amount of unexpected scenarios: {}."
.format(expected_count, unexpected_count))
# Sleep 10s to let memory usage stabilize, then check if there is a memory leak
time.sleep(10)
check_result = check_memory_leak(upper_tor_host)
pytest_assert(check_result == False, "Test failed because there is memory leak on {}"
.format(upper_tor_host.hostname))
pytest_assert(check_result is False, "Test failed because there is memory leak on {}"
.format(upper_tor_host.hostname))