Skip to content

Commit 9b212a4

Browse files
w1ndaselldinesh
authored andcommitted
Leverage sairedis log and syslog to calculate route converge time in scale test (#20856)
What is the motivation for this PR? To get the route converge time of one port flap. To get collect results of all test cases. How did you do it? Leverage sairedis log and syslog to calculate that. Save all results into single dictionary and print it at last. How did you verify/test it? Run on testbed Signed-off-by: selldinesh <[email protected]>
1 parent 953b733 commit 9b212a4

File tree

1 file changed

+107
-16
lines changed

1 file changed

+107
-16
lines changed

tests/bgp/test_ipv6_bgp_scale.py

Lines changed: 107 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import base64
1111
import ipaddress
1212
import random
13+
import re
1314
import time
1415
from copy import deepcopy
1516
from threading import Thread, Event
@@ -40,13 +41,22 @@
4041
MAX_DOWNTIME_UNISOLATION = 300 # seconds
4142
MAX_DOWNTIME_NEXTHOP_GROUP_MEMBER_CHANGE = 30 # seconds
4243
PKTS_SENDING_TIME_SLOT = 1 # seconds
44+
MAX_CONVERGENCE_TIME = 5 # seconds
4345
MAX_CONVERGENCE_WAIT_TIME = 300 # seconds
4446
PACKETS_PER_TIME_SLOT = 500 // PKTS_SENDING_TIME_SLOT
4547
MASK_COUNTER_WAIT_TIME = 10 # wait some seconds for mask counters processing packets
4648
STATIC_ROUTES = ['0.0.0.0/0', '::/0']
4749
WITHDRAW_ROUTE_NUMBER = 1
4850
PACKET_QUEUE_LENGTH = 1000000
4951
global_icmp_type = 123
52+
test_results = {}
53+
current_test = ""
54+
55+
56+
@pytest.fixture(scope="module", autouse=True)
57+
def log_test_results():
58+
yield
59+
logger.info("test_results: %s", test_results)
5060

5161

5262
def setup_packet_mask_counters(ptf_dataplane, icmp_type):
@@ -309,6 +319,15 @@ def calculate_downtime(ptf_dp, end_time, start_time, masked_exp_pkt):
309319
pps,
310320
downtime
311321
)
322+
global current_test, test_results
323+
test_results[current_test] = f"traffic thread duration: {(end_time - start_time).total_seconds()} seconds, " + \
324+
f"rx_counters: {ptf_dp.mask_rx_cnt[masked_exp_pkt]}, " + \
325+
f"tx_counters: {ptf_dp.mask_tx_cnt[masked_exp_pkt]}, " + \
326+
f"Total packets received: {rx_total}, " + \
327+
f"Total packets sent: {tx_total}, " + \
328+
f"Missing packets: {missing_pkt_cnt}, " + \
329+
f"Estimated pps: {pps}, " + \
330+
f"downtime: {downtime}"
312331
return downtime
313332

314333

@@ -426,8 +445,67 @@ def compress_expected_routes(expected_routes):
426445
return b64_str
427446

428447

448+
def test_port_flap_with_syslog(
449+
request,
450+
duthost,
451+
bgp_peers_info,
452+
setup_routes_before_test
453+
):
454+
global current_test, test_results
455+
current_test = request.node.name
456+
TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
457+
LOG_STAMP = "ONLY_ANALYSIS_LOGS_AFTER_THIS_LINE_%s" % TIMESTAMP
458+
TMP_SYSLOG_FILEPATH = "/tmp/syslog_after_bgp_flapping_%s.log" % TIMESTAMP
459+
bgp_neighbors = [hostname for hostname in bgp_peers_info.keys()]
460+
flapping_neighbor = random.choice(bgp_neighbors)
461+
flapping_ports = [bgp_peers_info[flapping_neighbor][DUT_PORT]]
462+
logger.info("Flapping port: %s", flapping_ports)
463+
464+
startup_routes = get_all_bgp_ipv6_routes(duthost, True)
465+
nexthops_to_remove = [b[IPV6_KEY] for b in bgp_peers_info.values() if b[DUT_PORT] in flapping_ports]
466+
expected_routes = deepcopy(startup_routes)
467+
remove_routes_with_nexthops(startup_routes, nexthops_to_remove, expected_routes)
468+
compressed_expected_routes = compress_expected_routes(expected_routes)
469+
duthost.shell('sudo logger "%s"' % LOG_STAMP)
470+
try:
471+
result = check_bgp_routes_converged(
472+
duthost,
473+
compressed_expected_routes,
474+
flapping_ports,
475+
MAX_CONVERGENCE_WAIT_TIME,
476+
compressed=True,
477+
action='shutdown'
478+
)
479+
480+
if not result.get("converged"):
481+
pytest.fail("BGP routes are not stable in long time")
482+
483+
duthost.shell('sudo logger -f /var/log/swss/sairedis.rec')
484+
duthost.shell('sudo awk "/%s/ {found=1; next} found" %s > %s'
485+
% (LOG_STAMP, '/var/log/syslog', TMP_SYSLOG_FILEPATH))
486+
487+
last_group_update = duthost.shell('sudo cat %s | grep "|C|SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER||" | tail -n 1'
488+
% TMP_SYSLOG_FILEPATH)['stdout']
489+
last_group_update_time_str = re.search(r'\d{4}-\d{2}-\d{2}\.\d{2}:\d{2}:\d{2}\.\d+', last_group_update).group(0)
490+
last_group_update_time = datetime.datetime.strptime(last_group_update_time_str, "%Y-%m-%d.%H:%M:%S.%f")
491+
492+
port_shut_log = duthost.shell('sudo cat %s | grep "Configure %s admin status to down" | tail -n 1'
493+
% (TMP_SYSLOG_FILEPATH, flapping_ports[0]))['stdout']
494+
port_shut_time_str = " ".join(port_shut_log.split()[:4])
495+
port_shut_time = datetime.datetime.strptime(port_shut_time_str, "%Y %b %d %H:%M:%S.%f")
496+
497+
time_gap = (last_group_update_time - port_shut_time).total_seconds()
498+
if time_gap > MAX_CONVERGENCE_TIME:
499+
pytest.fail("Time is too long, from port shut to last group update is %s seconds" % time_gap)
500+
logger.info("Time difference between port shut and last nexthop group update is %s seconds", time_gap)
501+
test_results[current_test] = "Time between port shut and last nexthop group update is %s seconds" % time_gap
502+
finally:
503+
duthost.no_shutdown_multiple(flapping_ports)
504+
505+
429506
@pytest.mark.parametrize("flapping_port_count", [1, 10, 20])
430507
def test_sessions_flapping(
508+
request,
431509
duthost,
432510
ptfadapter,
433511
bgp_peers_info,
@@ -446,6 +524,8 @@ def test_sessions_flapping(
446524
Expected result:
447525
Dataplane downtime is less than MAX_DOWNTIME_ONE_PORT_FLAPPING.
448526
'''
527+
global current_test
528+
current_test = request.node.name + f"_flapping_port_count_{flapping_port_count}"
449529
global global_icmp_type
450530
global_icmp_type += 1
451531
pdp = ptfadapter.dataplane
@@ -532,6 +612,8 @@ def test_nexthop_group_member_scale(
532612
Expected result:
533613
Dataplane downtime is less than MAX_DOWNTIME_NEXTHOP_GROUP_MEMBER_CHANGE.
534614
'''
615+
global current_test
616+
current_test = request.node.name + "_withdraw"
535617
servers_dut_interfaces = setup_routes_before_test
536618
topo_name = tbinfo['topo']['name']
537619
global global_icmp_type
@@ -593,23 +675,29 @@ def test_nexthop_group_member_scale(
593675
ptf_ip = ptfhost.mgmt_ip
594676
change_routes_on_peers(localhost, ptf_ip, topo_name, peers_routes_to_change, ACTION_WITHDRAW,
595677
servers_dut_interfaces.get(ptf_ip, ''))
596-
compressed_expected_routes = compress_expected_routes(expected_routes)
597-
result = check_bgp_routes_converged(
598-
duthost,
599-
compressed_expected_routes,
600-
[],
601-
MAX_CONVERGENCE_WAIT_TIME,
602-
compressed=True,
603-
action='no_action'
604-
)
605-
terminated.set()
606-
traffic_thread.join()
607-
end_time = datetime.datetime.now()
608-
validate_rx_tx_counters(pdp, end_time, start_time, exp_mask, MAX_DOWNTIME_NEXTHOP_GROUP_MEMBER_CHANGE)
609-
if not result.get("converged"):
610-
pytest.fail("BGP routes are not stable in long time")
611-
678+
try:
679+
compressed_expected_routes = compress_expected_routes(expected_routes)
680+
result = check_bgp_routes_converged(
681+
duthost,
682+
compressed_expected_routes,
683+
[],
684+
MAX_CONVERGENCE_WAIT_TIME,
685+
compressed=True,
686+
action='no_action'
687+
)
688+
terminated.set()
689+
traffic_thread.join()
690+
end_time = datetime.datetime.now()
691+
validate_rx_tx_counters(pdp, end_time, start_time, exp_mask, MAX_DOWNTIME_NEXTHOP_GROUP_MEMBER_CHANGE)
692+
if not result.get("converged"):
693+
pytest.fail("BGP routes are not stable in long time")
694+
finally:
695+
for ptfhost in ptfhosts:
696+
ptf_ip = ptfhost.mgmt_ip
697+
change_routes_on_peers(localhost, ptf_ip, topo_name, peers_routes_to_change, ACTION_ANNOUNCE,
698+
servers_dut_interfaces.get(ptf_ip, ''))
612699
# ------------announce routes and test ------------ #
700+
current_test = request.node.name + "_announce"
613701
global_icmp_type += 1
614702
exp_mask = setup_packet_mask_counters(pdp, global_icmp_type)
615703
pkts = generate_packets(
@@ -657,6 +745,7 @@ def test_nexthop_group_member_scale(
657745

658746

659747
def test_device_unisolation(
748+
request,
660749
duthost,
661750
ptfadapter,
662751
bgp_peers_info,
@@ -676,6 +765,8 @@ def test_device_unisolation(
676765
Expected result:
677766
Dataplane downtime is less than MAX_DOWNTIME_UNISOLATION.
678767
'''
768+
global current_test
769+
current_test = request.node.name
679770
global global_icmp_type
680771
global_icmp_type += 1
681772
pdp = ptfadapter.dataplane

0 commit comments

Comments
 (0)