From 65207f46d05620362200b445252262ae9a16ca70 Mon Sep 17 00:00:00 2001 From: vkjammala-arista <152394203+vkjammala-arista@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:05:04 +0530 Subject: [PATCH] [sonic-mgmt] Fix sflow/test_sflow.py failures with expected sflow packets not received on collector interface (#22186) * [sonic-mgmt] Fix sflow/test_sflow.py failures with expected sflow packets not received on collector interface Issue #1: In some cases (like sflow config enabled for first time, device reboot), hsflowd daemon is taking little over 3 mins to be fully initialized and process collector config. During this window, hsflowd service won't send sflow packets ('CounterSample', 'FlowSample' etc) to collector interface and thus test can fail with i) "Packets are not received in active collector, collector\d+" and ii) "Expected Number of samples are not collected from Interface Ethernet\d+ in collector collector\d+ , Received \d+" hsflowd service is writing to "/etc/hsflowd.auto" once it's processed collector configuration. Thus waiting for collector info to be present in "/etc/hsflowd.auto" seems to be safe option before proceeding with sflow traffic verfication. Issue #2: If the test expects flow samples/packets on the collector interface but they aren't seen for some reason, then we are hitting "KeyError: 'flow_port_count'". Due to counter samples seen on collector interface, "data['total_samples']" will not be zero but "data['total_flow_count']" will be 0 and lead to KeyError when tried to access "data['flow_port_count']". Fix is to have assert on "total_flow_count" and "total_counter_count" before calling corresponding sample analyze functions. Signed-off-by: Vinod * Addressing review comments 1) Enhanced "wait_until_hsflowd_ready" to make it wait for all the collector IPs (instead of calling it sequentially for each IP) 2) Add docstring for "wait_until_hsflowd_ready" function 3) Updated "ast.literal_eval" usage to handle the case where "active_collectors" is passed as empty string ("" instead of "[]") Signed-off-by: Vinod * Fix pre-commit check failures Signed-off-by: Vinod * Revert PR#21674 partially to enable "sflow/test_sflow.py" test Signed-off-by: Vinod --------- Signed-off-by: Vinod Signed-off-by: mssonicbld --- .../test/files/ptftests/py3/sflow_test.py | 8 +-- tests/sflow/test_sflow.py | 66 ++++++++++++++++++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/ansible/roles/test/files/ptftests/py3/sflow_test.py b/ansible/roles/test/files/ptftests/py3/sflow_test.py index 4abfeb57819..08a71ffb740 100644 --- a/ansible/roles/test/files/ptftests/py3/sflow_test.py +++ b/ansible/roles/test/files/ptftests/py3/sflow_test.py @@ -184,15 +184,15 @@ def packet_analyzer(self, port_sample, collector, poll_test): % (data['total_counter_count'], collector)) else: logging.info("..Analyzing polling test counter packets") - self.assertTrue(data['total_samples'] != 0, - "....Packets are not received in active collector ,%s" % collector) + self.assertTrue(data['total_counter_count'] != 0, + "....Counter packets are not received in active collector ,%s" % collector) self.analyze_counter_sample( data, collector, self.polling_int, port_sample) else: logging.info( "Analyzing flow samples in collector %s" % collector) - self.assertTrue(data['total_samples'] != 0, - "....Packets are not received in active collector ,%s" % collector) + self.assertTrue(data['total_flow_count'] != 0, + "....Flow packets are not received in active collector ,%s" % collector) self.analyze_flow_sample(data, collector) return data diff --git a/tests/sflow/test_sflow.py b/tests/sflow/test_sflow.py index 24f64ff4c01..4877346c217 100644 --- a/tests/sflow/test_sflow.py +++ b/tests/sflow/test_sflow.py @@ -5,6 +5,7 @@ --enable_sflow_feature: Enable sFlow feature on DUT. Default is disabled """ +import ast import pytest import logging import time @@ -220,6 +221,58 @@ def config_sflow_interfaces(duthost, intf, **kwargs): # ---------------------------------------------------------------------------------- +def verify_hsflowd_ready(duthost, collector_ips): + """ + Verify hsflowd has fully initialized with all specified collector configurations. + This is done by checking if /etc/hsflowd.auto contains an entry for each collector IP. + + Args: + duthost: DUT host object + collector_ips: List of collector IP addresses to check for + + Returns: + True if hsflowd.auto contains entries for all collector IPs, False otherwise + """ + return all( + duthost.shell( + f"docker exec sflow grep -q 'collector={ip}' /etc/hsflowd.auto 2>/dev/null", + module_ignore_errors=True + )['rc'] == 0 + for ip in collector_ips + ) + + +def wait_until_hsflowd_ready(duthost, collector_ips): + """ + Wait until hsflowd has fully initialized with all specified collector configurations. + + Retries every 10 seconds for up to 240 seconds (4 minutes). This timeout accounts for + cases where hsflowd takes over 3 minutes to initialize (e.g., first-time sflow config + enable or device reboot). + + Args: + duthost: DUT host object + collector_ips: List of collector IP addresses that must all be present in hsflowd.auto + + Raises: + AssertionError: If not all collectors are initialized within 240 seconds + """ + logger.info(f"Waiting for hsflowd to initialize with collector(s): {collector_ips}") + start_time = time.time() + pytest_assert( + wait_until( + 240, 10, 0, # 4 minutes max, check every 10 seconds + verify_hsflowd_ready, + duthost, + collector_ips, + ), + f"hsflowd failed to initialize collector(s) {collector_ips} within 240 seconds. " + f"Check /etc/hsflowd.auto in sflow container." + ) + elapsed = time.time() - start_time + logger.info(f"hsflowd initialized with all collector(s) after {elapsed:.1f} seconds") + + def config_sflow_collector(duthost, collector, config): collector = var[collector] if config == 'add': @@ -273,7 +326,9 @@ def verify_sflow_interfaces(duthost, intf, status, sampling_rate): @pytest.fixture -def partial_ptf_runner(request, ptfhost, tbinfo): +def partial_ptf_runner(request, duthosts, rand_one_dut_hostname, ptfhost, tbinfo): + duthost = duthosts[rand_one_dut_hostname] + def _partial_ptf_runner(**kwargs): logger.info(f'The enabled sflow interface is: {kwargs}') params = {'testbed_type': tbinfo['topo']['name'], @@ -282,6 +337,15 @@ def _partial_ptf_runner(**kwargs): 'agent_id': var['lo_ip'], 'sflow_ports_file': "/tmp/sflow_ports.json"} params.update(kwargs) + + # Make sure hsflowd daemon has processed collector config before + # proceeding with traffic verification. + collectors = kwargs.get('active_collectors', '[]') + collector_list = ast.literal_eval(collectors or '[]') if isinstance(collectors, str) else collectors + collector_ips = [var[collector]['ip_addr'] for collector in collector_list] + if collector_ips: + wait_until_hsflowd_ready(duthost, collector_ips) + ptf_runner(host=ptfhost, testdir="ptftests", platform_dir="ptftests",