Skip to content

Commit 37981c7

Browse files
[sonic-mgmt] Fix sflow/test_sflow.py failures with expected sflow packets not received on collector interface (sonic-net#22186)
* [sonic-mgmt] Fix sflow/test_sflow.py failures with expected sflow packets not received on collector interface Issue #1: In some cases (like sflow config enabled for first time, device reboot), hsflowd daemon is taking little over 3 mins to be fully initialized and process collector config. During this window, hsflowd service won't send sflow packets ('CounterSample', 'FlowSample' etc) to collector interface and thus test can fail with i) "Packets are not received in active collector, collector\d+" and ii) "Expected Number of samples are not collected from Interface Ethernet\d+ in collector collector\d+ , Received \d+" hsflowd service is writing to "/etc/hsflowd.auto" once it's processed collector configuration. Thus waiting for collector info to be present in "/etc/hsflowd.auto" seems to be safe option before proceeding with sflow traffic verfication. Issue sonic-net#2: If the test expects flow samples/packets on the collector interface but they aren't seen for some reason, then we are hitting "KeyError: 'flow_port_count'". Due to counter samples seen on collector interface, "data['total_samples']" will not be zero but "data['total_flow_count']" will be 0 and lead to KeyError when tried to access "data['flow_port_count']". Fix is to have assert on "total_flow_count" and "total_counter_count" before calling corresponding sample analyze functions. Signed-off-by: Vinod <[email protected]> * Addressing review comments 1) Enhanced "wait_until_hsflowd_ready" to make it wait for all the collector IPs (instead of calling it sequentially for each IP) 2) Add docstring for "wait_until_hsflowd_ready" function 3) Updated "ast.literal_eval" usage to handle the case where "active_collectors" is passed as empty string ("" instead of "[]") Signed-off-by: Vinod <[email protected]> * Fix pre-commit check failures Signed-off-by: Vinod <[email protected]> * Revert PR#21674 partially to enable "sflow/test_sflow.py" test Signed-off-by: Vinod <[email protected]> --------- Signed-off-by: Vinod <[email protected]>
1 parent 31b53be commit 37981c7

3 files changed

Lines changed: 70 additions & 11 deletions

File tree

ansible/roles/test/files/ptftests/py3/sflow_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,15 +190,15 @@ def packet_analyzer(self, port_sample, collector, poll_test):
190190
% (data['total_counter_count'], collector))
191191
else:
192192
logging.info("..Analyzing polling test counter packets")
193-
self.assertTrue(data['total_samples'] != 0,
194-
"....Packets are not received in active collector ,%s" % collector)
193+
self.assertTrue(data['total_counter_count'] != 0,
194+
"....Counter packets are not received in active collector ,%s" % collector)
195195
self.analyze_counter_sample(
196196
data, collector, self.polling_int, port_sample)
197197
else:
198198
logging.info(
199199
"Analyzing flow samples in collector %s" % collector)
200-
self.assertTrue(data['total_samples'] != 0,
201-
"....Packets are not received in active collector ,%s" % collector)
200+
self.assertTrue(data['total_flow_count'] != 0,
201+
"....Flow packets are not received in active collector ,%s" % collector)
202202
self.analyze_flow_sample(data, collector)
203203
return data
204204

tests/common/plugins/conditional_mark/tests_mark_conditions.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4523,12 +4523,6 @@ scripts:
45234523
#######################################
45244524
##### sflow #####
45254525
#######################################
4526-
sflow/test_sflow.py:
4527-
skip:
4528-
reason: "The testcase is skipped due to github issue #21701"
4529-
conditions:
4530-
- "https://github.com/sonic-net/sonic-mgmt/issues/21701"
4531-
45324526
sflow/test_sflow.py::TestReboot::testFastreboot:
45334527
skip:
45344528
reason: "Dualtor topology doesn't support advanced-reboot"

tests/sflow/test_sflow.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
--enable_sflow_feature: Enable sFlow feature on DUT. Default is disabled
66
"""
77

8+
import ast
89
import pytest
910
import logging
11+
import time
1012
import json
1113
import re
1214

@@ -229,6 +231,58 @@ def config_sflow_interfaces(duthost, intf, **kwargs):
229231
# ----------------------------------------------------------------------------------
230232

231233

234+
def verify_hsflowd_ready(duthost, collector_ips):
235+
"""
236+
Verify hsflowd has fully initialized with all specified collector configurations.
237+
This is done by checking if /etc/hsflowd.auto contains an entry for each collector IP.
238+
239+
Args:
240+
duthost: DUT host object
241+
collector_ips: List of collector IP addresses to check for
242+
243+
Returns:
244+
True if hsflowd.auto contains entries for all collector IPs, False otherwise
245+
"""
246+
return all(
247+
duthost.shell(
248+
f"docker exec sflow grep -q 'collector={ip}' /etc/hsflowd.auto 2>/dev/null",
249+
module_ignore_errors=True
250+
)['rc'] == 0
251+
for ip in collector_ips
252+
)
253+
254+
255+
def wait_until_hsflowd_ready(duthost, collector_ips):
256+
"""
257+
Wait until hsflowd has fully initialized with all specified collector configurations.
258+
259+
Retries every 10 seconds for up to 240 seconds (4 minutes). This timeout accounts for
260+
cases where hsflowd takes over 3 minutes to initialize (e.g., first-time sflow config
261+
enable or device reboot).
262+
263+
Args:
264+
duthost: DUT host object
265+
collector_ips: List of collector IP addresses that must all be present in hsflowd.auto
266+
267+
Raises:
268+
AssertionError: If not all collectors are initialized within 240 seconds
269+
"""
270+
logger.info(f"Waiting for hsflowd to initialize with collector(s): {collector_ips}")
271+
start_time = time.time()
272+
pytest_assert(
273+
wait_until(
274+
240, 10, 0, # 4 minutes max, check every 10 seconds
275+
verify_hsflowd_ready,
276+
duthost,
277+
collector_ips,
278+
),
279+
f"hsflowd failed to initialize collector(s) {collector_ips} within 240 seconds. "
280+
f"Check /etc/hsflowd.auto in sflow container."
281+
)
282+
elapsed = time.time() - start_time
283+
logger.info(f"hsflowd initialized with all collector(s) after {elapsed:.1f} seconds")
284+
285+
232286
def config_sflow_collector(duthost, collector, config):
233287
collector = var[collector]
234288
if config == 'add':
@@ -282,7 +336,9 @@ def verify_sflow_interfaces(duthost, intf, status, sampling_rate):
282336

283337

284338
@pytest.fixture
285-
def partial_ptf_runner(request, ptfhost, tbinfo):
339+
def partial_ptf_runner(request, duthosts, rand_one_dut_hostname, ptfhost, tbinfo):
340+
duthost = duthosts[rand_one_dut_hostname]
341+
286342
def _partial_ptf_runner(**kwargs):
287343
logger.info(f'The enabled sflow interface is: {kwargs}')
288344
params = {'testbed_type': tbinfo['topo']['name'],
@@ -291,6 +347,15 @@ def _partial_ptf_runner(**kwargs):
291347
'agent_id': var['lo_ip'],
292348
'sflow_ports_file': "/tmp/sflow_ports.json"}
293349
params.update(kwargs)
350+
351+
# Make sure hsflowd daemon has processed collector config before
352+
# proceeding with traffic verification.
353+
collectors = kwargs.get('active_collectors', '[]')
354+
collector_list = ast.literal_eval(collectors or '[]') if isinstance(collectors, str) else collectors
355+
collector_ips = [var[collector]['ip_addr'] for collector in collector_list]
356+
if collector_ips:
357+
wait_until_hsflowd_ready(duthost, collector_ips)
358+
294359
ptf_runner(host=ptfhost,
295360
testdir="ptftests",
296361
platform_dir="ptftests",

0 commit comments

Comments
 (0)