Skip to content

Commit ea33ef3

Browse files
authored
[nvidia-bluefield] Add CLI for packet-drop and config-record (#4002)
* [NASA] Add CLI for packet-drop and config-record Signed-off-by: Vivek Reddy <[email protected]> * Handle Comments Signed-off-by: Vivek Reddy <[email protected]> * Make cli aware of current state Signed-off-by: Vivek Reddy <[email protected]> * Fix statuic check failures * Fix statuic check failures * Fix static checks * Fix static check * Handle comments --------- Signed-off-by: Vivek Reddy <[email protected]>
1 parent ffc891d commit ea33ef3

File tree

2 files changed

+598
-0
lines changed

2 files changed

+598
-0
lines changed

config/plugins/nvidia_bluefield.py

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
4+
# Apache-2.0
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
19+
#
20+
# nvidia_bluefield.py
21+
#
22+
# Specific command-line utility for Nvidia-bluefield platform
23+
#
24+
25+
try:
26+
import click
27+
import os
28+
import sys
29+
from pathlib import Path
30+
import datetime
31+
import syslog
32+
from sonic_py_common import device_info
33+
from utilities_common.auto_techsupport_helper import get_stats, pretty_size
34+
except ImportError as e:
35+
raise ImportError("%s - required module not found" % str(e))
36+
37+
# Container in which every NASA CLI / sai.profile command below is executed.
SYNCD_CONTAINER_NAME = "syncd"

# NASA CLI invocation and the scratch file (inside the container) that holds
# the commands passed to it with -l.
NASA_CLI = "/usr/sbin/cli/nasa_cli.py -u --exit_on_failure"
NASA_CLI_CMD_F = "/tmp/nasa_cli_cmd.txt"

# NASA CLI command names used to toggle config / packet recording.
CFG_REC_CMD_PREFIX = "set_sai_debug_mode"
PKT_REC_CMD_PREFIX = "set_packet_debug_mode"

# SAI profile file (read from the container) and the keys looked up in it.
SAI_PROFILE_FILE = "/tmp/sai.profile"
SAI_KEY_DUMP_STORE_PATH = "SAI_DUMP_STORE_PATH"
SAI_KEY_DUMP_STORE_COUNT = "SAI_DUMP_STORE_AMOUNT"

# Sub-directories created under the dump store path, one per recording type.
CFG_REC_DIR = "config-record"
PKT_REC_DIR = "packet-drop"
48+
49+
50+
def run_in_syncd(cmd, docker_client):
    """Execute a command inside the syncd container via the Docker Python SDK.

    Args:
        cmd (str): Command to execute in the container
        docker_client (docker.client.DockerClient): Docker client

    Returns:
        tuple: (return_code, output). ``output`` is the command output decoded
        as UTF-8. If anything raises, the error is echoed to stderr and
        (1, <exception text>) is returned instead.
    """
    try:
        syncd = docker_client.containers.get(SYNCD_CONTAINER_NAME)
        status, raw_output = syncd.exec_run(cmd)
        decoded = raw_output.decode('utf-8')
    except Exception as err:
        # Best effort: any failure (lookup, exec, decode) is reported to the
        # caller as a non-zero return code rather than raised.
        click.echo(f"Error executing command in syncd container: {str(err)}", err=True)
        return 1, str(err)
    return status, decoded
67+
68+
69+
def run_nasa_cli(cmd, docker_client):
    """Run a single NASA CLI command inside the syncd container.

    The command, followed by ``quit``, is first written to NASA_CLI_CMD_F in
    the container; the NASA CLI is then started with ``-l`` pointing at that
    file.

    Args:
        cmd (str): NASA CLI command to run
            Eg: set_packet_debug_mode on filepath /tmp/nasa_pkt_record.bin
            Eg: set_sai_debug_mode on filepath /tmp/nasa_cfg_record.bin
        docker_client (docker.client.DockerClient): Docker client

    Returns:
        tuple: (return_code, stdout) of the step that failed, or of the NASA
        CLI invocation when the command file was written successfully.
    """
    # Stage the command script in the container.
    script = cmd + '\n' + 'quit\n'
    write_script = f"sh -c 'echo -n \"{script}\" > {NASA_CLI_CMD_F}'"
    rc, stdout = run_in_syncd(write_script, docker_client)
    if rc != 0:
        return rc, stdout

    # Hand the staged script to the NASA CLI.
    return run_in_syncd(f"{NASA_CLI} -l {NASA_CLI_CMD_F}", docker_client)
92+
93+
94+
def rotate_dump_files(path, max_count):
    """Rotate dump files in the given directory.

    If the number of dump files in the directory is greater than or equal to
    ``max_count``, files are deleted until fewer than ``max_count`` remain,
    which leaves room for the dump the caller is about to create.

    Args:
        path (str): Directory to rotate dump files. Should be accessible from the host
        max_count (int): Max number of dump files to keep

    Returns:
        None
    """
    fs_stats, num_bytes = get_stats(os.path.join(path, "*"))
    syslog.syslog(syslog.LOG_INFO, f"Logrotate: Current size of the directory {path} : {pretty_size(num_bytes)}")

    # One more file than the plain excess is removed so that the new dump
    # fits within max_count. The count is capped at the number of files
    # actually present: a max_count of zero or less would otherwise pop from
    # an empty list and raise IndexError.
    num_delete = min(len(fs_stats) - max_count + 1, len(fs_stats))

    for _ in range(num_delete):
        # NOTE(review): relies on get_stats() listing the newest files first so
        # that pop() yields the oldest entry - confirm against utilities_common.
        entry = fs_stats.pop()
        os.remove(entry[2])
        syslog.syslog(syslog.LOG_INFO, f"Logrotate: Deleted {entry[2]}, size: {pretty_size(entry[1])}")
120+
121+
122+
def get_location_details(docker_client):
    """Get the dump store location and file count from the sai.profile file.

    Args:
        docker_client (docker.client.DockerClient): Docker client

    Returns:
        tuple: (path, count).
            (None, None) when the dump path is not configured or does not exist.
            (path, None) when the path is valid but the count is not an integer.
    """
    path_root = get_sai_profile_value(SAI_KEY_DUMP_STORE_PATH, docker_client)
    count = get_sai_profile_value(SAI_KEY_DUMP_STORE_COUNT, docker_client)

    # A missing key is reported as an empty string, and Path("") refers to the
    # current directory (which always exists), so the empty case has to be
    # rejected explicitly before the existence check.
    if not path_root:
        click.echo(f"{SAI_KEY_DUMP_STORE_PATH} is not configured in {SAI_PROFILE_FILE}", err=True)
        return (None, None)

    if not Path(path_root).exists():
        click.echo(f"Directory {path_root} does not exist", err=True)
        return (None, None)

    try:
        count = int(count)
    except (TypeError, ValueError) as e:
        click.echo(f"Invalid count value: {count}, error: {e}", err=True)
        return (path_root, None)

    return path_root, count
145+
146+
147+
def cleanup_dump_files(path_root, count, dir_name):
    """Create ``<path_root>/<dir_name>`` if needed and rotate the dumps in it.

    Nothing is done when either ``path_root`` or ``count`` is None.
    """
    if path_root is not None and count is not None:
        target_dir = os.path.join(path_root, dir_name)
        Path(target_dir).mkdir(parents=True, exist_ok=True)
        rotate_dump_files(target_dir, count)
154+
155+
156+
def parse_nasa_output(output, mode_type):
    """Derive the recording status and file name from NASA CLI output.

    The first line that (after stripping) starts with "filename:" decides the
    result; the text after the first colon is the file name.

    Args:
        output (str): Raw NASA CLI output, may be empty or None
        mode_type (str): Kind of mode being queried (currently unused)

    Returns:
        tuple: (status, filename). status is "enabled" only when a non-empty
        file name was found, otherwise "disabled". filename is None when no
        "filename:" line exists.
    """
    if not output:
        return "disabled", None

    stripped_lines = (raw.strip() for raw in output.splitlines())
    # Only the first matching line is considered, even if its value is empty.
    found = next(
        (entry.split(":", 1)[1].strip() for entry in stripped_lines if entry.startswith("filename:")),
        None,
    )

    if found:
        return "enabled", found
    return "disabled", found
173+
174+
175+
def get_packet_debug_mode(docker_client):
    """Query the NASA CLI for the packet debug mode.

    Returns:
        tuple: (status, filename) as produced by parse_nasa_output(), or
        ("disabled", None) when the query fails or raises.
    """
    try:
        rc, output = run_nasa_cli("get_packet_debug_mode", docker_client)
        if rc == 0:
            return parse_nasa_output(output, "packet")
        print(f"Error querying packet debug mode: \n {output}", file=sys.stderr)
    except Exception as err:
        print(f"Error: {err}", file=sys.stderr)

    # Any failure is reported as "not recording".
    return "disabled", None
189+
190+
191+
def get_sai_debug_mode(docker_client):
    """Query the NASA CLI for the SAI debug mode.

    Returns:
        tuple: (status, filename) as produced by parse_nasa_output(), or
        ("disabled", None) when the query fails or raises.
    """
    try:
        rc, output = run_nasa_cli("get_sai_debug_mode", docker_client)
        if rc == 0:
            return parse_nasa_output(output, "sai")
        print(f"Error querying SAI debug mode: \n {output}", file=sys.stderr)
    except Exception as err:
        print(f"Error: {err}", file=sys.stderr)

    # Any failure is reported as "not recording".
    return "disabled", None
205+
206+
207+
def get_sai_profile_value(key, docker_client):
    """Look up ``key`` in the SAI profile file inside the syncd container.

    Args:
        key (str): Key to look up in the profile file
        docker_client (docker.client.DockerClient): Docker client

    Returns:
        str: Text following the first '=' on the first line starting with
        "<key>=", or an empty string when the file cannot be read or the key
        is absent.
    """
    rc, out = run_in_syncd(f"cat {SAI_PROFILE_FILE}", docker_client)
    if rc != 0 or not out:
        return ""

    wanted = f"{key}="
    return next(
        (entry.split('=', 1)[1] for entry in out.splitlines() if entry.startswith(wanted)),
        "",
    )
225+
226+
227+
# Top-level click group; register() attaches it under the 'platform' command.
# The docstring doubles as the CLI help text.
@click.group()
def nvidia_bluefield():
    """NVIDIA BlueField platform configuration tasks"""
231+
232+
233+
# Sub-group holding the SDK recording commands defined below.
# The docstring doubles as the CLI help text.
@nvidia_bluefield.group(name='sdk')
def sdk():
    """SDK related configuration"""
237+
238+
239+
@sdk.command('packet-drop')
@click.argument('state', type=click.Choice(['enabled', 'disabled']))
def packet_drop(state):
    """Enable or disable packet drop recording"""
    # NOTE: the docstring above is the CLI help text; keep it short.
    # The process always terminates through sys.exit() with the status of the
    # operation: 0 on success or no-op, non-zero on failure.
    import docker
    docker_client = docker.from_env()

    # Query the current state first so that a no-op request exits early.
    status, filename = get_packet_debug_mode(docker_client)
    if status == 'enabled' and state == 'enabled':
        click.echo(f"Packet drop recording is already enabled on {filename}")
        sys.exit(0)
    elif status == 'disabled' and state == 'disabled':
        click.echo("Packet drop recording is already disabled")
        sys.exit(0)

    if state == 'disabled':
        rc, stdout = run_nasa_cli(PKT_REC_CMD_PREFIX, docker_client)
        if rc == 0:
            click.echo(f"Packet drop recording {state}.")
        else:
            click.echo(f"Could not disable packet drop recording: {stdout}", err=True)
        # click discards a command's return value when run as a CLI, so the
        # result has to be propagated through the exit status.
        sys.exit(rc)

    path_root, count = get_location_details(docker_client)

    if path_root is None or count is None:
        click.echo("Could not enable packet drop recording, dump directory not configured", err=True)
        sys.exit(-1)

    # Make room for the new dump before creating it.
    cleanup_dump_files(path_root, count, PKT_REC_DIR)
    path = os.path.join(path_root, PKT_REC_DIR)

    # Create the empty, timestamped bin file before handing its path to the NASA CLI.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    bin_path = os.path.join(path, f"pkt_dump_record_{timestamp}.bin")
    os.mknod(bin_path)

    cmd = f"{PKT_REC_CMD_PREFIX} on filepath {bin_path}"
    rc, stdout = run_nasa_cli(cmd, docker_client)
    if rc != 0:
        click.echo(f"Could not enable packet drop recording: {stdout}", err=True)
    else:
        syslog.syslog(syslog.LOG_NOTICE, f"Packet drop recording enabled on {bin_path}")
        click.echo(f"Packet drop recording {state} on {bin_path}.")

    sys.exit(rc)
283+
284+
285+
@sdk.command('config-record')
@click.argument('state', type=click.Choice(['enabled', 'disabled']))
def config_record(state):
    """Enable or disable configuration recording"""
    # NOTE: the docstring above is the CLI help text; keep it short.
    # The process always terminates through sys.exit() with the status of the
    # operation: 0 on success or no-op, non-zero on failure.
    import docker
    docker_client = docker.from_env()

    # Query the current state first so that a no-op request exits early.
    status, filename = get_sai_debug_mode(docker_client)
    if status == 'enabled' and state == 'enabled':
        click.echo(f"Config recording is already enabled on {filename}")
        sys.exit(0)
    elif status == 'disabled' and state == 'disabled':
        click.echo("Config recording is already disabled")
        sys.exit(0)

    if state == 'disabled':
        rc, stdout = run_nasa_cli(CFG_REC_CMD_PREFIX, docker_client)
        if rc == 0:
            click.echo(f"Config recording {state}.")
        else:
            click.echo(f"Could not disable config recording: {stdout}", err=True)
        # click discards a command's return value when run as a CLI, so the
        # result has to be propagated through the exit status.
        sys.exit(rc)

    path_root, count = get_location_details(docker_client)

    if path_root is None or count is None:
        click.echo("Could not enable config recording, dump directory/count not configured", err=True)
        sys.exit(-1)

    # Make room for the new record before creating it.
    cleanup_dump_files(path_root, count, CFG_REC_DIR)
    path = os.path.join(path_root, CFG_REC_DIR)

    # Create the empty, timestamped bin file before handing its path to the NASA CLI.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    bin_path = os.path.join(path, f"cfg_record_{timestamp}.bin")
    os.mknod(bin_path)

    cmd = f"{CFG_REC_CMD_PREFIX} on filepath {bin_path}"
    rc, stdout = run_nasa_cli(cmd, docker_client)
    if rc != 0:
        click.echo(f"Could not enable config recording: {stdout}", err=True)
    else:
        syslog.syslog(syslog.LOG_NOTICE, f"Config recording enabled on {bin_path}")
        click.echo(f"Config recording {state} on {bin_path}.")

    sys.exit(rc)
329+
330+
331+
def register(cli):
    """Attach the nvidia_bluefield group to the 'platform' command of ``cli``.

    The group is only added when the SONiC version info reports an
    ``asic_type`` of 'nvidia-bluefield'; otherwise nothing is registered.
    """
    version_info = device_info.get_sonic_version_info()
    if not version_info:
        return
    if version_info.get('asic_type') != 'nvidia-bluefield':
        return
    cli.commands['platform'].add_command(nvidia_bluefield)

0 commit comments

Comments
 (0)