Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions ansible/roles/vm_set/library/ptf_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/python

import json
import logging
import traceback

import docker

from ansible.module_utils.debug_utils import config_module_logging
from ansible.module_utils.basic import AnsibleModule

DOCUMENTATION = '''
---
module: ptf_control
version_added: "0.1"
author: Xin Wang (xiwang5@microsoft.com)
short_description: Control PTF container
description: For controlling PTF container, for example killing processes running in PTF container before stopping it.

Parameters:
- ctn_name: Name of the PTF container
- command: Command to run, currently only support "kill"

'''

EXAMPLES = '''
- name: Kill exabgp and ptf_nn_agent processes in PTF container
ptf_control:
ctn_name: "ptf_vms6-1"
command: kill
'''


class PtfControl(object):
"""This class is for controlling PTF container
"""

def __init__(self, module, ctn_name):
self.module = module
self.ctn_name = ctn_name

self.pid = PtfControl.get_pid(self.ctn_name)

def cmd(self, cmdline, use_unsafe_shell=False, ignore_failure=False, verbose=True):
rc, out, err = self.module.run_command(cmdline, use_unsafe_shell=use_unsafe_shell)
if verbose:
msg = {
'cmd': cmdline,
'rc': rc,
'stdout_lines': out.splitlines(),
'stderr_lines': err.splitlines()
}
logging.debug('***** RUN CMD:\n%s' % json.dumps(msg, indent=2))

if rc != 0 and not ignore_failure:
raise Exception("Failed to run command: %s, rc=%d, out=%s, err=%s" % (cmdline, rc, out, err))
return rc, out, err

@staticmethod
def get_pid(ctn_name):
cli = docker.from_env()
try:
ctn = cli.containers.get(ctn_name)
except Exception:
return None

return ctn.attrs['State']['Pid']

def get_process_pids(self, process):
cmd = 'docker exec -t {} bash -c "pgrep -f \'{}\'"'.format(self.ctn_name, process)
_, out, _ = self.cmd(cmd, ignore_failure=True)
return [int(pid.strip()) for pid in out.splitlines()]

def get_supervisord_processes(self):
_, out, _ = self.cmd(
'docker exec -t {} bash -c "supervisorctl status"'.format(self.ctn_name), ignore_failure=True
)
processes = [line.strip().split()[0] for line in out.splitlines() if "sshd" not in line]
return processes

def kill_process(self, pid):
self.cmd('docker exec -t {} bash -c "kill -9 {}"'.format(self.ctn_name, pid), ignore_failure=True)

def kill_processes(self):
supervisord_processes = self.get_supervisord_processes()
self.cmd('docker exec -t {} bash -c "ps -ef"'.format(self.ctn_name))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this command used for? (I have same confusion at line 108)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is for debugging purpose. By default, this module can generate logs like /tmp/ptf_control_xxx.log.
With this command, we can clearly see the processes running in PTF docker before and after the killing. Next time if server crash happens again, we may be able to get some clue from the log.

Recently we noticed some tests may create exabgp processes like "exabgp-psudoswitch1" in PTF container. So, a more aggressive way is required to kill the processes. Also, it would be better to collect more information for debugging if the issue happens again in the future.

for i in range(3):
logging.info("=== Attempt %d ===" % (i + 1))
logging.info("=== Use supervisorctl to stop processes ===")
for process in supervisord_processes:
self.cmd(
'docker exec -t {} bash -c "supervisorctl stop {}"'.format(self.ctn_name, process),
ignore_failure=True
)
self.cmd(
'docker exec -t {} bash -c "ps -ef"'.format(self.ctn_name)
)

for pattern in [
"/usr/share/exabgp/http_api.py",
"/usr/local/bin/exabgp",
"ptf_nn_agent.py"
]:
logging.info("=== Kill process %s ===" % pattern)
for pid in self.get_process_pids(pattern):
self.kill_process(pid)

self.cmd('docker exec -t {} bash -c "ps -ef"'.format(self.ctn_name))


def main():
module = AnsibleModule(
argument_spec=dict(
ctn_name=dict(required=True, type='str'),
command=dict(required=True, type='str')
),
supports_check_mode=False)

ctn_name = module.params['ctn_name']
command = module.params['command']
if command not in ['kill']:
module.fail_json(msg="command %s is not supported" % command)

config_module_logging('ptf_control_' + ctn_name)

try:
ptf = PtfControl(module, ctn_name)
if command == "kill":
if ptf.pid is not None:
ptf.kill_processes()
except Exception as error:
logging.error(traceback.format_exc())
module.fail_json(msg=str(error))

module.exit_json(changed=True)


if __name__ == "__main__":
main()
81 changes: 61 additions & 20 deletions ansible/roles/vm_set/library/vm_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,8 @@ def add_br_if_to_docker(self, bridge, ext_if, int_if):
VMTopology.iface_up(ext_if)

if VMTopology.intf_exists(tmp_int_if) and VMTopology.intf_not_exists(tmp_int_if, pid=self.pid):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.pid, tmp_int_if))
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" %
(self.pid, tmp_int_if, int_if))
VMTopology.cmd("ip link set dev %s netns %s " % (tmp_int_if, self.pid))
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % (self.pid, tmp_int_if, int_if))

VMTopology.iface_up(int_if, pid=self.pid)

Expand All @@ -537,10 +535,8 @@ def add_br_if_to_netns(self, bridge, ext_if, int_if):
VMTopology.iface_up(ext_if)

if VMTopology.intf_exists(tmp_int_if) and VMTopology.intf_not_exists(tmp_int_if, netns=self.netns):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.netns, tmp_int_if))
VMTopology.cmd("ip netns exec %s ip link set dev %s name %s" % (
self.netns, tmp_int_if, int_if))
VMTopology.cmd("ip link set dev %s netns %s" % (tmp_int_if, self.netns))
VMTopology.cmd("ip netns exec %s ip link set dev %s name %s" % (self.netns, tmp_int_if, int_if))

VMTopology.iface_up(int_if, netns=self.netns)

Expand Down Expand Up @@ -606,12 +602,10 @@ def add_dut_if_to_docker(self, iface_name, dut_iface):
if VMTopology.intf_exists(dut_iface) \
and VMTopology.intf_not_exists(dut_iface, pid=self.pid) \
and VMTopology.intf_not_exists(iface_name, pid=self.pid):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.pid, dut_iface))
VMTopology.cmd("ip link set dev %s netns %s" % (dut_iface, self.pid))

if VMTopology.intf_exists(dut_iface, pid=self.pid) and VMTopology.intf_not_exists(iface_name, pid=self.pid):
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" %
(self.pid, dut_iface, iface_name))
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % (self.pid, dut_iface, iface_name))

VMTopology.iface_up(iface_name, pid=self.pid)

Expand All @@ -626,7 +620,7 @@ def add_dut_vlan_subif_to_docker(self, iface_name, vlan_separator, vlan_id):
(self.pid, vlan_sub_iface_name))

def remove_dut_if_from_docker(self, iface_name, dut_iface):

logging.info("=== Restore docker interface %s as dut interface %s ===" % (iface_name, dut_iface))
if self.pid is None:
return

Expand All @@ -639,7 +633,7 @@ def remove_dut_if_from_docker(self, iface_name, dut_iface):

if VMTopology.intf_not_exists(dut_iface) and VMTopology.intf_exists(dut_iface, pid=self.pid):
VMTopology.cmd(
"nsenter -t %s -n ip link set netns 1 dev %s" % (self.pid, dut_iface))
"nsenter -t %s -n ip link set dev %s netns 1" % (self.pid, dut_iface))

def remove_dut_vlan_subif_from_docker(self, iface_name, vlan_separator, vlan_id):
"""Remove the vlan sub interface created for the ptf interface."""
Expand All @@ -648,6 +642,7 @@ def remove_dut_vlan_subif_from_docker(self, iface_name, vlan_separator, vlan_id)

vlan_sub_iface_name = iface_name + vlan_separator + vlan_id
if VMTopology.intf_exists(vlan_sub_iface_name, pid=self.pid):
VMTopology.iface_down(vlan_sub_iface_name, pid=self.pid)
VMTopology.cmd("nsenter -t %s -n ip link del %s" %
(self.pid, vlan_sub_iface_name))

Expand Down Expand Up @@ -708,14 +703,14 @@ def add_veth_if_to_docker(self, ext_if, int_if, create_vlan_subintf=False, **kwa
if VMTopology.intf_exists(t_int_if) \
and VMTopology.intf_not_exists(t_int_if, pid=self.pid) \
and VMTopology.intf_not_exists(int_if, pid=self.pid):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.pid, t_int_if))
VMTopology.cmd("ip link set dev %s netns %s" %
(t_int_if, self.pid))
if create_vlan_subintf \
and VMTopology.intf_exists(t_int_sub_if) \
and VMTopology.intf_not_exists(t_int_sub_if, pid=self.pid) \
and VMTopology.intf_not_exists(int_sub_if, pid=self.pid):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.pid, t_int_sub_if))
VMTopology.cmd("ip link set dev %s netns %s" %
(t_int_sub_if, self.pid))

if VMTopology.intf_exists(t_int_if, pid=self.pid) and VMTopology.intf_not_exists(int_if, pid=self.pid):
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" %
Expand Down Expand Up @@ -761,8 +756,8 @@ def add_veth_if_to_netns(self, ext_if, int_if):
if VMTopology.intf_exists(t_int_if) \
and VMTopology.intf_not_exists(t_int_if, netns=self.netns) \
and VMTopology.intf_not_exists(int_if, netns=self.netns):
VMTopology.cmd("ip link set netns %s dev %s" %
(self.netns, t_int_if))
VMTopology.cmd("ip link set dev %s netns %s" %
(t_int_if, self.netns))

if VMTopology.intf_exists(t_int_if, netns=self.netns) and VMTopology.intf_not_exists(int_if, netns=self.netns):
VMTopology.cmd("ip netns exec %s ip link set dev %s name %s" % (
Expand Down Expand Up @@ -869,6 +864,7 @@ def bind_fp_ports(self, disconnect_vm=False):
VS_CHASSIS_MIDPLANE_BRIDGE_NAME, self.topo['DUT']['vs_chassis']['midplane_port'])

def unbind_fp_ports(self):
logging.info("=== unbind front panel ports ===")
for attr in self.VMs.values():
for vlan_num, vlan in enumerate(attr['vlans']):
br_name = adaptive_name(
Expand Down Expand Up @@ -1266,6 +1262,7 @@ def remove_host_ports(self):
"""
remove dut port from the ptf docker
"""
logging.info("=== Remove host ports ===")
for i, intf in enumerate(self.host_interfaces):
if self._is_multi_duts:
if isinstance(intf, list):
Expand All @@ -1290,6 +1287,45 @@ def remove_host_ports(self):
self.remove_dut_vlan_subif_from_docker(
ptf_if, vlan_separator, vlan_id)

def remove_veth_if_from_docker(self, ext_if, int_if, tmp_name):
"""
Remove veth interface from docker
"""
logging.info("=== Cleanup port, int_if: %s, ext_if: %s, tmp_name: %s ===" % (ext_if, int_if, tmp_name))
if VMTopology.intf_exists(int_if, pid=self.pid):
# Name it back to temp name in PTF container to avoid potential conflicts
VMTopology.iface_down(int_if, pid=self.pid)
VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % (self.pid, int_if, tmp_name))
# Set it to default namespace
VMTopology.cmd("nsenter -t %s -n ip link set dev %s netns 1" % (self.pid, tmp_name))

# Delete its peer in default namespace
if VMTopology.intf_exists(ext_if):
VMTopology.cmd("ip link delete dev %s" % ext_if)

def remove_ptf_mgmt_port(self):
ext_if = PTF_MGMT_IF_TEMPLATE % self.vm_set_name
tmp_name = MGMT_PORT_NAME + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(MGMT_PORT_NAME))
self.remove_veth_if_from_docker(ext_if, MGMT_PORT_NAME, tmp_name)

def remove_ptf_backplane_port(self):
ext_if = PTF_BP_IF_TEMPLATE % self.vm_set_name
tmp_name = BP_PORT_NAME + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(BP_PORT_NAME))
self.remove_veth_if_from_docker(ext_if, BP_PORT_NAME, tmp_name)

def remove_injected_fp_ports_from_docker(self):
for vm, vlans in self.injected_fp_ports.items():
for vlan in vlans:
(_, _, ptf_index) = VMTopology.parse_vm_vlan_port(vlan)
ext_if = adaptive_name(INJECTED_INTERFACES_TEMPLATE, self.vm_set_name, ptf_index)
int_if = PTF_FP_IFACE_TEMPLATE % ptf_index
properties = self.vm_properties.get(vm, {})
create_vlan_subintf = properties.get('device_type') in (
BACKEND_TOR_TYPE, BACKEND_LEAF_TYPE)
if not create_vlan_subintf:
tmp_name = int_if + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(int_if))
self.remove_veth_if_from_docker(ext_if, int_if, tmp_name)

@staticmethod
def _generate_fingerprint(name, digit=6):
"""
Expand Down Expand Up @@ -1867,10 +1903,14 @@ def main():
if vms_exists:
net.unbind_vm_backplane()
net.unbind_fp_ports()
net.remove_injected_fp_ports_from_docker()

if hostif_exists:
net.remove_host_ports()

net.remove_ptf_mgmt_port()
net.remove_ptf_backplane_port()

if net.netns:
net.unbind_mgmt_port(NETNS_MGMT_IF_TEMPLATE % net.vm_set_name)
net.delete_network_namespace()
Expand Down Expand Up @@ -1935,6 +1975,7 @@ def main():
net.unbind_fp_ports()
net.add_injected_fp_ports_to_docker()
net.bind_fp_ports()
net.bind_vm_backplane()
net.add_bp_port_to_docker(ptf_bp_ip_addr, ptf_bp_ipv6_addr)

if net.netns:
Expand Down
Loading