class PtfControl(object):
    """Stop and kill helper processes running inside a PTF docker container.

    Every command is executed on the docker host through the Ansible module's
    ``run_command`` as ``docker exec -t <container> bash -c "<command>"``.

    Attributes:
        module: AnsibleModule instance used to run commands.
        ctn_name: Name of the PTF container.
        pid: ``State.Pid`` reported by docker for the container, or None when
            the container could not be looked up.
    """

    # Command-line patterns (matched with ``pgrep -f``) of processes that are
    # force-killed after supervisord has been asked to stop its programs.
    KILL_PATTERNS = (
        "/usr/share/exabgp/http_api.py",
        "/usr/local/bin/exabgp",
        "ptf_nn_agent.py",
    )

    # Number of stop/kill rounds performed by kill_processes().
    KILL_ATTEMPTS = 3

    def __init__(self, module, ctn_name):
        self.module = module
        self.ctn_name = ctn_name

        self.pid = PtfControl.get_pid(self.ctn_name)

    def cmd(self, cmdline, use_unsafe_shell=False, ignore_failure=False, verbose=True):
        """Run a command on the host and return ``(rc, out, err)``.

        Args:
            cmdline: Command line handed to ``module.run_command``.
            use_unsafe_shell: Forwarded to ``module.run_command``.
            ignore_failure: When False, a non-zero return code raises.
            verbose: When True, the command and its result are logged at
                debug level as JSON.

        Raises:
            Exception: If the command returns non-zero and ``ignore_failure``
                is False.
        """
        rc, out, err = self.module.run_command(cmdline, use_unsafe_shell=use_unsafe_shell)
        if verbose:
            msg = {
                'cmd': cmdline,
                'rc': rc,
                'stdout_lines': out.splitlines(),
                'stderr_lines': err.splitlines()
            }
            logging.debug('***** RUN CMD:\n%s', json.dumps(msg, indent=2))

        if rc != 0 and not ignore_failure:
            raise Exception("Failed to run command: %s, rc=%d, out=%s, err=%s" % (cmdline, rc, out, err))
        return rc, out, err

    def _docker_exec(self, shell_cmd, ignore_failure=False):
        """Run ``shell_cmd`` with bash inside the container via ``docker exec -t``."""
        return self.cmd(
            'docker exec -t {} bash -c "{}"'.format(self.ctn_name, shell_cmd),
            ignore_failure=ignore_failure
        )

    @staticmethod
    def get_pid(ctn_name):
        """Return ``State.Pid`` of container ``ctn_name``, or None if the lookup fails."""
        cli = docker.from_env()
        try:
            ctn = cli.containers.get(ctn_name)
        except Exception:
            # Best effort: any lookup failure is treated as "no container".
            return None

        return ctn.attrs['State']['Pid']

    @staticmethod
    def _parse_pids(output):
        """Extract the integer PIDs from ``pgrep`` output, skipping non-numeric lines.

        ``docker exec -t`` allocates a TTY, so lines end with CRLF and anything
        the command writes to stderr is mixed into the same stream.
        """
        pids = []
        for line in output.splitlines():
            try:
                pids.append(int(line.strip()))
            except ValueError:
                # Not a PID (blank line or an error message); ignore it.
                continue
        return pids

    @staticmethod
    def _parse_process_names(output):
        """Return the first column of each non-blank ``supervisorctl status`` line.

        Lines containing "sshd" are left out so that sshd is never stopped.
        """
        names = []
        for line in output.splitlines():
            fields = line.split()
            if fields and "sshd" not in line:
                names.append(fields[0])
        return names

    def get_process_pids(self, process):
        """Return the PIDs inside the container whose command line matches ``process``."""
        _, out, _ = self._docker_exec("pgrep -f '{}'".format(process), ignore_failure=True)
        return self._parse_pids(out)

    def get_supervisord_processes(self):
        """Return the names of the supervisord-managed programs, excluding sshd."""
        _, out, _ = self._docker_exec("supervisorctl status", ignore_failure=True)
        return self._parse_process_names(out)

    def kill_process(self, pid):
        """Send SIGKILL to ``pid`` inside the container; failures are ignored."""
        self._docker_exec("kill -9 {}".format(pid), ignore_failure=True)

    def kill_processes(self):
        """Stop supervisord programs, then force-kill exabgp and ptf_nn_agent.

        The stop/kill sequence is repeated KILL_ATTEMPTS times. ``ps -ef`` is
        run before and during each round so the debug log records the process
        table.

        Raises:
            Exception: If one of the ``ps -ef`` calls fails.
        """
        supervisord_processes = self.get_supervisord_processes()
        self._docker_exec("ps -ef")
        for attempt in range(1, self.KILL_ATTEMPTS + 1):
            logging.info("=== Attempt %d ===", attempt)
            logging.info("=== Use supervisorctl to stop processes ===")
            for process in supervisord_processes:
                self._docker_exec("supervisorctl stop {}".format(process), ignore_failure=True)
            self._docker_exec("ps -ef")

            for pattern in self.KILL_PATTERNS:
                logging.info("=== Kill process %s ===", pattern)
                for pid in self.get_process_pids(pattern):
                    self.kill_process(pid)

            self._docker_exec("ps -ef")


def main():
    """Ansible entry point: validate arguments and run the requested command."""
    module = AnsibleModule(
        argument_spec=dict(
            ctn_name=dict(required=True, type='str'),
            command=dict(required=True, type='str')
        ),
        supports_check_mode=False)

    ctn_name = module.params['ctn_name']
    command = module.params['command']
    if command not in ['kill']:
        module.fail_json(msg="command %s is not supported" % command)

    config_module_logging('ptf_control_' + ctn_name)

    # Only report a change when commands were actually run in the container.
    changed = False
    try:
        ptf = PtfControl(module, ctn_name)
        if command == "kill" and ptf.pid is not None:
            ptf.kill_processes()
            changed = True
    except Exception as error:
        logging.error(traceback.format_exc())
        module.fail_json(msg=str(error))

    module.exit_json(changed=changed)


if __name__ == "__main__":
    main()
VMTopology.intf_not_exists(iface_name, pid=self.pid): - VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % - (self.pid, dut_iface, iface_name)) + VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % (self.pid, dut_iface, iface_name)) VMTopology.iface_up(iface_name, pid=self.pid) @@ -626,7 +620,7 @@ def add_dut_vlan_subif_to_docker(self, iface_name, vlan_separator, vlan_id): (self.pid, vlan_sub_iface_name)) def remove_dut_if_from_docker(self, iface_name, dut_iface): - + logging.info("=== Restore docker interface %s as dut interface %s ===" % (iface_name, dut_iface)) if self.pid is None: return @@ -639,7 +633,7 @@ def remove_dut_if_from_docker(self, iface_name, dut_iface): if VMTopology.intf_not_exists(dut_iface) and VMTopology.intf_exists(dut_iface, pid=self.pid): VMTopology.cmd( - "nsenter -t %s -n ip link set netns 1 dev %s" % (self.pid, dut_iface)) + "nsenter -t %s -n ip link set dev %s netns 1" % (self.pid, dut_iface)) def remove_dut_vlan_subif_from_docker(self, iface_name, vlan_separator, vlan_id): """Remove the vlan sub interface created for the ptf interface.""" @@ -648,6 +642,7 @@ def remove_dut_vlan_subif_from_docker(self, iface_name, vlan_separator, vlan_id) vlan_sub_iface_name = iface_name + vlan_separator + vlan_id if VMTopology.intf_exists(vlan_sub_iface_name, pid=self.pid): + VMTopology.iface_down(vlan_sub_iface_name, pid=self.pid) VMTopology.cmd("nsenter -t %s -n ip link del %s" % (self.pid, vlan_sub_iface_name)) @@ -708,14 +703,14 @@ def add_veth_if_to_docker(self, ext_if, int_if, create_vlan_subintf=False, **kwa if VMTopology.intf_exists(t_int_if) \ and VMTopology.intf_not_exists(t_int_if, pid=self.pid) \ and VMTopology.intf_not_exists(int_if, pid=self.pid): - VMTopology.cmd("ip link set netns %s dev %s" % - (self.pid, t_int_if)) + VMTopology.cmd("ip link set dev %s netns %s" % + (t_int_if, self.pid)) if create_vlan_subintf \ and VMTopology.intf_exists(t_int_sub_if) \ and VMTopology.intf_not_exists(t_int_sub_if, pid=self.pid) \ 
def remove_veth_if_from_docker(self, ext_if, int_if, tmp_name):
    """Remove a veth pair whose inner end lives in the PTF container.

    Args:
        ext_if: Name of the veth end in the default namespace.
        int_if: Name of the veth end inside the container.
        tmp_name: Temporary name given to ``int_if`` before it is moved back
            to the default namespace.
    """
    logging.info("=== Cleanup port, int_if: %s, ext_if: %s, tmp_name: %s ===" % (int_if, ext_if, tmp_name))
    # Without a container pid there is no namespace to enter with nsenter.
    if self.pid is not None and VMTopology.intf_exists(int_if, pid=self.pid):
        # Name it back to temp name in PTF container to avoid potential conflicts
        VMTopology.iface_down(int_if, pid=self.pid)
        VMTopology.cmd("nsenter -t %s -n ip link set dev %s name %s" % (self.pid, int_if, tmp_name))
        # Set it to default namespace
        VMTopology.cmd("nsenter -t %s -n ip link set dev %s netns 1" % (self.pid, tmp_name))

    # Delete its peer in default namespace
    if VMTopology.intf_exists(ext_if):
        VMTopology.cmd("ip link delete dev %s" % ext_if)


def remove_ptf_mgmt_port(self):
    """Remove the veth pair of the PTF management port."""
    ext_if = PTF_MGMT_IF_TEMPLATE % self.vm_set_name
    tmp_name = MGMT_PORT_NAME + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(MGMT_PORT_NAME))
    self.remove_veth_if_from_docker(ext_if, MGMT_PORT_NAME, tmp_name)


def remove_ptf_backplane_port(self):
    """Remove the veth pair of the PTF backplane port."""
    ext_if = PTF_BP_IF_TEMPLATE % self.vm_set_name
    tmp_name = BP_PORT_NAME + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(BP_PORT_NAME))
    self.remove_veth_if_from_docker(ext_if, BP_PORT_NAME, tmp_name)


def remove_injected_fp_ports_from_docker(self):
    """Remove the veth pairs of the injected front panel ports.

    Ports of VMs whose ``device_type`` is a backend ToR or backend leaf type
    are skipped.
    """
    for vm, vlans in self.injected_fp_ports.items():
        for vlan in vlans:
            (_, _, ptf_index) = VMTopology.parse_vm_vlan_port(vlan)
            ext_if = adaptive_name(INJECTED_INTERFACES_TEMPLATE, self.vm_set_name, ptf_index)
            int_if = PTF_FP_IFACE_TEMPLATE % ptf_index
            properties = self.vm_properties.get(vm, {})
            create_vlan_subintf = properties.get('device_type') in (
                BACKEND_TOR_TYPE, BACKEND_LEAF_TYPE)
            if not create_vlan_subintf:
                tmp_name = int_if + VMTopology._generate_fingerprint(ext_if, MAX_INTF_LEN-len(int_if))
                self.remove_veth_if_from_docker(ext_if, int_if, tmp_name)
net.unbind_fp_ports() net.add_injected_fp_ports_to_docker() net.bind_fp_ports() + net.bind_vm_backplane() net.add_bp_port_to_docker(ptf_bp_ip_addr, ptf_bp_ipv6_addr) if net.netns: diff --git a/ansible/roles/vm_set/tasks/announce_routes.yml b/ansible/roles/vm_set/tasks/announce_routes.yml index fa341c93bd4..70c85fe4b81 100644 --- a/ansible/roles/vm_set/tasks/announce_routes.yml +++ b/ansible/roles/vm_set/tasks/announce_routes.yml @@ -121,93 +121,3 @@ ptf_ip: "{{ ptf_host_ip }}" delegate_to: localhost when: exabgp_action == 'start' - -- name: Check if ptf is accessible - wait_for: - host: "{{ ptf_host_ip }}" - port: 22 - timeout: 3 - register: ptf_accessible - ignore_errors: true - delegate_to: localhost - -- name: Check and stop exabgp processes on PTF - block: - - name: Check exabgp processes for IPv4 running on PTF - shell: "supervisorctl status exabgpv4:* | grep RUNNING | wc -l" - register: exabgpv4_running - delegate_to: "{{ ptf_host }}" - - - name: Stop exabgp processes for IPv4 on PTF - supervisorctl: - name: "exabgpv4:" - state: stopped - delegate_to: "{{ ptf_host }}" - when: exabgpv4_running.stdout|int > 0 - - - name: Check exabgp processes for IPv6 running on PTF - shell: "supervisorctl status exabgpv6:* | grep RUNNING | wc -l" - register: exabgpv6_running - delegate_to: "{{ ptf_host }}" - - - name: Stop exabgp processes for IPv6 on PTF - supervisorctl: - name: "exabgpv6:" - state: stopped - delegate_to: "{{ ptf_host }}" - when: exabgpv6_running.stdout|int > 0 - - - name: Check and stop exabgp processes on PTF (for old naming convention) - block: - - name: Check if exabgp processes running on PTF (for old naming convention) - shell: "supervisorctl status | grep RUNNING | grep ^exabgp-.* | wc -l" - register: exabgp_running_old - delegate_to: "{{ ptf_host }}" - - - name: Stop exabgp processes for IPv4 on PTF (for old naming convention) - exabgp: - name: "{{ vm_item.key }}" - state: "stopped" - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - 
loop_var: vm_item - delegate_to: "{{ ptf_host }}" - when: exabgp_running_old.stdout|int > 0 - - - name: Stop exabgp processes for IPv6 on PTF (for old naming convention) - exabgp: - name: "{{ vm_item.key }}-v6" - state: "stopped" - loop: "{{ topology['VMs']|dict2items }}" - loop_control: - loop_var: vm_item - delegate_to: "{{ ptf_host }}" - when: exabgp_running_old.stdout|int > 0 - - - name: Get count of exabgp processes running on PTF - shell: "supervisorctl status | grep RUNNING | grep ^exabgp.* | wc -l" - register: exabgp_running - delegate_to: "{{ ptf_host }}" - - - name: Verify no exabgp processes running on PTF - assert: - that: exabgp_running.stdout|int == 0 - fail_msg: "exabgp processes are still running on PTF, please check manually" - - - name: Stop all processes managed by supervisor on PTF - block: - - name: Stop all processes managed by supervisor on PTF - shell: "supervisorctl stop all" - delegate_to: "{{ ptf_host }}" - - - name: Get count of running processes managed by supervisor - shell: "supervisorctl status | grep RUNNING | wc -l" - register: supervisor_proc_running - delegate_to: "{{ ptf_host }}" - - - name: Verify all the processes managed by supervisor are not running - assert: - that: supervisor_proc_running.stdout|int == 0 - fail_msg: "There are still processes managed by supervisor running on PTF, please check manually" - - when: exabgp_action == 'stop' and ptf_accessible is defined and not ptf_accessible.failed diff --git a/ansible/roles/vm_set/tasks/remove_topo.yml b/ansible/roles/vm_set/tasks/remove_topo.yml index 0d7d3458035..8d0768bf848 100644 --- a/ansible/roles/vm_set/tasks/remove_topo.yml +++ b/ansible/roles/vm_set/tasks/remove_topo.yml @@ -26,19 +26,19 @@ nic_simulator_action: stop when: topology.host_interfaces_active_active is defined and topology.host_interfaces_active_active|length > 0 - - name: Stop exabgp processes - include_tasks: announce_routes.yml - vars: - exabgp_action: stop - when: - - topo != 'fullmesh' - - not 'ptf' 
in topo - - name: Stop PTF portchannel service include_tasks: ptf_portchannel.yml vars: ptf_portchannel_action: stop + - name: Kill exabgp and ptf_nn_agent processes in PTF container + ptf_control: + ctn_name: "ptf_{{ vm_set_name }}" + command: kill + when: + - topo != 'fullmesh' + - not 'ptf' in topo + - name: Get duts ports include_tasks: get_dut_port.yml loop: "{{ duts_name.split(',') }}" diff --git a/ansible/roles/vm_set/tasks/renumber_topo.yml b/ansible/roles/vm_set/tasks/renumber_topo.yml index a32f744c84d..3dcef6260ac 100644 --- a/ansible/roles/vm_set/tasks/renumber_topo.yml +++ b/ansible/roles/vm_set/tasks/renumber_topo.yml @@ -27,19 +27,19 @@ nic_simulator_action: stop when: topology.host_interfaces_active_active is defined and topology.host_interfaces_active_active|length > 0 - - name: Stop exabgp processes - include_tasks: announce_routes.yml - vars: - exabgp_action: stop - when: - - topo != 'fullmesh' - - not 'ptf' in topo - - name: Stop PTF portchannel service include_tasks: ptf_portchannel.yml vars: ptf_portchannel_action: stop + - name: Kill exabgp and ptf_nn_agent processes in PTF container + ptf_control: + ctn_name: "ptf_{{ vm_set_name }}" + command: kill + when: + - topo != 'fullmesh' + - not 'ptf' in topo + - name: Get infos of ptf container docker_container_info: name: ptf_{{ vm_set_name }} @@ -58,6 +58,25 @@ echo "-----------------------------" >> /tmp/ptf_network_{{ vm_set_name }}.log when: ptf_docker_info.exists + - name: Get dut ports + include_tasks: get_dut_port.yml + loop: "{{ duts_name.split(',') }}" + loop_control: + loop_var: dut_name + + - name: Unbind topology {{ topo }} to VMs. 
base vm = {{ VM_base }} + vm_topology: + cmd: "unbind" + vm_set_name: "{{ vm_set_name }}" + topo: "{{ topology }}" + vm_names: "{{ VM_hosts }}" + vm_base: "{{ VM_base }}" + vm_type: "{{ vm_type }}" + duts_fp_ports: "{{ duts_fp_ports }}" + duts_name: "{{ duts_name.split(',') }}" + max_fp_num: "{{ max_fp_num }}" + become: yes + - name: Stop ptf container ptf_{{ vm_set_name }} docker_container: name: ptf_{{ vm_set_name }} @@ -111,12 +130,6 @@ command: docker exec -i ptf_{{ vm_set_name }} sysctl -w net.ipv6.route.max_size=168000 become: yes - - name: Get dut ports - include_tasks: get_dut_port.yml - loop: "{{ duts_name.split(',') }}" - loop_control: - loop_var: dut_name - - name: Create vlan ports for dut include_tasks: create_dut_port.yml when: external_port is defined