-
Notifications
You must be signed in to change notification settings - Fork 1k
Improve the cleanup of processes and interfaces before stopping PTF container #10069
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e294603
Safe remove PTF container
wangxin 774a4cf
Add ptf_control
wangxin 6a615be
Aggressively kill
wangxin 292fe5d
Fix pre-commit style
wangxin f7b5eb9
Kill all possible exabgp processes in ptf
wangxin 55537be
Fix pre-commit style issue
wangxin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| #!/usr/bin/python | ||
|
|
||
| import json | ||
| import logging | ||
| import traceback | ||
|
|
||
| import docker | ||
|
|
||
| from ansible.module_utils.debug_utils import config_module_logging | ||
| from ansible.module_utils.basic import AnsibleModule | ||
|
|
||
# Ansible module documentation (YAML). Arguments are declared under the
# standard ``options`` mapping so that documentation tooling can discover them.
DOCUMENTATION = '''
---
module: ptf_control
version_added: "0.1"
author: Xin Wang (xiwang5@microsoft.com)
short_description: Control PTF container
description:
    - For controlling PTF container, for example killing processes running in PTF container before stopping it.
options:
    ctn_name:
        description: Name of the PTF container
        required: true
        type: str
    command:
        description: Command to run, currently only "kill" is supported
        required: true
        type: str
'''

# Usage example (YAML task) for the module.
EXAMPLES = '''
- name: Kill exabgp and ptf_nn_agent processes in PTF container
  ptf_control:
    ctn_name: "ptf_vms6-1"
    command: kill
'''
|
|
||
|
|
||
class PtfControl(object):
    """Run maintenance commands inside a PTF docker container.

    Every container-side action is executed on the host as
    ``docker exec -t <ctn_name> bash -c "<command>"`` through the Ansible
    module's ``run_command``.
    """

    # Command-line patterns (matched with ``pgrep -f``) of the processes that
    # are force-killed after supervisord has been asked to stop its programs.
    KILL_PATTERNS = (
        "/usr/share/exabgp/http_api.py",
        "/usr/local/bin/exabgp",
        "ptf_nn_agent.py",
    )

    def __init__(self, module, ctn_name):
        """Remember the Ansible module and container name, and look up the container PID.

        Args:
            module: Object providing ``run_command`` (the AnsibleModule instance).
            ctn_name: Name of the PTF container.
        """
        self.module = module
        self.ctn_name = ctn_name

        self.pid = PtfControl.get_pid(self.ctn_name)

    def cmd(self, cmdline, use_unsafe_shell=False, ignore_failure=False, verbose=True):
        """Run a command on the host and optionally log its result.

        Args:
            cmdline: Command line handed to ``module.run_command``.
            use_unsafe_shell: Forwarded to ``module.run_command``.
            ignore_failure: When True, a non-zero return code does not raise.
            verbose: When True, the command, return code and output are logged
                at debug level as JSON.

        Returns:
            Tuple ``(rc, out, err)`` as returned by ``module.run_command``.

        Raises:
            Exception: If the return code is non-zero and ``ignore_failure`` is False.
        """
        rc, out, err = self.module.run_command(cmdline, use_unsafe_shell=use_unsafe_shell)
        if verbose:
            msg = {
                'cmd': cmdline,
                'rc': rc,
                'stdout_lines': out.splitlines(),
                'stderr_lines': err.splitlines()
            }
            logging.debug('***** RUN CMD:\n%s', json.dumps(msg, indent=2))

        if rc != 0 and not ignore_failure:
            raise Exception("Failed to run command: %s, rc=%d, out=%s, err=%s" % (cmdline, rc, out, err))
        return rc, out, err

    @staticmethod
    def get_pid(ctn_name):
        """Return the ``State.Pid`` attribute of the named container.

        Returns None when the container cannot be retrieved (any exception
        from the docker client lookup is treated as "container not available").
        """
        cli = docker.from_env()
        try:
            ctn = cli.containers.get(ctn_name)
        except Exception:
            return None

        return ctn.attrs['State']['Pid']

    def _docker_exec(self, command, ignore_failure=False):
        """Run ``command`` through bash inside the container; see ``cmd`` for the result."""
        cmdline = 'docker exec -t {} bash -c "{}"'.format(self.ctn_name, command)
        return self.cmd(cmdline, ignore_failure=ignore_failure)

    def _log_process_snapshot(self):
        """Log ``ps -ef`` from inside the container for later debugging.

        The snapshot is purely diagnostic, so a failure to take it is ignored
        instead of aborting the cleanup.
        """
        self._docker_exec("ps -ef", ignore_failure=True)

    def get_process_pids(self, process):
        """Return the PIDs (inside the container) whose command line matches ``process``.

        ``pgrep`` exits non-zero when nothing matches, so failures are ignored.
        Output lines that are not plain integers (blank lines, error text
        printed on the TTY) are skipped rather than raising ValueError.
        """
        _, out, _ = self._docker_exec("pgrep -f '{}'".format(process), ignore_failure=True)
        pids = []
        for line in out.splitlines():
            try:
                pids.append(int(line.strip()))
            except ValueError:
                continue
        return pids

    def get_supervisord_processes(self):
        """Return the first column of each ``supervisorctl status`` line.

        Lines containing "sshd" are left out so that sshd is never stopped
        by ``kill_processes``. Blank lines are skipped.
        """
        _, out, _ = self._docker_exec("supervisorctl status", ignore_failure=True)
        processes = []
        for line in out.splitlines():
            fields = line.split()
            if not fields or "sshd" in line:
                continue
            processes.append(fields[0])
        return processes

    def kill_process(self, pid):
        """Send SIGKILL to ``pid`` inside the container; failures are ignored."""
        self._docker_exec("kill -9 {}".format(pid), ignore_failure=True)

    def kill_processes(self, attempts=3):
        """Stop supervisord-managed programs and force-kill leftover processes.

        Each attempt first asks supervisorctl to stop every program found by
        ``get_supervisord_processes`` and then sends SIGKILL to every process
        matching ``KILL_PATTERNS``. A ``ps -ef`` snapshot is logged before the
        first attempt and after each phase so the log shows what was running
        before and after the kill.

        Args:
            attempts: Number of stop-and-kill rounds to perform.
        """
        supervisord_processes = self.get_supervisord_processes()
        self._log_process_snapshot()
        for attempt in range(1, attempts + 1):
            logging.info("=== Attempt %d ===", attempt)
            logging.info("=== Use supervisorctl to stop processes ===")
            for process in supervisord_processes:
                self._docker_exec("supervisorctl stop {}".format(process), ignore_failure=True)
            self._log_process_snapshot()

            for pattern in self.KILL_PATTERNS:
                logging.info("=== Kill process %s ===", pattern)
                for pid in self.get_process_pids(pattern):
                    self.kill_process(pid)

            self._log_process_snapshot()
|
|
||
|
|
||
def main():
    """Ansible entry point: validate arguments and run the requested PTF command.

    Module arguments:
        ctn_name: Name of the PTF container.
        command: Command to run; only "kill" is accepted.

    Any exception raised while handling the command is logged with its
    traceback and reported through ``module.fail_json``. Otherwise the module
    exits with ``changed=True``.
    """
    module = AnsibleModule(
        argument_spec=dict(
            ctn_name=dict(required=True, type='str'),
            command=dict(required=True, type='str')
        ),
        supports_check_mode=False)

    ctn_name = module.params['ctn_name']
    command = module.params['command']
    if command not in ['kill']:
        module.fail_json(msg="command %s is not supported" % command)

    config_module_logging('ptf_control_' + ctn_name)

    try:
        ptf = PtfControl(module, ctn_name)
        if command == "kill":
            # pid is None when the container could not be looked up, and docker
            # reports Pid 0 for a container that exists but is not running; in
            # both cases there is nothing to kill.
            if ptf.pid:
                ptf.kill_processes()
    except Exception as error:
        logging.error(traceback.format_exc())
        module.fail_json(msg=str(error))

    module.exit_json(changed=True)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this command used for? (I have the same confusion at line 108.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is for debugging purposes. By default, this module can generate logs like
/tmp/ptf_control_xxx.log. With this command, we can clearly see the processes running in the PTF docker before and after the killing. If a server crash happens again, we may be able to get some clues from the log.
Recently we noticed some tests may create exabgp processes like "exabgp-psudoswitch1" in PTF container. So, a more aggressive way is required to kill the processes. Also, it would be better to collect more information for debugging if the issue happens again in the future.