Skip to content

Commit 4f28540

Browse files
authored
Merge pull request #5220 from r0m4n-z/fix_zombie_spawning_green_shell
Fix subprocess handling when killed on timeout
2 parents c687259 + 0770969 commit 4f28540

File tree

3 files changed

+132
-7
lines changed

3 files changed

+132
-7
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,11 @@ Fixed
271271

272272
Contributed by @Kami.
273273

274+
* Make sure ``st2common.util.green.shell.run_command()`` doesn't leave stray / zombie processes
275+
laying around in some command timeout scenarios. #5220
276+
277+
Contributed by @r0m4n-z.
278+
274279
3.4.1 - March 14, 2021
275280
----------------------
276281

st2common/st2common/util/green/shell.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ def run_command(
8585
8686
:param kill_func: Optional function which will be called on timeout to kill the process.
8787
If not provided, it defaults to `process.kill`
88+
89+
NOTE: If you are utilizing shell=True, you shoulf always specify a custom
90+
kill function which correctly kills shell process + the shell children
91+
process.
92+
93+
If you don't do that, timeout handler won't work correctly / as expected -
94+
only the shell process will be killed, but not also the child processs
95+
spawned by the shell.
8896
:type kill_func: ``callable``
8997
9098
:param read_stdout_func: Function which is responsible for reading process stdout when
@@ -154,6 +162,11 @@ def run_command(
154162
read_stderr_func, process.stderr, read_stderr_buffer
155163
)
156164

165+
# Special attribute we use to determine if the process timed out or not
166+
process._timed_out = False
167+
168+
# TODO: Now that we support Python >= 3.6 we should utilize timeout argument for the
169+
# communicate() method and handle killing the process + read threads there.
157170
def on_timeout_expired(timeout):
158171
global timed_out
159172

@@ -165,9 +178,7 @@ def on_timeout_expired(timeout):
165178
# Note: We explicitly set the returncode to indicate the timeout.
166179
LOG.debug("Command execution timeout reached.")
167180

168-
# NOTE: It's important we set returncode twice - here and below to avoid race in this
169-
# function because "kill_func()" is async and "process.kill()" is not.
170-
process.returncode = TIMEOUT_EXIT_CODE
181+
process._timed_out = True
171182

172183
if kill_func:
173184
LOG.debug("Calling kill_func.")
@@ -176,9 +187,8 @@ def on_timeout_expired(timeout):
176187
LOG.debug("Killing process.")
177188
process.kill()
178189

179-
# NOTE: It's imporant to set returncode here as well, since call to process.kill() sets
180-
# it and overwrites it if we set it earlier.
181-
process.returncode = TIMEOUT_EXIT_CODE
190+
process.wait()
191+
process._timed_out = True
182192

183193
if read_stdout_func and read_stderr_func:
184194
LOG.debug("Killing read_stdout_thread and read_stderr_thread")
@@ -205,7 +215,11 @@ def on_timeout_expired(timeout):
205215
stdout, stderr = process.communicate()
206216

207217
concurrency.cancel(timeout_thread)
208-
exit_code = process.returncode
218+
219+
if getattr(process, "_timed_out", False):
220+
exit_code = TIMEOUT_EXIT_CODE
221+
else:
222+
exit_code = process.returncode
209223

210224
if read_stdout_func and read_stderr_func:
211225
# Wait on those green threads to finish reading from stdout and stderr before continuing
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Copyright 2020 The StackStorm Authors.
2+
# Copyright 2019 Extreme Networks, Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import os
17+
18+
from st2common.util.monkey_patch import monkey_patch
19+
20+
monkey_patch()
21+
22+
from st2tests.base import IntegrationTestCase
23+
24+
from st2common.util.green.shell import run_command
25+
from st2common.util.green.shell import TIMEOUT_EXIT_CODE
26+
from st2common.util.shell import kill_process
27+
from st2common.util.shell import quote_unix
28+
29+
_all__ = ["GreenShellUtilsTestCase"]
30+
31+
32+
class GreenShellUtilsTestCase(IntegrationTestCase):
33+
def test_run_command_success(self):
34+
# 0 exit code
35+
exit_code, stdout, stderr, timed_out = run_command(
36+
cmd='echo "test stdout" ; >&2 echo "test stderr"', shell=True
37+
)
38+
self.assertEqual(exit_code, 0)
39+
self.assertEqual(stdout.strip(), b"test stdout")
40+
self.assertEqual(stderr.strip(), b"test stderr")
41+
self.assertFalse(timed_out)
42+
43+
# non-zero exit code
44+
exit_code, stdout, stderr, timed_out = run_command(
45+
cmd='echo "test stdout" ; >&2 echo "test stderr" ; exit 5', shell=True
46+
)
47+
self.assertEqual(exit_code, 5)
48+
self.assertEqual(stdout.strip(), b"test stdout")
49+
self.assertEqual(stderr.strip(), b"test stderr")
50+
self.assertFalse(timed_out)
51+
52+
# implicit non zero code (invalid command)
53+
exit_code, stdout, stderr, timed_out = run_command(
54+
cmd="foobarbarbazrbar", shell=True
55+
)
56+
self.assertEqual(exit_code, 127)
57+
self.assertEqual(stdout.strip(), b"")
58+
self.assertTrue(b"foobarbarbazrbar: not found" in stderr.strip())
59+
self.assertFalse(timed_out)
60+
61+
def test_run_command_timeout_shell_and_custom_kill_func(self):
62+
# This test represents our local runner setup where we use a preexec_func + custom kill_func
63+
# NOTE: When using shell=True. we should alaways use custom kill_func to ensure child shell
64+
# processses are in fact killed as well.
65+
exit_code, stdout, stderr, timed_out = run_command(
66+
cmd='echo "pre sleep"; sleep 1589; echo "post sleep"',
67+
preexec_func=os.setsid,
68+
timeout=1,
69+
kill_func=kill_process,
70+
shell=True,
71+
)
72+
self.assertEqual(exit_code, TIMEOUT_EXIT_CODE)
73+
self.assertEqual(stdout.strip(), b"pre sleep")
74+
self.assertEqual(stderr.strip(), b"")
75+
self.assertTrue(timed_out)
76+
77+
# Verify there is no zombie process left laying around
78+
self.assertNoStrayProcessesLeft("sleep 1589")
79+
80+
def test_run_command_timeout_no_shell_no_custom_kill_func(self):
81+
exit_code, stdout, stderr, timed_out = run_command(
82+
cmd=["sleep", "1599"], preexec_func=os.setsid, timeout=1
83+
)
84+
self.assertEqual(exit_code, TIMEOUT_EXIT_CODE)
85+
self.assertEqual(stdout.strip(), b"")
86+
self.assertEqual(stderr.strip(), b"")
87+
self.assertTrue(timed_out)
88+
89+
# Verify there is no zombie process left laying around
90+
self.assertNoStrayProcessesLeft("sleep 1599")
91+
92+
def assertNoStrayProcessesLeft(self, grep_string: str) -> None:
93+
"""
94+
Assert that there are no stray / zombie processes left with the provided command line
95+
string.
96+
"""
97+
exit_code, stdout, stderr, timed_out = run_command(
98+
cmd="ps aux | grep %s | grep -v grep" % (quote_unix(grep_string)),
99+
shell=True,
100+
)
101+
102+
if stdout.strip() != b"" and stderr.strip() != b"":
103+
raise AssertionError(
104+
"Expected no stray processes, but found Some. stdout: %s, stderr: %s"
105+
% (stdout, stderr)
106+
)

0 commit comments

Comments
 (0)