diff --git a/scripts/coredump_gen_handler.py b/scripts/coredump_gen_handler.py index 03ba2de89e..525b7f1643 100644 --- a/scripts/coredump_gen_handler.py +++ b/scripts/coredump_gen_handler.py @@ -111,7 +111,7 @@ def parse_ts_dump_name(self, ts_stdout): return "" def invoke_ts_cmd(self, since_cfg, num_retry=0): - cmd_opts = ["show", "techsupport", "--silent", "--since", since_cfg] + cmd_opts = ["show", "techsupport", "--silent", "--global-timeout", TS_GLOBAL_TIMEOUT, "--since", since_cfg] cmd = " ".join(cmd_opts) rc, stdout, stderr = subprocess_exec(cmd_opts, env=ENV_VAR) new_dump = "" diff --git a/scripts/generate_dump b/scripts/generate_dump index b5dd472e05..018d884330 100755 --- a/scripts/generate_dump +++ b/scripts/generate_dump @@ -1287,11 +1287,6 @@ main() { end_t=$(date +%s%3N) echo "[ Capture Proc State ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO - # Save logs and cores early - save_log_files - save_crash_files - save_warmboot_files - # Save all the processes within each docker save_cmd "show services" services.summary @@ -1426,6 +1421,10 @@ main() { end_t=$(date +%s%3N) echo "[ TAR /etc Files ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO + save_log_files + save_crash_files + save_warmboot_files + finalize } diff --git a/show/main.py b/show/main.py index 198940bc3e..6520130f7c 100755 --- a/show/main.py +++ b/show/main.py @@ -1138,7 +1138,7 @@ def users(verbose): @cli.command() @click.option('--since', required=False, help="Collect logs and core files since given date") -@click.option('-g', '--global-timeout', default=30, type=int, help="Global timeout in minutes. Default 30 mins") +@click.option('-g', '--global-timeout', required=False, type=int, help="Global timeout in minutes. WARN: Dump might be incomplete if enforced") @click.option('-c', '--cmd-timeout', default=5, type=int, help="Individual command timeout in minutes. Default 5 mins") @click.option('--verbose', is_flag=True, help="Enable verbose output") @click.option('--allow-process-stop', is_flag=True, help="Dump additional data which may require system interruption") @@ -1147,7 +1147,10 @@ def users(verbose): @click.option('--redirect-stderr', '-r', is_flag=True, help="Redirect an intermediate errors to STDERR") def techsupport(since, global_timeout, cmd_timeout, verbose, allow_process_stop, silent, debug_dump, redirect_stderr): """Gather information for troubleshooting""" - cmd = "sudo timeout --kill-after={}s -s SIGTERM --foreground {}m".format(COMMAND_TIMEOUT, global_timeout) + cmd = "sudo" + + if global_timeout: + cmd += " timeout --kill-after={}s -s SIGTERM --foreground {}m".format(COMMAND_TIMEOUT, global_timeout) if allow_process_stop: cmd += " -a" diff --git a/tests/coredump_gen_handler_test.py b/tests/coredump_gen_handler_test.py index bf4ae8dc78..96d275970e 100644 --- a/tests/coredump_gen_handler_test.py +++ b/tests/coredump_gen_handler_test.py @@ -20,6 +20,8 @@ /tmp/saisdkdump """ +TS_DEFAULT_CMD = "show techsupport --silent --global-timeout 60 --since 2 days ago" + def signal_handler(signum, frame): raise Exception("Timed out!") @@ -427,4 +429,23 @@ def mock_cmd(cmd, env): assert False, "Method should not time out" finally: signal.alarm(0) + + def test_auto_ts_options(self): + """ + Scenario: Check if the techsupport is called as expected + """ + db_wrap = Db() + redis_mock = db_wrap.db + set_auto_ts_cfg(redis_mock, state="enabled", since_cfg="2 days ago") + set_feature_table_cfg(redis_mock, state="enabled") + with Patcher() as patcher: + def mock_cmd(cmd, env): + cmd_str = " ".join(cmd) + if "show techsupport" in cmd_str and cmd_str != TS_DEFAULT_CMD: + assert False, "Expected TS_CMD: {}, Recieved: {}".format(TS_DEFAULT_CMD, cmd_str) + return 0, AUTO_TS_STDOUT, "" + cdump_mod.subprocess_exec = mock_cmd + patcher.fs.create_file("/var/core/orchagent.12345.123.core.gz") + cls = cdump_mod.CriticalProcCoreDumpHandle("orchagent.12345.123.core.gz", "swss", redis_mock) + cls.handle_core_dump_creation_event() diff --git a/tests/techsupport_test.py b/tests/techsupport_test.py index 64bc133627..8effa89887 100644 --- a/tests/techsupport_test.py +++ b/tests/techsupport_test.py @@ -3,18 +3,18 @@ from unittest.mock import patch, Mock from click.testing import CliRunner -EXPECTED_BASE_COMMAND = 'sudo timeout --kill-after=300s -s SIGTERM --foreground ' +EXPECTED_BASE_COMMAND = 'sudo ' @patch("show.main.run_command") @pytest.mark.parametrize( "cli_arguments,expected", [ - ([], '30m generate_dump -v -t 5'), - (['--since', '2 days ago'], "30m generate_dump -v -s '2 days ago' -t 5"), - (['-g', '50'], '50m generate_dump -v -t 5'), - (['--allow-process-stop'], '30m -a generate_dump -v -t 5'), - (['--silent'], '30m generate_dump -t 5'), - (['--debug-dump', '--redirect-stderr'], '30m generate_dump -v -d -t 5 -r'), + ([], 'generate_dump -v -t 5'), + (['--since', '2 days ago'], "generate_dump -v -s '2 days ago' -t 5"), + (['-g', '50'], 'timeout --kill-after=300s -s SIGTERM --foreground 50m generate_dump -v -t 5'), + (['--allow-process-stop'], '-a generate_dump -v -t 5'), + (['--silent'], 'generate_dump -t 5'), + (['--debug-dump', '--redirect-stderr'], 'generate_dump -v -d -t 5 -r'), ] ) def test_techsupport(run_command, cli_arguments, expected): diff --git a/utilities_common/auto_techsupport_helper.py b/utilities_common/auto_techsupport_helper.py index 4eaae933b0..1a461d379e 100644 --- a/utilities_common/auto_techsupport_helper.py +++ b/utilities_common/auto_techsupport_helper.py @@ -13,7 +13,7 @@ "CFG_CORE_USAGE", "CFG_SINCE", "FEATURE", "STATE_DB", "TS_MAP", "CORE_DUMP", "TIMESTAMP", "CONTAINER", "TIME_BUF", "SINCE_DEFAULT", "TS_PTRN_GLOB", "EXT_LOCKFAIL", "EXT_RETRY", - "EXT_SUCCESS", "MAX_RETRY_LIMIT" + "EXT_SUCCESS", "MAX_RETRY_LIMIT", "TS_GLOBAL_TIMEOUT" ] + [ # Methods "verify_recent_file_creation", "get_ts_dumps", @@ -60,6 +60,7 @@ TIME_BUF = 20 SINCE_DEFAULT = "2 days ago" +TS_GLOBAL_TIMEOUT = "60" # Techsupport Exit Codes EXT_LOCKFAIL = 2