easybuilders · boegel · Jun 5, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024
diff --git a/easybuild/tools/run.py b/easybuild/tools/run.py
@@ -41,6 +41,7 @@
 import locale
 import os
 import re
+import shlex
 import shutil
 import string
 import subprocess
@@ -196,6 +197,51 @@ def fileprefix_from_cmd(cmd, allowed_chars=False):
     return ''.join([c for c in cmd if c in allowed_chars])
 
 
+def create_cmd_scripts(cmd_str, work_dir, env, tmpdir):
+    """
+    Create helper scripts for specified command in specified directory:
+    - env.sh which can be sourced to define environment in which command was run;
+    - cmd.sh to create interactive (bash) shell session with working directory and environment,
+      and with the command in shell history.
+
+    :param cmd_str: command that was run (string)
+    :param work_dir: working directory of command (current working directory is used if not specified)
+    :param env: dict with environment variables for command (copy of os.environ is used if None)
+    :param tmpdir: path to existing directory in which env.sh and cmd.sh are created
+    """
+    if env is None:
+        env = os.environ.copy()
+
+    env_fp = os.path.join(tmpdir, 'env.sh')
+    with open(env_fp, 'w') as fid:
+        # unset all environment variables in current environment first to start from a clean slate;
+        # we need to be careful to filter out function definitions, so first undefine those
+        fid.write("unset -f $(env | grep '%=' | cut -f1 -d'%' | sed 's/BASH_FUNC_//g')\n")
+        fid.write("unset $(env | cut -f1 -d=)\n")
+
+        # excludes bash functions (environment variables ending with %)
+        fid.write('\n'.join(f'export {key}={shlex.quote(value)}' for key, value in sorted(env.items())
+                            if not key.endswith('%')) + '\n')
+        fid.write('\n\nPS1="eb-shell> "')
+        # change to working directory of command (shell-quoted, path may include spaces, quotes, $, ...),
+        fid.write(f'\ncd {shlex.quote(work_dir or os.getcwd())}')
+        # and add executed command to shell history
+        fid.write(f'\nhistory -s {shlex.quote(cmd_str)}')
+
+    cmd_fp = os.path.join(tmpdir, 'cmd.sh')
+    with open(cmd_fp, 'w') as fid:
+        fid.write('\n'.join([
+            '#!/usr/bin/env bash',
+            '# Run this script to set up a shell environment that EasyBuild used to run the shell command',
+            'EB_SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )',
+            # message is shell-quoted as a whole, so $(...), backticks, quotes in command are printed, not interpreted
+            'echo ' + shlex.quote(f'# Shell for the command: {shlex.quote(cmd_str)}'),
+            'echo "# Use command history, exit to stop"',
+            # using -i to force interactive shell, so env.sh is also sourced when -c is used to run commands
+            'bash --rcfile "$EB_SCRIPT_DIR/env.sh" -i "$@"',
+        ]) + '\n')
+    os.chmod(cmd_fp, 0o775)
+
+
 def _answer_question(stdout, proc, qa_patterns, qa_wait_patterns):
     """
     Private helper function to try and answer questions raised in interactive shell commands.
@@ -329,12 +375,17 @@ def to_cmd_str(cmd):
         _log.info(f"Auto-enabling streaming output of '{cmd_str}' command because logging to stdout is enabled")
         stream_output = True
 
-    # temporary output file(s) for command output
+    # temporary output file(s) for command output, along with helper scripts
     if output_file:
         toptmpdir = os.path.join(tempfile.gettempdir(), 'run-shell-cmd-output')
         os.makedirs(toptmpdir, exist_ok=True)
         cmd_name = fileprefix_from_cmd(os.path.basename(cmd_str.split(' ')[0]))
         tmpdir = tempfile.mkdtemp(dir=toptmpdir, prefix=f'{cmd_name}-')
+
+        _log.info(f'run_shell_cmd: command environment of "{cmd_str}" will be saved to {tmpdir}')
+
+        create_cmd_scripts(cmd_str, work_dir, env, tmpdir)
+
         cmd_out_fp = os.path.join(tmpdir, 'out.txt')
         _log.info(f'run_shell_cmd: Output of "{cmd_str}" will be logged to {cmd_out_fp}')
         if split_stderr:
@@ -343,7 +394,7 @@ def to_cmd_str(cmd):
         else:
             cmd_err_fp = None
     else:
-        cmd_out_fp, cmd_err_fp = None, None
+        tmpdir, cmd_out_fp, cmd_err_fp = None, None, None
 
     interactive = bool(qa_patterns)
     interactive_msg = 'interactive ' if interactive else ''
@@ -361,7 +412,7 @@ def to_cmd_str(cmd):
 
     start_time = datetime.now()
     if not hidden:
-        _cmd_trace_msg(cmd_str, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp, thread_id, interactive=interactive)
+        _cmd_trace_msg(cmd_str, start_time, work_dir, stdin, tmpdir, thread_id, interactive=interactive)
 
     if stream_output:
         print_msg(f"(streaming) output for command '{cmd_str}':")
@@ -522,16 +573,15 @@ def to_cmd_str(cmd):
     return res
 
 
-def _cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp, thread_id, interactive=False):
+def _cmd_trace_msg(cmd, start_time, work_dir, stdin, tmpdir, thread_id, interactive=False):
     """
     Helper function to construct and print trace message for command being run
 
     :param cmd: command being run
     :param start_time: datetime object indicating when command was started
     :param work_dir: path of working directory in which command is run
     :param stdin: stdin input value for command
-    :param cmd_out_fp: path to output file for command
-    :param cmd_err_fp: path to errors/warnings output file for command
+    :param tmpdir: path to temporary output directory for command
     :param thread_id: thread ID (None when not running shell command asynchronously)
     :param interactive: boolean indicating whether it is an interactive command, or not
     """
@@ -551,10 +601,8 @@ def _cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp, thr
     ]
     if stdin:
         lines.append(f"\t[input: {stdin}]")
-    if cmd_out_fp:
-        lines.append(f"\t[output saved to {cmd_out_fp}]")
-    if cmd_err_fp:
-        lines.append(f"\t[errors/warnings saved to {cmd_err_fp}]")
+    if tmpdir:
+        lines.append(f"\t[output and state saved to {tmpdir}]")
 
     trace_msg('\n'.join(lines))
 

diff --git a/test/framework/run.py b/test/framework/run.py
@@ -179,6 +179,10 @@ def test_run_cmd(self):
     def test_run_shell_cmd_basic(self):
         """Basic test for run_shell_cmd function."""
 
+        os.environ['FOOBAR'] = 'foobar'
+
+        cwd = change_dir(self.test_prefix)
+
         with self.mocked_stdout_stderr():
             res = run_shell_cmd("echo hello")
         self.assertEqual(res.output, "hello\n")
@@ -189,6 +193,43 @@ def test_run_shell_cmd_basic(self):
         self.assertEqual(res.stderr, None)
         self.assertTrue(res.work_dir and isinstance(res.work_dir, str))
 
+        change_dir(cwd)
+        del os.environ['FOOBAR']
+
+        # check on helper scripts that were generated for this command
+        paths = glob.glob(os.path.join(self.test_prefix, 'eb-*', 'run-shell-cmd-output', 'echo-*'))
+        self.assertEqual(len(paths), 1)
+        cmd_tmpdir = paths[0]
+
+        # check on env.sh script that can be used to set up environment in which command was run
+        env_script = os.path.join(cmd_tmpdir, 'env.sh')
+        self.assertExists(env_script)
+        env_script_txt = read_file(env_script)
+        self.assertIn("export FOOBAR=foobar", env_script_txt)
+        self.assertIn("history -s 'echo hello'", env_script_txt)
+
+        with self.mocked_stdout_stderr():
+            res = run_shell_cmd(f"source {env_script}; echo $USER; echo $FOOBAR; history")
+        self.assertEqual(res.exit_code, 0)
+        user = os.getenv('USER')
+        self.assertTrue(res.output.startswith(f'{user}\nfoobar\n'))
+        self.assertTrue(res.output.endswith("echo hello\n"))
+
+        # check on cmd.sh script that can be used to create interactive shell environment for command
+        cmd_script = os.path.join(cmd_tmpdir, 'cmd.sh')
+        self.assertExists(cmd_script)
+
+        with self.mocked_stdout_stderr():
+            res = run_shell_cmd(f"{cmd_script} -c 'echo pwd: $PWD; echo $FOOBAR'", fail_on_error=False)
+        self.assertEqual(res.exit_code, 0)
+        self.assertTrue(res.output.endswith('foobar\n'))
+        # check whether working directory is what's expected
+        regex = re.compile('^pwd: .*', re.M)
+        res = regex.findall(res.output)
+        self.assertEqual(len(res), 1)
+        pwd = res[0].strip()[5:]
+        self.assertTrue(os.path.samefile(pwd, self.test_prefix))
+
         # test running command that emits non-UTF-8 characters
         # this is constructed to reproduce errors like:
         # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe2
@@ -207,6 +248,45 @@ def test_run_shell_cmd_basic(self):
             self.assertTrue(isinstance(res.output, str))
             self.assertTrue(res.work_dir and isinstance(res.work_dir, str))
 
+    def test_run_shell_cmd_env(self):
+        """Test env option in run_shell_cmd, incl. helper scripts generated for the custom environment."""
+
+        # only environment variables passed via 'env' are defined for the command,
+        # with a few exceptions (like $_, $PWD), so $HOME and $USER will not be set
+        cmd = "env | sort"
+        with self.mocked_stdout_stderr():
+            res = run_shell_cmd(cmd, env={'FOOBAR': 'foobar', 'PATH': os.getenv('PATH')})
+        self.assertEqual(res.cmd, cmd)
+        self.assertEqual(res.exit_code, 0)
+        self.assertIn("FOOBAR=foobar\n", res.output)
+        self.assertTrue(re.search("^_=.*/env$", res.output, re.M))
+        for var in ('HOME', 'USER'):
+            self.assertFalse(re.search('^' + var + '=.*', res.output, re.M))
+
+        # check on helper scripts that were generated for this command
+        paths = glob.glob(os.path.join(self.test_prefix, 'eb-*', 'run-shell-cmd-output', 'env-*'))
+        self.assertEqual(len(paths), 1)
+        cmd_tmpdir = paths[0]
+
+        # set environment variable in current environment, this should not be set in shell environment
+        # produced by scripts; it is removed again after the test so it can not leak into other tests
+        os.environ['TEST123'] = 'test123'
+        self.addCleanup(os.environ.pop, 'TEST123', None)
+
+        env_script = os.path.join(cmd_tmpdir, 'env.sh')
+        self.assertExists(env_script)
+        env_script_txt = read_file(env_script)
+        self.assertTrue(env_script_txt.startswith('unset -f $('))
+        self.assertIn('\nexport FOOBAR=foobar\nexport PATH', env_script_txt)
+
+        cmd_script = os.path.join(cmd_tmpdir, 'cmd.sh')
+        self.assertExists(cmd_script)
+
+        with self.mocked_stdout_stderr():
+            res = run_shell_cmd(f"{cmd_script} -c 'echo $FOOBAR; echo TEST123:$TEST123'", fail_on_error=False)
+        self.assertEqual(res.exit_code, 0)
+        self.assertTrue(res.output.endswith('\nfoobar\nTEST123:\n'))
+
     def test_fileprefix_from_cmd(self):
         """test simplifications from fileprefix_from_cmd."""
         cmds = {
@@ -676,7 +756,7 @@ def test_run_shell_cmd_trace(self):
             r"\techo hello",
             r"\t\[started at: .*\]",
             r"\t\[working dir: .*\]",
-            r"\t\[output saved to .*\]",
+            r"\t\[output and state saved to .*\]",
             r"  >> command completed: exit 0, ran in .*",
         ]
 
@@ -736,7 +816,7 @@ def test_run_shell_cmd_trace_stdin(self):
             r"\techo hello",
             r"\t\[started at: [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\]",
             r"\t\[working dir: .*\]",
-            r"\t\[output saved to .*\]",
+            r"\t\[output and state saved to .*\]",
             r"  >> command completed: exit 0, ran in .*",
         ]
 
@@ -1092,7 +1172,7 @@ def test_run_shell_cmd_qa_trace(self):
         pattern += r"\techo \'n: \'; read n; seq 1 \$n\n"
         pattern += r"\t\[started at: .*\]\n"
         pattern += r"\t\[working dir: .*\]\n"
-        pattern += r"\t\[output saved to .*\]\n"
+        pattern += r"\t\[output and state saved to .*\]\n"
         pattern += r'  >> command completed: exit 0, ran in .*'
         self.assertTrue(re.search(pattern, stdout), "Pattern '%s' found in: %s" % (pattern, stdout))
 

diff --git a/test/framework/toy_build.py b/test/framework/toy_build.py
@@ -2985,7 +2985,7 @@ def test_toy_build_trace(self):
                 r"\tgcc toy.c -o toy\n"
                 r"\t\[started at: .*\]",
                 r"\t\[working dir: .*\]",
-                r"\t\[output saved to .*\]",
+                r"\t\[output and state saved to .*\]",
                 r'',
             ]),
             r"  >> command completed: exit 0, ran in .*",