Add post-execution delay to run stale cells tool; add post-edit steps to agent mode prompt

Light2Dark · Light2Dark · commit a037962fe5ef · 2025-10-29T20:57:12.000+08:00
diff --git a/frontend/src/core/ai/tools/__tests__/run-cells-tool.test.ts b/frontend/src/core/ai/tools/__tests__/run-cells-tool.test.ts
@@ -44,7 +44,7 @@ describe("RunStaleCellsTool", () => {
       store,
     };
 
-    tool = new RunStaleCellsTool();
+    tool = new RunStaleCellsTool({ postExecutionDelay: 0 });
 
     cellId1 = "cell-1" as CellId;
     cellId2 = "cell-2" as CellId;
diff --git a/frontend/src/core/ai/tools/edit-notebook-tool.ts b/frontend/src/core/ai/tools/edit-notebook-tool.ts
@@ -190,7 +190,11 @@ export class EditNotebookTool
     }
     return {
       status: "success",
-      next_steps: ["If you need to perform more edits, call this tool again."],
+      next_steps: [
+        "If you need to perform more edits, call this tool again.",
+        "You should use the lint notebook tool to check for errors and lint issues. Fix them by editing the notebook.",
+        "You should use the run stale cells tool to run the cells that have been edited or newly added. This allows you to see the output of the cells and fix any errors.",
+      ],
     };
   };
 
diff --git a/frontend/src/core/ai/tools/run-cells-tool.ts b/frontend/src/core/ai/tools/run-cells-tool.ts
@@ -21,6 +21,9 @@ import {
 } from "./base";
 import type { CopilotMode } from "./registry";
 
+const POST_EXECUTION_DELAY = 200;
+const WAIT_FOR_CELLS_TIMEOUT = 30_000;
+
 interface CellOutput {
   consoleOutput?: string;
   cellOutput?: string;
@@ -65,6 +68,12 @@ export class RunStaleCellsTool
   }) satisfies z.ZodType<RunStaleCellsOutput>;
   readonly mode: CopilotMode[] = ["agent"];
 
+  private readonly postExecutionDelay: number;
+
+  constructor(opts?: { postExecutionDelay?: number }) {
+    this.postExecutionDelay = opts?.postExecutionDelay ?? POST_EXECUTION_DELAY;
+  }
+
   handler = async (
     _args: EmptyToolInput,
     toolContext: ToolNotebookContext,
@@ -89,7 +98,12 @@ export class RunStaleCellsTool
     });
 
     // Wait for all cells to finish executing
-    const allCellsFinished = await this.waitForCellsToFinish(store, staleCells);
+    const allCellsFinished = await this.waitForCellsToFinish(
+      store,
+      staleCells,
+      WAIT_FOR_CELLS_TIMEOUT,
+      this.postExecutionDelay,
+    );
     if (!allCellsFinished) {
       return {
         status: "success",
@@ -115,7 +129,9 @@ export class RunStaleCellsTool
 
       const cellOutput = cellContextData.cellOutput;
       const consoleOutputs = cellContextData.consoleOutputs;
-      if (!cellOutput && !consoleOutputs) {
+      const hasConsoleOutput = consoleOutputs && consoleOutputs.length > 0;
+
+      if (!cellOutput && !hasConsoleOutput) {
         // Set null to show no output
         cellsToOutput.set(cellId, null);
         continue;
@@ -128,7 +144,7 @@ export class RunStaleCellsTool
         }
       }
 
-      if (consoleOutputs) {
+      if (hasConsoleOutput) {
         consoleOutputString = consoleOutputs
           .map((output) => this.formatOutputString(output))
           .join("\n");
@@ -207,7 +223,8 @@ export class RunStaleCellsTool
   private async waitForCellsToFinish(
     store: JotaiStore,
     cellIds: CellId[],
-    timeout = 30_000,
+    timeout: number,
+    postExecutionDelay: number,
   ): Promise<boolean> {
     const checkAllFinished = (
       notebook: ReturnType<typeof notebookAtom.read>,
@@ -220,9 +237,18 @@ export class RunStaleCellsTool
       });
     };
 
-    // If already finished, return immediately
-    if (checkAllFinished(store.get(notebookAtom))) {
+    // Add a small delay after cells finish to allow console outputs to arrive
+    // Console outputs are streamed and might still be in-flight
+    const delayForConsoleOutputs = async () => {
+      if (postExecutionDelay > 0) {
+        await new Promise((resolve) => setTimeout(resolve, postExecutionDelay));
+      }
       return true;
+    };
+
+    // If all cells are already finished, skip waiting for state changes and only apply the post-execution delay
+    if (checkAllFinished(store.get(notebookAtom))) {
+      return await delayForConsoleOutputs();
     }
 
     // Wait for notebook state changes with timeout
@@ -233,7 +259,7 @@ export class RunStaleCellsTool
           setTimeout(() => reject(new Error("timeout")), timeout),
         ),
       ]);
-      return true;
+      return await delayForConsoleOutputs();
     } catch {
       return false;
     }
diff --git a/marimo/_server/ai/prompts.py b/marimo/_server/ai/prompts.py
@@ -250,7 +250,10 @@ def _get_mode_intro_message(mode: CopilotMode) -> str:
             "You are in agent mode - you have autonomy to resolve the user's query by using the tools provided. Please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. \n"
             "\n\n## Agent Mode\n"
             "- You are encouraged to edit existing cells in the notebook or add new cells.\n"
-            "- Once you have edited the notebook, you can use the run cells tool to run the code. Then, run the lint notebook tool to check for errors and lint issues. If there are errors in cells you have added, edit the existing cell. Don't add new cells to correct errors.\n"
+            "- You should do the following things after editing the notebook:\n"
+            "\t 1. Use the lint notebook tool to check for errors and lint issues\n"
+            "\t 2. Run stale cells tool to run the code\n"
+            "\t 3. If there are errors in cells you have added, edit the existing cell. Don't add new cells to correct errors.\n"
             "- If you say you're about to do something, actually do it in the same turn (run the tool call right after).\n"
             "- Group code into logical cells, eg. functions should be in separate cells and all the calls will be in one cell. When asked for explanations or summaries, use markdown cells with proper formatting.\n\n"
             "## Capabilities\n"
diff --git a/tests/_server/ai/snapshots/chat_system_prompts.txt b/tests/_server/ai/snapshots/chat_system_prompts.txt
@@ -899,7 +899,10 @@ You are in agent mode - you have autonomy to resolve the user's query by using t
 
 ## Agent Mode
 - You are encouraged to edit existing cells in the notebook or add new cells.
-- Once you have edited the notebook, you can use the run cells tool to run the code. Then, run the lint notebook tool to check for errors and lint issues. If there are errors in cells you have added, edit the existing cell. Don't add new cells to correct errors.
+- You should do the following things after editing the notebook:
+	 1. Use the lint notebook tool to check for errors and lint issues
+	 2. Run stale cells tool to run the code
+	 3. If there are errors in cells you have added, edit the existing cell. Don't add new cells to correct errors.
 - If you say you're about to do something, actually do it in the same turn (run the tool call right after).
 - Group code into logical cells, eg. functions should be in separate cells and all the calls will be in one cell. When asked for explanations or summaries, use markdown cells with proper formatting.