Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";

/**
* Tests retry behavior when connection drops after tool-call chunks were emitted
* but before the stream is finalized. This simulates an orphaned tool-call retry
* window and ensures we don't duplicate tool execution.
*
* Shape of the scripted conversation:
* - turn 0: intro text plus a single `write_file` tool call (the turn targeted
*   by `dropConnectionAfterToolCallByTurn`);
* - turn 1: plain completion text, no tool calls.
*
* NOTE(review): the exact drop semantics are implemented by the fake LLM
* server, not here — confirm there that `attempts: [1]` means "only the first
* attempt of that turn is dropped" (i.e. attempts are 1-based).
*/
export const fixture: LocalAgentFixture = {
description: "Connection drop after streaming tool-call chunks",
// Target turn 0 (the tool-call turn); presumably only attempt 1 is dropped so
// the retry can succeed — verify against the fake server.
dropConnectionAfterToolCallByTurn: [{ turnIndex: 0, attempts: [1] }],
turns: [
{
// Turn 0: assistant text followed by one write_file tool call.
text: "I'll create a file for you.",
toolCalls: [
{
name: "write_file",
args: {
path: "src/recovered-after-tool-call.ts",
content: `export const recoveredAfterToolCall = true;\n`,
description: "File created after tool-call termination recovery",
},
},
],
},
{
// Turn 1: final assistant text once the tool turn has completed.
text: "Successfully created the file after retrying from a tool-call termination.",
},
],
};
31 changes: 31 additions & 0 deletions e2e-tests/fixtures/engine/local-agent/connection-drop.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";

/**
* Tests automatic retry after connection drop (e.g., TCP terminated mid-stream).
* This fixture drops the connection on the first attempt of turn 1 (the
* post-tool text turn), which is more realistic than dropping before any
* tool activity. The local agent handler should automatically retry and
* continue without re-running completed work.
*
* Shape of the scripted conversation:
* - turn 0: intro text plus a single `write_file` tool call;
* - turn 1: plain completion text (the turn targeted by `dropConnectionByTurn`).
*
* NOTE(review): drop behavior itself lives in the fake LLM server; confirm
* there that `attempts` is 1-based as the description above assumes.
*/
export const fixture: LocalAgentFixture = {
description: "Automatic retry after connection drop",
// Target turn 1 (the text-only follow-up), attempt 1.
dropConnectionByTurn: [{ turnIndex: 1, attempts: [1] }],
turns: [
{
// Turn 0: assistant text followed by one write_file tool call.
text: "I'll create a file for you.",
toolCalls: [
{
name: "write_file",
args: {
path: "src/recovered.ts",
content: `export const recovered = true;\n`,
description: "File created after connection recovery",
},
},
],
},
{
// Turn 1: final assistant text; this is the turn whose first attempt is dropped.
text: "Successfully created the file after automatic retry.",
},
],
};
91 changes: 91 additions & 0 deletions e2e-tests/local_agent_connection_retry.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { expect } from "@playwright/test";
import { testSkipIfWindows } from "./helpers/test_helper";

/**
* E2E test for local-agent connection retry resilience.
* Verifies that the agent automatically recovers from transient connection
* drops (e.g., TCP terminated mid-stream) by retrying the stream.
*/

// Scenario: the fake LLM drops the connection on the follow-up turn of the
// `connection-drop` fixture. The test checks that no error box appears, that
// the intro text, edit card and completion text each appear exactly once and
// in that order, and that the chat + filesystem end state match the snapshots.
testSkipIfWindows(
  "local-agent - recovers from connection drop",
  async ({ po }) => {
    await po.setUpDyadPro({ localAgent: true });
    await po.importApp("minimal");
    await po.chatActions.selectLocalAgentMode();

    // The connection-drop fixture drops on turn 1 (after a tool turn already
    // completed) to simulate a realistic interrupted follow-up request.
    await po.sendPrompt("tc=local-agent/connection-drop");

    // Verify the turn still completed and no error box leaked to the UI.
    await expect(po.page.getByTestId("chat-error-box")).toHaveCount(0);
    const introText = po.page.getByText("I'll create a file for you.");
    const completionText = po.page.getByText(
      "Successfully created the file after automatic retry.",
    );
    await expect(introText).toHaveCount(1);
    await expect(completionText).toHaveCount(1);
    await expect(introText).toBeVisible();
    await expect(completionText).toBeVisible();
    // Partial chunks from the dropped attempt must not leak into final UI.
    await expect(
      po.page.getByText("Partial response before connection dr"),
    ).toHaveCount(0);

    // Verify exactly one recovered.ts edit card is shown in chat.
    const recoveredEditCard = po.page.getByRole("button", {
      name: /recovered\.ts .*src\/recovered\.ts.*Edit/,
    });
    await expect(recoveredEditCard).toHaveCount(1);
    await expect(recoveredEditCard).toBeVisible();

    // The replayed conversation order must stay:
    // intro assistant text -> tool edit card -> completion assistant text.
    // Compare DOM (document) order rather than pixel coordinates so the check
    // does not depend on layout, scrolling or viewport size.
    const [introEl, editEl, completionEl] = await Promise.all([
      introText.elementHandle(),
      recoveredEditCard.elementHandle(),
      completionText.elementHandle(),
    ]);
    expect(introEl).toBeTruthy();
    expect(editEl).toBeTruthy();
    expect(completionEl).toBeTruthy();
    const isInOrder = await po.page.evaluate(
      ([intro, edit, completion]) =>
        !!intro &&
        !!edit &&
        !!completion &&
        // DOCUMENT_POSITION_FOLLOWING is set when the argument comes after
        // the receiver in document order.
        !!(
          intro.compareDocumentPosition(edit) &
          Node.DOCUMENT_POSITION_FOLLOWING
        ) &&
        !!(
          edit.compareDocumentPosition(completion) &
          Node.DOCUMENT_POSITION_FOLLOWING
        ),
      [introEl, editEl, completionEl],
    );
    expect(isInOrder).toBe(true);

    // Snapshot end state for chat + filesystem.
    await po.snapshotMessages();
    await po.snapshotAppFiles({
      name: "after-connection-retry",
      files: ["src/recovered.ts"],
    });
  },
);

// Scenario: the connection drops after the tool-call chunks of the
// `connection-drop-after-tool-call` fixture were streamed. The test checks
// that no error box appears, the completion text is shown, exactly one edit
// card is rendered, and the written file matches the snapshot.
testSkipIfWindows(
  "local-agent - recovers when drop happens after tool-call stream",
  async ({ po }) => {
    await po.setUpDyadPro({ localAgent: true });
    await po.importApp("minimal");
    await po.chatActions.selectLocalAgentMode();

    await po.sendPrompt("tc=local-agent/connection-drop-after-tool-call");

    await expect(po.page.getByTestId("chat-error-box")).toHaveCount(0);
    await expect(
      po.page.getByText(
        "Successfully created the file after retrying from a tool-call termination.",
      ),
    ).toBeVisible();

    // Assert the exact count instead of picking `.first()`: a duplicated edit
    // card (e.g. the tool running twice across the retry) must fail the test
    // rather than be silently ignored.
    const recoveredEditCard = po.page.getByRole("button", {
      name: /recovered-after-tool-call\.ts .*src\/recovered-after-tool-call\.ts.*Edit/,
    });
    await expect(recoveredEditCard).toHaveCount(1);
    await expect(recoveredEditCard).toBeVisible();

    await po.snapshotAppFiles({
      name: "after-tool-call-connection-retry",
      files: ["src/recovered-after-tool-call.ts"],
    });
  },
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
=== src/recovered.ts ===
export const recovered = true;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
=== src/recovered-after-tool-call.ts ===
export const recoveredAfterToolCall = true;
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
- paragraph: /Generate an AI_RULES\.md file for this app\. Describe the tech stack in 5-\d+ bullet points and describe clear rules about what libraries to use for what\./
- button "file1.txt file1.txt Edit":
- img
- text: ""
- button "Edit":
- img
- text: ""
- img
- paragraph: More EOM
- button "Copy":
- img
- img
- text: Approved
- img
- text: claude-opus-4-5
- img
- text: less than a minute ago
- img
- text: (1 files changed)
- button "Copy Request ID":
- img
- text: ""
- paragraph: tc=local-agent/connection-drop
- paragraph: I'll create a file for you.
- 'button "recovered.ts src/recovered.ts Edit Summary: File created after connection recovery"':
- img
- text: ""
- button "Edit":
- img
- text: ""
- img
- text: ""
- paragraph: Successfully created the file after automatic retry.
- button "Copy":
- img
- img
- text: claude-opus-4-5
- img
- text: less than a minute ago
- button "Copy Request ID":
- img
- text: ""
- button "Undo":
- img
- text: ""
- button "Retry":
- img
- text: ""
178 changes: 178 additions & 0 deletions src/__tests__/local_agent_handler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,184 @@ describe("handleLocalAgentStream", () => {
expect(lastContentUpdate.data.content).toContain("Hello, ");
expect(lastContentUpdate.data.content).toContain("world!");
});

it("should retry and resume when a stream terminates transiently", async () => {
  // Arrange: fake IPC event, Pro-enabled settings and a test chat.
  const { event, getMessagesByChannel } = createFakeEvent();
  mockSettings = buildTestSettings({ enableDyadPro: true });
  mockChatData = buildTestChat();

  // One entry per streamText invocation: the messages that attempt received.
  const messagesPerAttempt: any[][] = [];
  let attempts = 0;
  mockStreamTextImpl = (options) => {
    attempts += 1;
    messagesPerAttempt.push(options.messages ?? []);

    if (attempts !== 1) {
      // Every attempt after the first streams to completion.
      return {
        fullStream: (async function* () {
          yield { type: "text-delta", text: "Recovered output." };
        })(),
        response: Promise.resolve({
          messages: [
            {
              role: "assistant",
              content: [{ type: "text", text: "Recovered output." }],
            },
          ],
        }),
        steps: Promise.resolve([{ toolCalls: [] }]),
      };
    }

    // First attempt: emit some text, then fail mid-stream with
    // TypeError("terminated").
    return {
      fullStream: (async function* () {
        yield { type: "text-delta", text: "Partial response. " };
        throw new TypeError("terminated");
      })(),
      response: Promise.resolve({ messages: [] }),
      steps: Promise.resolve([]),
    };
  };

  // Act
  const abortController = new AbortController();
  await handleLocalAgentStream(
    event,
    { chatId: 1, prompt: "test" },
    abortController,
    {
      placeholderMessageId: 10,
      systemPrompt: "You are helpful",
      dyadRequestId,
    },
  );

  // Assert: exactly one retry happened and no error reached the renderer.
  expect(attempts).toBe(2);
  expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);

  // The last persisted content must hold both the partial and recovered text.
  const updatesWithContent = dbOperations.updates.filter(
    (update) => update.data.content !== undefined,
  );
  const lastUpdate = updatesWithContent[updatesWithContent.length - 1];
  const finalContent = lastUpdate.data.content as string;
  expect(finalContent).toContain("Partial response.");
  expect(finalContent).toContain("Recovered output.");

  // The retry request must carry a user message telling the model that the
  // previous stream was interrupted.
  const isInterruptionNotice = (part: any) =>
    part.type === "text" &&
    typeof part.text === "string" &&
    part.text.includes(
      "previous response stream was interrupted by a transient network error",
    );
  const retryMessages = messagesPerAttempt[1] ?? [];
  const continuationInstructionFound = retryMessages.some(
    (message: any) =>
      message.role === "user" &&
      Array.isArray(message.content) &&
      message.content.some(isInterruptionNotice),
  );
  expect(continuationInstructionFound).toBe(true);
});

it("should replay emitted tool events before retrying a terminated stream", async () => {
  // Arrange: fake IPC event, Pro-enabled settings and a test chat.
  const { event, getMessagesByChannel } = createFakeEvent();
  mockSettings = buildTestSettings({ enableDyadPro: true });
  mockChatData = buildTestChat();

  // One entry per streamText invocation: the messages that attempt received.
  const messagesPerAttempt: any[][] = [];
  let attempts = 0;
  mockStreamTextImpl = (options) => {
    attempts += 1;
    messagesPerAttempt.push(options.messages ?? []);

    if (attempts !== 1) {
      // Every attempt after the first streams to completion.
      return {
        fullStream: (async function* () {
          yield { type: "text-delta", text: "Resumed after replay." };
        })(),
        response: Promise.resolve({
          messages: [
            {
              role: "assistant",
              content: [{ type: "text", text: "Resumed after replay." }],
            },
          ],
        }),
        steps: Promise.resolve([{ toolCalls: [] }]),
      };
    }

    // First attempt: text, a full tool-call/tool-result pair, then a
    // mid-stream TypeError("terminated").
    return {
      fullStream: (async function* () {
        yield { type: "text-delta", text: "Working with tools. " };
        yield {
          type: "tool-call",
          toolCallId: "call_replay_1",
          toolName: "read_file",
          input: { path: "README.md" },
        };
        yield {
          type: "tool-result",
          toolCallId: "call_replay_1",
          toolName: "read_file",
          output: "README content",
        };
        throw new TypeError("terminated");
      })(),
      response: Promise.resolve({ messages: [] }),
      steps: Promise.resolve([]),
    };
  };

  // Act
  const abortController = new AbortController();
  await handleLocalAgentStream(
    event,
    { chatId: 1, prompt: "test" },
    abortController,
    {
      placeholderMessageId: 10,
      systemPrompt: "You are helpful",
      dyadRequestId,
    },
  );

  // Assert: exactly one retry happened and no error reached the renderer.
  expect(attempts).toBe(2);
  expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);

  // The retry request must contain the tool call and its result from the
  // interrupted attempt, as an assistant tool-call part and a tool-role
  // tool-result part with a text-typed output.
  const retryMessages = messagesPerAttempt[1] ?? [];
  const someMessageHasPart = (
    role: string,
    matchesPart: (part: any) => boolean,
  ) =>
    retryMessages.some(
      (message: any) =>
        message.role === role &&
        Array.isArray(message.content) &&
        message.content.some(matchesPart),
    );

  const hasReplayedToolCall = someMessageHasPart(
    "assistant",
    (part: any) =>
      part.type === "tool-call" &&
      part.toolCallId === "call_replay_1" &&
      part.toolName === "read_file",
  );
  const hasReplayedToolResult = someMessageHasPart(
    "tool",
    (part: any) =>
      part.type === "tool-result" &&
      part.toolCallId === "call_replay_1" &&
      part.toolName === "read_file" &&
      part.output?.type === "text" &&
      part.output?.value === "README content",
  );

  expect(hasReplayedToolCall).toBe(true);
  expect(hasReplayedToolResult).toBe(true);
});
});

describe("Stream processing - reasoning blocks", () => {
Expand Down
Loading
Loading