Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions packages/core/src/core/coreToolScheduler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,175 @@ describe('CoreToolScheduler request queueing', () => {
});
});

// Exercises how the scheduler treats requests flagged with
// `wasOutputTruncated`, using one scheduled call per test.
describe('CoreToolScheduler truncated output protection', () => {
  /**
   * Creates a CoreToolScheduler wired to a stub registry that always resolves
   * to `tool`, together with a minimal stub Config.
   *
   * @param tool The single tool instance the stub registry hands out.
   * @param toolNames Names reported by the stub registry's getAllToolNames().
   * @returns The scheduler and the completion spy used for assertions.
   */
  function createTruncationTestScheduler(
    tool: TestApprovalTool | MockTool,
    toolNames: string[],
  ) {
    const onAllToolCallsComplete = vi.fn();

    const registryStub = {
      getTool: () => tool,
      getAllToolNames: () => toolNames,
      getFunctionDeclarations: () => [],
      tools: new Map(),
    } as unknown as ToolRegistry;

    const configStub = {
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => true,
      getDebugMode: () => false,
      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
      getAllowedTools: () => [],
      getExcludeTools: () => undefined,
      getContentGeneratorConfig: () => ({
        model: 'test-model',
        authType: 'gemini',
      }),
      getShellExecutionConfig: () => ({
        terminalWidth: 90,
        terminalHeight: 30,
      }),
      storage: {
        getProjectTempDir: () => '/tmp',
      },
      getTruncateToolOutputThreshold: () =>
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => registryStub,
      getUseModelRouter: () => false,
      getGeminiClient: () => null,
      getChatRecordingService: () => undefined,
      isInteractive: () => true,
    } as unknown as Config;

    const scheduler = new CoreToolScheduler({
      config: configStub,
      onAllToolCallsComplete,
      onToolCallsUpdate: vi.fn(),
      getPreferredEditor: () => 'vscode',
      onEditorClose: vi.fn(),
    });

    return { scheduler, onAllToolCallsComplete };
  }

  /**
   * Schedules exactly one tool call against a fresh scheduler, waits until the
   * completion callback has fired, and returns the calls it was first invoked
   * with.
   *
   * @param tool Tool instance the stub registry should resolve to.
   * @param call The per-test fields of the request; `callId` and
   *   `isClientInitiated` are the same for every test and filled in here.
   */
  async function runSingleToolCall(
    tool: TestApprovalTool | MockTool,
    call: {
      name: string;
      args: Record<string, unknown>;
      prompt_id: string;
      wasOutputTruncated: boolean;
    },
  ): Promise<ToolCall[]> {
    const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
      tool,
      [call.name],
    );

    await scheduler.schedule(
      [
        {
          callId: '1',
          name: call.name,
          args: call.args,
          isClientInitiated: false,
          prompt_id: call.prompt_id,
          wasOutputTruncated: call.wasOutputTruncated,
        },
      ],
      new AbortController().signal,
    );

    await vi.waitFor(() => {
      expect(onAllToolCallsComplete).toHaveBeenCalled();
    });

    // First invocation, first argument: the list of completed tool calls.
    return onAllToolCallsComplete.mock.calls[0][0] as ToolCall[];
  }

  /** Builds the approval test tool with an AUTO_EDIT stub config. */
  const makeApprovalTool = () =>
    new TestApprovalTool({
      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
    } as unknown as Config);

  it('should reject Kind.Edit tool calls when wasOutputTruncated is true', async () => {
    const finished = await runSingleToolCall(makeApprovalTool(), {
      name: TestApprovalTool.Name,
      args: { id: 'test-truncated' },
      prompt_id: 'prompt-id-truncated',
      wasOutputTruncated: true,
    });

    expect(finished).toHaveLength(1);
    const outcome = finished[0];
    expect(outcome.status).toBe('error');

    // The guard narrows the ToolCall union so `response.error` is accessible.
    if (outcome.status === 'error') {
      const message = outcome.response.error?.message;
      expect(message).toContain('truncated due to max_tokens limit');
      expect(message).toContain(
        'rejected to prevent writing truncated content',
      );
    }
  });

  it('should allow Kind.Edit tool calls when wasOutputTruncated is false', async () => {
    const finished = await runSingleToolCall(makeApprovalTool(), {
      name: TestApprovalTool.Name,
      args: { id: 'test-normal' },
      prompt_id: 'prompt-id-normal',
      wasOutputTruncated: false,
    });

    expect(finished).toHaveLength(1);
    // No truncation flag, so the call is expected to run to completion.
    expect(finished[0].status).toBe('success');
  });

  it('should allow non-Edit tools when wasOutputTruncated is true', async () => {
    const readTool = new MockTool({
      name: 'mockReadTool',
      execute: async () => ({
        llmContent: 'read result',
        returnDisplay: 'read result',
      }),
    });

    const finished = await runSingleToolCall(readTool, {
      name: 'mockReadTool',
      args: {},
      prompt_id: 'prompt-id-read-truncated',
      wasOutputTruncated: true,
    });

    expect(finished).toHaveLength(1);
    // Truncation alone must not block a tool that is not an Edit tool.
    expect(finished[0].status).toBe('success');
  });
});

describe('CoreToolScheduler Sequential Execution', () => {
it('should execute tool calls in a batch sequentially', async () => {
// Arrange
Expand Down
42 changes: 41 additions & 1 deletion packages/core/src/core/coreToolScheduler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
logToolOutputTruncated,
ToolOutputTruncatedEvent,
InputFormat,
Kind,
SkillTool,
} from '../index.js';
import type {
Expand All @@ -55,6 +56,23 @@ import levenshtein from 'fast-levenshtein';
import { getPlanModeSystemReminder } from './prompts.js';
import { ShellToolInvocation } from '../tools/shell.js';

/**
 * Hint appended to a tool-parameter validation error when the request was
 * flagged as coming from a truncated model response.
 * Written as one sentence per entry; the entries are joined by single spaces.
 */
const TRUNCATION_PARAM_GUIDANCE = [
  'Note: Your previous response was truncated due to max_tokens limit, which likely caused incomplete tool call parameters.',
  'Please retry the tool call with complete parameters.',
  'If the content is too large for a single response, consider splitting it into smaller parts.',
].join(' ');

/**
 * Error message used when a call to an Edit-kind tool is rejected because the
 * request was flagged as coming from a truncated model response.
 * Written as one sentence per entry; the entries are joined by single spaces.
 */
const TRUNCATION_EDIT_REJECTION = [
  'Your previous response was truncated due to max_tokens limit, which likely produced incomplete file content.',
  'The tool call has been rejected to prevent writing truncated content to the file.',
  'Please retry the tool call with complete content.',
  'If the content is too large for a single response, consider splitting it into smaller parts (e.g., write_file for initial content, then edit for additions).',
].join(' ');

export type ValidatingToolCall = {
status: 'validating';
request: ToolCallRequestInfo;
Expand Down Expand Up @@ -773,19 +791,41 @@ export class CoreToolScheduler {
reqInfo.args,
);
if (invocationOrError instanceof Error) {
const error = reqInfo.wasOutputTruncated
? new Error(
`${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`,
)
: invocationOrError;
return {
status: 'error',
request: reqInfo,
tool: toolInstance,
response: createErrorResponse(
reqInfo,
invocationOrError,
error,
ToolErrorType.INVALID_TOOL_PARAMS,
),
durationMs: 0,
};
}

// Reject file-modifying calls when truncated to prevent
// writing incomplete content.
if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) {
const truncationError = new Error(TRUNCATION_EDIT_REJECTION);
return {
status: 'error',
request: reqInfo,
tool: toolInstance,
response: createErrorResponse(
reqInfo,
truncationError,
ToolErrorType.OUTPUT_TRUNCATED,
),
durationMs: 0,
};
}

return {
status: 'validating',
request: reqInfo,
Expand Down
Loading