Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
670a02f
deflake
Adib234 Feb 26, 2026
b734b75
update
Adib234 Feb 26, 2026
d01bd83
update
Adib234 Feb 26, 2026
944e1d1
update
Adib234 Feb 26, 2026
ad4bad2
address comment
Adib234 Feb 26, 2026
eae8145
update test
Adib234 Feb 26, 2026
d8f6232
test change
Adib234 Feb 26, 2026
b5258bb
debug
Adib234 Feb 27, 2026
03b8d24
update
Adib234 Feb 27, 2026
730cd41
debug
Adib234 Feb 27, 2026
7f67017
fix build error
Adib234 Feb 27, 2026
3ff6c40
Merge branch 'main' into adibakm/deflake-plan-mode
Adib234 Feb 27, 2026
c03d733
revert debugging
Adib234 Feb 27, 2026
6e248a3
build and format
Adib234 Feb 27, 2026
bb1bb26
add policy in integration test
Adib234 Mar 1, 2026
133fd7b
update policy in integration test
Adib234 Mar 1, 2026
9103d75
test change
Adib234 Mar 1, 2026
20a4af0
test
Adib234 Mar 1, 2026
6731df1
change test to interactive
Adib234 Mar 1, 2026
367b382
remove unused code
Adib234 Mar 1, 2026
c2993a1
disable interactive prompts at startup
Adib234 Mar 1, 2026
8b27136
Merge branch 'main' into adibakm/deflake-plan-mode
Adib234 Mar 1, 2026
e9f0515
revert deflake.yml
Adib234 Mar 1, 2026
487389b
remove unnecessary comment
Adib234 Mar 1, 2026
1c23919
split test into 2 tests
Adib234 Mar 1, 2026
caf1d5e
split test into 2 tests
Adib234 Mar 1, 2026
db14739
address nit
Adib234 Mar 2, 2026
d870519
revert deflake.yml
Adib234 Mar 2, 2026
487c23a
revert deflake.yml
Adib234 Mar 2, 2026
cab449d
revert deflake.yml
Adib234 Mar 2, 2026
51ad095
address lint
Adib234 Mar 2, 2026
bc2aff5
Merge branch 'main' into adibakm/deflake-plan-mode
Adib234 Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/deflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ jobs:
name: 'Slow E2E - Win'
runs-on: 'gemini-cli-windows-16-core'
if: "github.repository == 'google-gemini/gemini-cli'"

steps:
- name: 'Checkout'
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
Expand Down
110 changes: 83 additions & 27 deletions integration-tests/plan-mode.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestRig, checkModelOutputContent } from './test-helper.js';
import { TestRig, checkModelOutputContent, GEMINI_DIR } from './test-helper.js';

describe('Plan Mode', () => {
let rig: TestRig;
Expand Down Expand Up @@ -62,50 +64,98 @@ describe('Plan Mode', () => {
});
});

it.skip('should allow write_file only in the plans directory in plan mode', async () => {
await rig.setup(
'should allow write_file only in the plans directory in plan mode',
{
settings: {
experimental: { plan: true },
tools: {
core: ['write_file', 'read_file', 'list_directory'],
allowed: ['write_file'],
it('should allow write_file to the plans directory in plan mode', async () => {
const plansDir = '.gemini/tmp/foo/123/plans';
const testName =
'should allow write_file to the plans directory in plan mode';

await rig.setup(testName, {
settings: {
experimental: { plan: true },
tools: {
core: ['write_file', 'read_file', 'list_directory'],
},
general: {
defaultApprovalMode: 'plan',
plan: {
directory: plansDir,
},
general: { defaultApprovalMode: 'plan' },
},
},
});

// Disable the interactive terminal setup prompt in tests
writeFileSync(
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
);

// We ask the agent to create a plan for a feature, which should trigger a write_file in the plans directory.
// Verify that a write_file call outside of the plans directory fails.
await rig.run({
const run = await rig.runInteractive({
approvalMode: 'plan',
stdin:
'Create a file called plan.md in the plans directory. Then create a file called hello.txt in the current directory',
});

const toolLogs = rig.readToolLogs();
const writeLogs = toolLogs.filter(
(l) => l.toolRequest.name === 'write_file',
await run.type('Create a file called plan.md in the plans directory.');
await run.type('\r');

await rig.expectToolCallSuccess(['write_file'], 30000, (args) =>
args.includes('plan.md'),
);

const planWrite = writeLogs.find(
const toolLogs = rig.readToolLogs();
const planWrite = toolLogs.find(
(l) =>
l.toolRequest.name === 'write_file' &&
l.toolRequest.args.includes('plans') &&
l.toolRequest.args.includes('plan.md'),
);
expect(planWrite?.toolRequest.success).toBe(true);
});

const blockedWrite = writeLogs.find((l) =>
l.toolRequest.args.includes('hello.txt'),
it('should deny write_file to non-plans directory in plan mode', async () => {
const plansDir = '.gemini/tmp/foo/123/plans';
const testName =
'should deny write_file to non-plans directory in plan mode';

await rig.setup(testName, {
settings: {
experimental: { plan: true },
tools: {
core: ['write_file', 'read_file', 'list_directory'],
},
general: {
defaultApprovalMode: 'plan',
plan: {
directory: plansDir,
},
},
},
});

// Disable the interactive terminal setup prompt in tests
writeFileSync(
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
);

// The model is nondeterministic: sometimes a blocked write appears in the tool logs and sometimes it doesn't.
if (blockedWrite) {
expect(blockedWrite?.toolRequest.success).toBe(false);
}
const run = await rig.runInteractive({
approvalMode: 'plan',
});

expect(planWrite?.toolRequest.success).toBe(true);
await run.type('Create a file called hello.txt in the current directory.');
await run.type('\r');

const toolLogs = rig.readToolLogs();
const writeLog = toolLogs.find(
(l) =>
l.toolRequest.name === 'write_file' &&
l.toolRequest.args.includes('hello.txt'),
);

// In Plan Mode, writes outside the plans directory should be blocked.
// The model is nondeterministic: sometimes it doesn't even attempt the write, but if it does, the write must fail.
if (writeLog) {
expect(writeLog.toolRequest.success).toBe(false);
}
});

it('should be able to enter plan mode from default mode', async () => {
Expand All @@ -119,6 +169,12 @@ describe('Plan Mode', () => {
},
});

// Disable the interactive terminal setup prompt in tests
writeFileSync(
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
);

// Start in default mode and ask to enter plan mode.
await rig.run({
approvalMode: 'default',
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/policy/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import * as crypto from 'node:crypto';
import { fileURLToPath } from 'node:url';
import { Storage } from '../config/storage.js';
import {
ApprovalMode,
type PolicyEngineConfig,
PolicyDecision,
type PolicyRule,
ApprovalMode,
type PolicySettings,
type SafetyCheckerRule,
} from './types.js';
Expand Down
1 change: 1 addition & 0 deletions packages/test-utils/src/test-rig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { fileURLToPath } from 'node:url';
import { env } from 'node:process';
import { setTimeout as sleep } from 'node:timers/promises';
import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
export { GEMINI_DIR };
import * as pty from '@lydell/node-pty';
import stripAnsi from 'strip-ansi';
import * as os from 'node:os';
Expand Down
Loading