google-gemini · Adib234 · Mar 2, 2026 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
@@ -4,13 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       branch_ref:
-        description: 'Branch to run on'
-        required: true
-        default: 'main'
+        description: 'Branch to run on (defaults to selected branch)'
+        required: false
         type: 'string'
-      test_name_pattern:
-        description: 'The test name pattern to use'
+      test_filter:
+        description: 'Test name pattern to run (passed to vitest -t)'
         required: false
+        default: 'should (allow|deny) write_file to (the plans|non-plans) directory in plan mode'
         type: 'string'
       runs:
         description: 'The number of runs'
@@ -27,7 +27,6 @@ jobs:
   deflake_e2e_linux:
     name: 'E2E Test (Linux) - ${{ matrix.sandbox }}'
     runs-on: 'gemini-cli-ubuntu-16-core'
-    if: "github.repository == 'google-gemini/gemini-cli'"
     strategy:
       fail-fast: false
       matrix:
@@ -41,7 +40,7 @@ jobs:
       - name: 'Checkout'
         uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
         with:
-          ref: '${{ github.event.pull_request.head.sha }}'
+          ref: '${{ github.event.inputs.branch_ref || github.ref }}'
           repository: '${{ github.repository }}'
 
       - name: 'Set up Node.js ${{ matrix.node-version }}'
@@ -62,28 +61,37 @@ jobs:
       - name: 'Run E2E tests'
         env:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          DEBUG_SCHEDULER: 'true'
           IS_DOCKER: "${{ matrix.sandbox == 'sandbox:docker' }}"
           KEEP_OUTPUT: 'true'
           RUNS: '${{ github.event.inputs.runs }}'
-          TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
+          TEST_FILTER: '${{ github.event.inputs.test_filter }}'
           VERBOSE: 'true'
         shell: 'bash'
         run: |
+          # Construct the inner command
           if [[ "${IS_DOCKER}" == "true" ]]; then
-            npm run deflake:test:integration:sandbox:docker -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'"
+            INNER_CMD="npm run test:integration:sandbox:docker -- --retry=0"
           else
-            npm run deflake:test:integration:sandbox:none -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'"
+            INNER_CMD="npm run test:integration:sandbox:none -- --retry=0"
+          fi
+
+          # Append test filter if provided
+          if [[ -n "${TEST_FILTER}" ]]; then
+            INNER_CMD="${INNER_CMD} -t \"${TEST_FILTER}\""
           fi
 
+          # Run deflake script directly
+          node scripts/deflake.js --command="${INNER_CMD}" --runs="${RUNS}"
+
   deflake_e2e_mac:
     name: 'E2E Test (macOS)'
     runs-on: 'macos-latest'
-    if: "github.repository == 'google-gemini/gemini-cli'"
     steps:
       - name: 'Checkout'
         uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
         with:
-          ref: '${{ github.event.pull_request.head.sha }}'
+          ref: '${{ github.event.inputs.branch_ref || github.ref }}'
           repository: '${{ github.repository }}'
 
       - name: 'Set up Node.js 20.x'
@@ -105,24 +113,28 @@ jobs:
         if: "runner.os != 'Windows'"
         env:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          DEBUG_SCHEDULER: 'true'
           KEEP_OUTPUT: 'true'
           RUNS: '${{ github.event.inputs.runs }}'
           SANDBOX: 'sandbox:none'
-          TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
+          TEST_FILTER: '${{ github.event.inputs.test_filter }}'
           VERBOSE: 'true'
         run: |
-          npm run deflake:test:integration:sandbox:none -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'"
+          INNER_CMD="npm run test:integration:sandbox:none -- --retry=0"
+          if [[ -n "${TEST_FILTER}" ]]; then
+            INNER_CMD="${INNER_CMD} -t \"${TEST_FILTER}\""
+          fi
+          node scripts/deflake.js --command="${INNER_CMD}" --runs="${RUNS}"
 
   deflake_e2e_windows:
     name: 'Slow E2E - Win'
     runs-on: 'gemini-cli-windows-16-core'
-    if: "github.repository == 'google-gemini/gemini-cli'"
 
     steps:
       - name: 'Checkout'
         uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
         with:
-          ref: '${{ github.event.pull_request.head.sha }}'
+          ref: '${{ github.event.inputs.branch_ref || github.ref }}'
           repository: '${{ github.repository }}'
 
       - name: 'Set up Node.js 20.x'
@@ -160,14 +172,19 @@ jobs:
       - name: 'Run E2E tests'
         env:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          DEBUG_SCHEDULER: 'true'
           KEEP_OUTPUT: 'true'
           SANDBOX: 'sandbox:none'
           VERBOSE: 'true'
           NODE_OPTIONS: '--max-old-space-size=32768 --max-semi-space-size=256'
           UV_THREADPOOL_SIZE: '32'
           NODE_ENV: 'test'
           RUNS: '${{ github.event.inputs.runs }}'
-          TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
+          TEST_FILTER: '${{ github.event.inputs.test_filter }}'
         shell: 'pwsh'
         run: |
-          npm run deflake:test:integration:sandbox:none -- --runs="$env:RUNS" -- --testNamePattern "'$env:TEST_NAME_PATTERN'"
+          $InnerCmd = "npm run test:integration:sandbox:none -- --retry=0"
+          if ($env:TEST_FILTER) {
+            $InnerCmd = "$InnerCmd -t `"$env:TEST_FILTER`""
+          }
+          node scripts/deflake.js --command "$InnerCmd" --runs $env:RUNS
@@ -4,8 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import { writeFileSync } from 'node:fs';
+import { join } from 'node:path';
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { TestRig, checkModelOutputContent } from './test-helper.js';
+import { TestRig, checkModelOutputContent, GEMINI_DIR } from './test-helper.js';
 
 describe('Plan Mode', () => {
   let rig: TestRig;
@@ -62,50 +64,98 @@ describe('Plan Mode', () => {
     });
   });
 
-  it.skip('should allow write_file only in the plans directory in plan mode', async () => {
-    await rig.setup(
-      'should allow write_file only in the plans directory in plan mode',
-      {
-        settings: {
-          experimental: { plan: true },
-          tools: {
-            core: ['write_file', 'read_file', 'list_directory'],
-            allowed: ['write_file'],
+  it('should allow write_file to the plans directory in plan mode', async () => {
+    const plansDir = '.gemini/tmp/v1/session/plans';
-    const plansDir = '.gemini/tmp/v1/session/plans';
+    const plansDir = '.gemini/tmp/foo/123/plans';
-    const plansDir = '.gemini/tmp/v1/session/plans';
+    const plansDir = '.gemini/tmp/foo/123/plans';
+    const testName =
+      'should allow write_file to the plans directory in plan mode';
+
+    await rig.setup(testName, {
+      settings: {
+        experimental: { plan: true },
+        tools: {
+          core: ['write_file', 'read_file', 'list_directory'],
+        },
+        general: {
+          defaultApprovalMode: 'plan',
+          plan: {
+            directory: plansDir,
           },
-          general: { defaultApprovalMode: 'plan' },
         },
       },
+    });
+
+    // Disable the interactive terminal setup prompt in tests
+    writeFileSync(
+      join(rig.homeDir!, GEMINI_DIR, 'state.json'),
+      JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
     );
 
-    // We ask the agent to create a plan for a feature, which should trigger a write_file in the plans directory.
-    // Verify that write_file outside of plan directory fails
-    await rig.run({
+    const run = await rig.runInteractive({
       approvalMode: 'plan',
-      stdin:
-        'Create a file called plan.md in the plans directory. Then create a file called hello.txt in the current directory',
     });
 
-    const toolLogs = rig.readToolLogs();
-    const writeLogs = toolLogs.filter(
-      (l) => l.toolRequest.name === 'write_file',
+    await run.type('Create a file called plan.md in the plans directory.');
+    await run.type('\r');
+
+    await rig.expectToolCallSuccess(['write_file'], 30000, (args) =>
+      args.includes('plan.md'),
     );
 
-    const planWrite = writeLogs.find(
+    const toolLogs = rig.readToolLogs();
+    const planWrite = toolLogs.find(
       (l) =>
+        l.toolRequest.name === 'write_file' &&
         l.toolRequest.args.includes('plans') &&
         l.toolRequest.args.includes('plan.md'),
     );
+    expect(planWrite?.toolRequest.success).toBe(true);
+  });
 
-    const blockedWrite = writeLogs.find((l) =>
-      l.toolRequest.args.includes('hello.txt'),
+  it('should deny write_file to non-plans directory in plan mode', async () => {
+    const plansDir = '.gemini/tmp/v1/session/plans';
+    const testName =
+      'should deny write_file to non-plans directory in plan mode';
+
+    await rig.setup(testName, {
+      settings: {
+        experimental: { plan: true },
+        tools: {
+          core: ['write_file', 'read_file', 'list_directory'],
+        },
+        general: {
+          defaultApprovalMode: 'plan',
+          plan: {
+            directory: plansDir,
+          },
+        },
+      },
+    });
+
+    // Disable the interactive terminal setup prompt in tests
+    writeFileSync(
+      join(rig.homeDir!, GEMINI_DIR, 'state.json'),
+      JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
     );
 
-    // Model is undeterministic, sometimes a blocked write appears in tool logs and sometimes it doesn't
-    if (blockedWrite) {
-      expect(blockedWrite?.toolRequest.success).toBe(false);
-    }
+    const run = await rig.runInteractive({
+      approvalMode: 'plan',
+    });
 
-    expect(planWrite?.toolRequest.success).toBe(true);
+    await run.type('Create a file called hello.txt in the current directory.');
+    await run.type('\r');
+
+    const toolLogs = rig.readToolLogs();
+    const writeLog = toolLogs.find(
+      (l) =>
+        l.toolRequest.name === 'write_file' &&
+        l.toolRequest.args.includes('hello.txt'),
+    );
+
+    // In Plan Mode, writes outside the plans directory should be blocked.
+    // The model is nondeterministic: sometimes it doesn't even attempt the write, but if it does, the write must fail.
+    if (writeLog) {
+      expect(writeLog.toolRequest.success).toBe(false);
+    }
   });
 
   it('should be able to enter plan mode from default mode', async () => {
@@ -119,6 +169,12 @@ describe('Plan Mode', () => {
       },
     });
 
+    // Disable the interactive terminal setup prompt in tests
+    writeFileSync(
+      join(rig.homeDir!, GEMINI_DIR, 'state.json'),
+      JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
+    );
+
     // Start in default mode and ask to enter plan mode.
     await rig.run({
       approvalMode: 'default',

@@ -10,10 +10,10 @@ import * as crypto from 'node:crypto';
 import { fileURLToPath } from 'node:url';
 import { Storage } from '../config/storage.js';
 import {
+  ApprovalMode,
   type PolicyEngineConfig,
   PolicyDecision,
   type PolicyRule,
-  ApprovalMode,
   type PolicySettings,
   type SafetyCheckerRule,
 } from './types.js';

@@ -12,6 +12,7 @@ import { fileURLToPath } from 'node:url';
 import { env } from 'node:process';
 import { setTimeout as sleep } from 'node:timers/promises';
 import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
+export { GEMINI_DIR };
 import * as pty from '@lydell/node-pty';
 import stripAnsi from 'strip-ansi';
 import * as os from 'node:os';