google-gemini
diff --git a/‎docs/cli/index.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/cli/index.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/cli/plan-mode.md‎
Lines changed: 106 additions & 0 deletions b/‎docs/cli/plan-mode.md‎
Lines changed: 106 additions & 0 deletions
diff --git a/‎docs/sidebar.json‎
Lines changed: 1 addition & 0 deletions b/‎docs/sidebar.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎evals/save_memory.eval.ts‎
Lines changed: 80 additions & 37 deletions b/‎evals/save_memory.eval.ts‎
Lines changed: 80 additions & 37 deletions
diff --git a/‎evals/test-helper.ts‎
Lines changed: 2 additions & 2 deletions b/‎evals/test-helper.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎evals/validation_fidelity.eval.ts‎
Lines changed: 85 additions & 0 deletions b/‎evals/validation_fidelity.eval.ts‎
Lines changed: 85 additions & 0 deletions
@@ -23,6 +23,8 @@ overview of Gemini CLI, see the [main documentation page](../index.md).
 
 ## Advanced features
 
+- **[Plan mode (experimental)](./plan-mode.md):** Use a safe, read-only mode for
+  planning complex changes.
 - **[Checkpointing](./checkpointing.md):** Automatically save and restore
   snapshots of your session and files.
 - **[Enterprise configuration](./enterprise.md):** Deploy and manage Gemini CLI
 
@@ -0,0 +1,106 @@
+# Plan Mode (experimental) <!-- omit in toc -->
+
+Plan Mode is a safe, read-only mode for researching and designing complex
+changes. It prevents modifications while you research, design and plan an
+implementation strategy.
+
+> **Note: Plan Mode is currently an experimental feature.**
+>
+> Experimental features are subject to change. To use Plan Mode, enable it via
+> `/settings` (search for `Plan`) or add the following to your `settings.json`:
+>
+> ```json
+> {
+>   "experimental": {
+>     "plan": true
+>   }
+> }
+> ```
+>
+> Your feedback is invaluable as we refine this feature. If you have ideas,
+> suggestions, or encounter issues:
+>
+> - Use the `/bug` command within the CLI to file an issue.
+> - [Open an issue](https://github.com/google-gemini/gemini-cli/issues) on
+>   GitHub.
+
+- [Starting in Plan Mode](#starting-in-plan-mode)
+- [How to use Plan Mode](#how-to-use-plan-mode)
+  - [Entering Plan Mode](#entering-plan-mode)
+  - [The Planning Workflow](#the-planning-workflow)
+  - [Exiting Plan Mode](#exiting-plan-mode)
+- [Tool Restrictions](#tool-restrictions)
+
+## Starting in Plan Mode
+
+You can configure Gemini CLI to start directly in Plan Mode by default:
+
+1.  Type `/settings` in the CLI.
+2.  Search for `Approval Mode`.
+3.  Set the value to `Plan`.
+
+Other ways to start in Plan Mode:
+
+- **CLI Flag:** `gemini --approval-mode=plan`
+- **Manual Settings:** Manually update your `settings.json`:
+
+  ```json
+  {
+    "tools": {
+      "approvalMode": "plan"
+    }
+  }
+  ```
+
+## How to use Plan Mode
+
+### Entering Plan Mode
+
+You can enter Plan Mode in three ways:
+
+1.  **Keyboard Shortcut:** Press `Shift+Tab` to cycle through approval modes
+    (`Default` -> `Plan` -> `Auto-Edit`).
+2.  **Command:** Type `/plan` in the input box.
+3.  **Natural Language:** Ask the agent to "start a plan for...".
+
+### The Planning Workflow
+
+1.  **Requirements:** The agent clarifies goals using `ask_user`.
+2.  **Exploration:** The agent uses read-only tools (like [`read_file`]) to map
+    the codebase and validate assumptions.
+3.  **Planning:** A detailed plan is written to a temporary Markdown file.
+4.  **Review:** You review the plan.
+    - **Approve:** Exit Plan Mode and start implementation (switching to
+      Auto-Edit or Default approval mode).
+    - **Iterate:** Provide feedback to refine the plan.
+
+### Exiting Plan Mode
+
+To exit Plan Mode:
+
+1.  **Keyboard Shortcut:** Press `Shift+Tab` to cycle to the desired mode.
+2.  **Tool:** The agent calls the `exit_plan_mode` tool to present the finalized
+    plan for your approval.
+
+## Tool Restrictions
+
+Plan Mode enforces strict safety policies to prevent accidental changes.
+
+These are the only allowed tools:
+
+- **FileSystem (Read):** [`read_file`], [`list_directory`], [`glob`]
+- **Search:** [`grep_search`], [`google_web_search`]
+- **Interaction:** `ask_user`
+- **MCP Tools (Read):** Read-only [MCP tools] (e.g., `github_read_issue`,
+  `postgres_read_schema`) are allowed.
+- **Planning (Write):** [`write_file`] and [`replace`] are allowed ONLY for
+  `.md` files in the `~/.gemini/tmp/<project>/plans/` directory.
+
+[`list_directory`]: ../tools/file-system.md#1-list_directory-readfolder
+[`read_file`]: ../tools/file-system.md#2-read_file-readfile
+[`grep_search`]: ../tools/file-system.md#5-grep_search-searchtext
+[`write_file`]: ../tools/file-system.md#3-write_file-writefile
+[`glob`]: ../tools/file-system.md#4-glob-findfiles
+[`google_web_search`]: ../tools/web-search.md
+[`replace`]: ../tools/file-system.md#6-replace-edit
+[MCP tools]: ../tools/mcp-server.md
@@ -20,6 +20,7 @@
       { "label": "Project context (GEMINI.md)", "slug": "docs/cli/gemini-md" },
       { "label": "Shell commands", "slug": "docs/tools/shell" },
       { "label": "Session management", "slug": "docs/cli/session-management" },
+      { "label": "Plan mode (experimental)", "slug": "docs/cli/plan-mode" },
       { "label": "Todos", "slug": "docs/tools/todos" },
       { "label": "Web search and fetch", "slug": "docs/tools/web-search" }
     ]
 
@@ -109,7 +109,7 @@ describe('save_memory', () => {
     params: {
       settings: { tools: { core: ['save_memory'] } },
     },
-    prompt: `My dog's name is Buddy. What is my dog's name?`,
+    prompt: `Please remember that my dog's name is Buddy.`,
     assert: async (rig, result) => {
       const wasToolCalled = await rig.waitForToolCall('save_memory');
       expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
@@ -145,25 +145,34 @@ describe('save_memory', () => {
     },
   });
 
-  const rememberingDbSchemaLocation =
-    "Agent remembers project's database schema location";
+  const ignoringDbSchemaLocation =
+    "Agent ignores workspace's database schema location";
   evalTest('ALWAYS_PASSES', {
-    name: rememberingDbSchemaLocation,
+    name: ignoringDbSchemaLocation,
     params: {
-      settings: { tools: { core: ['save_memory'] } },
+      settings: {
+        tools: {
+          core: [
+            'save_memory',
+            'list_directory',
+            'read_file',
+            'run_shell_command',
+          ],
+        },
+      },
     },
-    prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
+    prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
     assert: async (rig, result) => {
-      const wasToolCalled = await rig.waitForToolCall('save_memory');
-      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
-        true,
-      );
+      await rig.waitForTelemetryReady();
+      const wasToolCalled = rig
+        .readToolLogs()
+        .some((log) => log.toolRequest.name === 'save_memory');
+      expect(
+        wasToolCalled,
+        'save_memory should not be called for workspace-specific information',
+      ).toBe(false);
 
       assertModelHasOutput(result);
-      checkModelOutputContent(result, {
-        expectedContent: [/database schema|ok|remember|will do/i],
-        testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
-      });
     },
   });
 
@@ -189,38 +198,74 @@ describe('save_memory', () => {
     },
   });
 
-  const rememberingTestCommand =
-    'Agent remembers specific project test command';
+  const ignoringBuildArtifactLocation =
+    'Agent ignores workspace build artifact location';
   evalTest('ALWAYS_PASSES', {
-    name: rememberingTestCommand,
+    name: ignoringBuildArtifactLocation,
     params: {
-      settings: { tools: { core: ['save_memory'] } },
+      settings: {
+        tools: {
+          core: [
+            'save_memory',
+            'list_directory',
+            'read_file',
+            'run_shell_command',
+          ],
+        },
+      },
     },
-    prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
+    prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
     assert: async (rig, result) => {
-      const wasToolCalled = await rig.waitForToolCall('save_memory');
-      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
-        true,
-      );
+      await rig.waitForTelemetryReady();
+      const wasToolCalled = rig
+        .readToolLogs()
+        .some((log) => log.toolRequest.name === 'save_memory');
+      expect(
+        wasToolCalled,
+        'save_memory should not be called for workspace-specific information',
+      ).toBe(false);
+
+      assertModelHasOutput(result);
+    },
+  });
+
+  const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
+  evalTest('ALWAYS_PASSES', {
+    name: ignoringMainEntryPoint,
+    params: {
+      settings: {
+        tools: {
+          core: [
+            'save_memory',
+            'list_directory',
+            'read_file',
+            'run_shell_command',
+          ],
+        },
+      },
+    },
+    prompt: `The main entry point for this workspace is \`src/index.js\`.`,
+    assert: async (rig, result) => {
+      await rig.waitForTelemetryReady();
+      const wasToolCalled = rig
+        .readToolLogs()
+        .some((log) => log.toolRequest.name === 'save_memory');
+      expect(
+        wasToolCalled,
+        'save_memory should not be called for workspace-specific information',
+      ).toBe(false);
 
       assertModelHasOutput(result);
-      checkModelOutputContent(result, {
-        expectedContent: [
-          /command to run all backend tests|ok|remember|will do/i,
-        ],
-        testName: `${TEST_PREFIX}${rememberingTestCommand}`,
-      });
     },
   });
 
-  const rememberingMainEntryPoint =
-    "Agent remembers project's main entry point";
+  const rememberingBirthday = "Agent remembers user's birthday";
   evalTest('ALWAYS_PASSES', {
-    name: rememberingMainEntryPoint,
+    name: rememberingBirthday,
     params: {
       settings: { tools: { core: ['save_memory'] } },
     },
-    prompt: `The main entry point for this project is \`src/index.js\`.`,
+    prompt: `My birthday is on June 15th.`,
     assert: async (rig, result) => {
       const wasToolCalled = await rig.waitForToolCall('save_memory');
       expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
@@ -229,10 +274,8 @@ describe('save_memory', () => {
 
       assertModelHasOutput(result);
       checkModelOutputContent(result, {
-        expectedContent: [
-          /main entry point for this project|ok|remember|will do/i,
-        ],
-        testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
+        expectedContent: [/June 15th|ok|remember|will do/i],
+        testName: `${TEST_PREFIX}${rememberingBirthday}`,
       });
     },
   });
 
@@ -49,7 +49,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
       // bootstrap test projects.
       const rootNodeModules = path.join(process.cwd(), 'node_modules');
       const testNodeModules = path.join(rig.testDir || '', 'node_modules');
-      if (fs.existsSync(rootNodeModules)) {
+      if (fs.existsSync(rootNodeModules) && !fs.existsSync(testNodeModules)) {
         fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
       }
 
@@ -162,7 +162,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
   if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
     it.skip(evalCase.name, fn);
   } else {
-    it(evalCase.name, fn);
+    it(evalCase.name, fn, evalCase.timeout);
   }
 }
 
 
@@ -0,0 +1,85 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+
+/**
+ * Eval: after a rename that touches several files, the agent should run a
+ * build or type-check command on its own initiative rather than stopping once
+ * the edits are made.
+ *
+ * The fixture is a tiny TypeScript project (`types.ts`, `logger.ts`, a test,
+ * `package.json`, `tsconfig.json`) in which renaming `LogEntry.message`
+ * breaks its usages in the other source files.
+ */
+describe('validation_fidelity', () => {
+  evalTest('ALWAYS_PASSES', {
+    name: 'should perform exhaustive validation autonomously when guided by system instructions',
+    files: {
+      'src/types.ts': `
+export interface LogEntry {
+  level: 'info' | 'warn' | 'error';
+  message: string;
+}
+`,
+      'src/logger.ts': `
+import { LogEntry } from './types.js';
+
+export function formatLog(entry: LogEntry): string {
+  return \`[\${entry.level.toUpperCase()}] \${entry.message}\`;
+}
+`,
+      'src/logger.test.ts': `
+import { expect, test } from 'vitest';
+import { formatLog } from './logger.js';
+import { LogEntry } from './types.js';
+
+test('formats log correctly', () => {
+  const entry: LogEntry = { level: 'info', message: 'test message' };
+  expect(formatLog(entry)).toBe('[INFO] test message');
+});
+`,
+      'package.json': JSON.stringify({
+        name: 'test-project',
+        type: 'module',
+        scripts: {
+          test: 'vitest run',
+          build: 'tsc --noEmit',
+        },
+      }),
+      'tsconfig.json': JSON.stringify({
+        compilerOptions: {
+          target: 'ESNext',
+          module: 'ESNext',
+          moduleResolution: 'node',
+          strict: true,
+          esModuleInterop: true,
+          skipLibCheck: true,
+          forceConsistentCasingInFileNames: true,
+        },
+      }),
+    },
+    prompt:
+      "Refactor the 'LogEntry' interface in 'src/types.ts' to rename the 'message' field to 'payload'.",
+    timeout: 600000,
+    assert: async (rig) => {
+      // The goal of this eval is to see if the agent realizes it needs to update usages
+      // AND run 'npm run build' or 'tsc' autonomously to ensure project-wide structural integrity.
+
+      // Wait for telemetry before reading the tool logs, mirroring the other
+      // evals that inspect readToolLogs() directly.
+      await rig.waitForTelemetryReady();
+
+      const toolLogs = rig.readToolLogs();
+      const shellCalls = toolLogs.filter(
+        (log) => log.toolRequest.name === 'run_shell_command',
+      );
+
+      const hasBuildOrTsc = shellCalls.some((log) => {
+        // Tool args are logged as a JSON string; treat a call without a string
+        // `command` as a non-match instead of throwing, so a failure surfaces
+        // through the expectation message below.
+        const args: unknown = JSON.parse(log.toolRequest.args);
+        if (typeof args !== 'object' || args === null || !('command' in args)) {
+          return false;
+        }
+        const command = args.command;
+        if (typeof command !== 'string') {
+          return false;
+        }
+
+        const cmd = command.toLowerCase();
+        return (
+          cmd.includes('npm run build') ||
+          cmd.includes('tsc') ||
+          cmd.includes('typecheck') ||
+          cmd.includes('npm run verify')
+        );
+      });
+
+      expect(
+        hasBuildOrTsc,
+        'Expected the agent to autonomously run a build or type-check command to verify the refactoring',
+      ).toBe(true);
+    },
+  });
+});
Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`	`{ "label": "Project context (GEMINI.md)", "slug": "docs/cli/gemini-md" },`
`21`	`21`	`{ "label": "Shell commands", "slug": "docs/tools/shell" },`
`22`	`22`	`{ "label": "Session management", "slug": "docs/cli/session-management" },`
	`23`	`+ { "label": "Plan mode (experimental)", "slug": "docs/cli/plan-mode" },`
`23`	`24`	`{ "label": "Todos", "slug": "docs/tools/todos" },`
`24`	`25`	`{ "label": "Web search and fetch", "slug": "docs/tools/web-search" }`
`25`	`26`	`]`
Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {`
`49`	`49`	`// bootstrap test projects.`
`50`	`50`	`const rootNodeModules = path.join(process.cwd(), 'node_modules');`
`51`	`51`	`const testNodeModules = path.join(rig.testDir \|\| '', 'node_modules');`
`52`		`- if (fs.existsSync(rootNodeModules)) {`
	`52`	`+ if (fs.existsSync(rootNodeModules) && !fs.existsSync(testNodeModules)) {`
`53`	`53`	`fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');`
`54`	`54`	`}`
`55`	`55`
`@@ -162,7 +162,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {`
`162`	`162`	`if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {`
`163`	`163`	`it.skip(evalCase.name, fn);`
`164`	`164`	`} else {`
`165`		`- it(evalCase.name, fn);`
	`165`	`+ it(evalCase.name, fn, evalCase.timeout);`
`166`	`166`	`}`
`167`	`167`	`}`
`168`	`168`