From 50be2a1817556f124df2f071cb315866d8d9fea8 Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Fri, 6 Feb 2026 17:19:48 -0500 Subject: [PATCH 1/3] fix(core): prevent subagent bypass in plan mode Increase Plan Mode policy priorities to ensure they override dynamic subagent rules. Subagents are registered with priority 1.05 (PRIORITY_SUBAGENT_TOOL), so Plan Mode deny rules are updated to 60 (1.060) and explicit allows to 70 (1.070). Closes #18482 --- packages/core/src/agents/registry.ts | 4 +- packages/core/src/policy/config.ts | 2 + packages/core/src/policy/policies/plan.toml | 22 +++---- .../core/src/policy/policy-engine.test.ts | 32 ++++++++++ packages/core/src/policy/toml-loader.test.ts | 63 ++++++++++++++++++- packages/core/src/policy/types.ts | 6 ++ 6 files changed, 115 insertions(+), 14 deletions(-) diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index 66a990f1db3..03726320bcc 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -21,7 +21,7 @@ import { type ModelConfig, ModelConfigService, } from '../services/modelConfigService.js'; -import { PolicyDecision } from '../policy/types.js'; +import { PolicyDecision, PRIORITY_SUBAGENT_TOOL } from '../policy/types.js'; /** * Returns the model config alias for a given agent definition. @@ -297,7 +297,7 @@ export class AgentRegistry { definition.kind === 'local' ? PolicyDecision.ALLOW : PolicyDecision.ASK_USER, - priority: 1.05, + priority: PRIORITY_SUBAGENT_TOOL, source: 'AgentRegistry (Dynamic)', }); } diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 7f6f4d9f3d1..e08ebe43ebf 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -194,6 +194,8 @@ export async function createPolicyEngineConfig( // 10: Write tools default to ASK_USER (becomes 1.010 in default tier) // 15: Auto-edit tool override (becomes 1.015 in default tier) // 50: Read-only tools (becomes 1.050 in default tier) + // 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) + // 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) // 999: YOLO mode allow-all (becomes 1.999 in default tier) // MCP servers that are explicitly excluded in settings.mcp.excluded diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml index 194680c9683..5275513f293 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -21,15 +21,15 @@ # # TOML policy priorities (before transformation): # 10: Write tools default to ASK_USER (becomes 1.010 in default tier) -# 20: Plan mode catch-all DENY override (becomes 1.020 in default tier) -# 50: Read-only tools (becomes 1.050 in default tier) +# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) +# 70: Read-only tools (becomes 1.070 in default tier) # 999: YOLO mode allow-all (becomes 1.999 in default tier) # Catch-All: Deny everything by default in Plan mode. [[rule]] decision = "deny" -priority = 20 +priority = 60 modes = ["plan"] deny_message = "You are in Plan Mode - adjust your prompt to only use read and search tools." @@ -38,49 +38,49 @@ deny_message = "You are in Plan Mode - adjust your prompt to only use read and s [[rule]] toolName = "glob" decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "grep_search" decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "list_directory" decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "read_file" decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "google_web_search" decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "ask_user" decision = "ask_user" -priority = 50 +priority = 70 modes = ["plan"] [[rule]] toolName = "exit_plan_mode" decision = "ask_user" -priority = 50 +priority = 70 modes = ["plan"] # Allow write_file and replace for .md files in plans directory [[rule]] toolName = ["write_file", "replace"] decision = "allow" -priority = 50 +priority = 70 modes = ["plan"] argsPattern = "\"file_path\":\"[^\"]+/\\.gemini/tmp/[a-zA-Z0-9_-]+/plans/[a-zA-Z0-9_-]+\\.md\"" diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index dba06550d25..93cf89536f4 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -13,6 +13,7 @@ import { type SafetyCheckerRule, InProcessCheckerType, ApprovalMode, + PRIORITY_SUBAGENT_TOOL, } from './types.js'; import type { FunctionCall } from '@google/genai'; import { SafetyCheckDecision } from '../safety/protocol.js'; @@ -1481,6 +1482,37 @@ describe('PolicyEngine', () => { }); }); + describe('Plan Mode vs Subagent Priority (Regression)', () => { + it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => { + // Plan Mode Deny (1.06) > Subagent Allow (1.05) + + const fixedRules: PolicyRule[] = [ + { + decision: PolicyDecision.DENY, + priority: 1.06, + modes: [ApprovalMode.PLAN], + }, + { + toolName: 'codebase_investigator', + decision: PolicyDecision.ALLOW, + priority: PRIORITY_SUBAGENT_TOOL, + }, + ]; + + const fixedEngine = new PolicyEngine({ + rules: fixedRules, + approvalMode: ApprovalMode.PLAN, + }); + + const fixedResult = await fixedEngine.check( + { name: 'codebase_investigator' }, + undefined, + ); + + expect(fixedResult.decision).toBe(PolicyDecision.DENY); + }); + }); + describe('shell command parsing failure', () => { it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => { const { splitCommands } = await import('../utils/shell-utils.js'); diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index da851cd3690..4b0cd1df440 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -5,12 +5,17 @@ */ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import { PolicyDecision } from './types.js'; +import { + PolicyDecision, + ApprovalMode, + PRIORITY_SUBAGENT_TOOL, +} from './types.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as os from 'node:os'; import { loadPoliciesFromToml } from './toml-loader.js'; import type { PolicyLoadResult } from './toml-loader.js'; +import { PolicyEngine } from './policy-engine.js'; describe('policy-toml-loader', () => { let tempDir: string; @@ -500,4 +505,60 @@ priority = 100 expect(error.message).toContain('Failed to read policy directory'); }); }); + + describe('Built-in Plan Mode Policy', () => { + it('should override default subagent rules when in Plan Mode', async () => { + const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml'); + const fileContent = await fs.readFile(planTomlPath, 'utf-8'); + const tempPolicyDir = await fs.mkdtemp( + path.join(os.tmpdir(), 'plan-policy-test-'), + ); + try { + await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent); + const getPolicyTier = () => 1; // Default tier + + // 1. Load the actual Plan Mode policies + const result = await loadPoliciesFromToml( + [tempPolicyDir], + getPolicyTier, + ); + + // 2. Initialize Policy Engine with these rules + const engine = new PolicyEngine({ + rules: result.rules, + approvalMode: ApprovalMode.PLAN, + }); + + // 3. Simulate a Subagent being registered (Dynamic Rule) + engine.addRule({ + toolName: 'codebase_investigator', + decision: PolicyDecision.ALLOW, + priority: PRIORITY_SUBAGENT_TOOL, + source: 'AgentRegistry (Dynamic)', + }); + + // 4. Verify Behavior: + // The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow + const checkResult = await engine.check( + { name: 'codebase_investigator' }, + undefined, + ); + + expect( + checkResult.decision, + 'Subagent should be DENIED in Plan Mode', + ).toBe(PolicyDecision.DENY); + + // 5. Verify Explicit Allows still work + // e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60) + const readResult = await engine.check({ name: 'read_file' }, undefined); + expect( + readResult.decision, + 'Explicitly allowed tools (read_file) should be ALLOWED in Plan Mode', + ).toBe(PolicyDecision.ALLOW); + } finally { + await fs.rm(tempPolicyDir, { recursive: true, force: true }); + } + }); + }); }); diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index db487a6ab3f..7b43e311176 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -276,3 +276,9 @@ export interface CheckResult { decision: PolicyDecision; rule?: PolicyRule; } + +/** + * Priority for subagent tools (registered dynamically). + * Resulting transformed priority: 1.050 + */ +export const PRIORITY_SUBAGENT_TOOL = 1.05; From 798a2fe979efc63006fc6beab2a9efbd5c4a8c5c Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Fri, 6 Feb 2026 17:29:58 -0500 Subject: [PATCH 2/3] chore(core): consolidate plan mode policy rules --- packages/core/src/policy/policies/plan.toml | 36 ++------------------ packages/core/src/policy/toml-loader.test.ts | 4 +++ packages/core/src/policy/types.ts | 2 +- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml index 5275513f293..12aa94d8938 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -22,7 +22,7 @@ # TOML policy priorities (before transformation): # 10: Write tools default to ASK_USER (becomes 1.010 in default tier) # 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) -# 70: Read-only tools (becomes 1.070 in default tier) +# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) # 999: YOLO mode allow-all (becomes 1.999 in default tier) # Catch-All: Deny everything by default in Plan mode. @@ -36,43 +36,13 @@ deny_message = "You are in Plan Mode - adjust your prompt to only use read and s # Explicitly Allow Read-Only Tools in Plan mode. [[rule]] -toolName = "glob" +toolName = ["glob", "grep_search", "list_directory", "read_file", "google_web_search"] decision = "allow" priority = 70 modes = ["plan"] [[rule]] -toolName = "grep_search" -decision = "allow" -priority = 70 -modes = ["plan"] - -[[rule]] -toolName = "list_directory" -decision = "allow" -priority = 70 -modes = ["plan"] - -[[rule]] -toolName = "read_file" -decision = "allow" -priority = 70 -modes = ["plan"] - -[[rule]] -toolName = "google_web_search" -decision = "allow" -priority = 70 -modes = ["plan"] - -[[rule]] -toolName = "ask_user" -decision = "ask_user" -priority = 70 -modes = ["plan"] - -[[rule]] -toolName = "exit_plan_mode" +toolName = ["ask_user", "exit_plan_mode"] decision = "ask_user" priority = 70 modes = ["plan"] diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index 4b0cd1df440..9938efa9509 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -13,10 +13,14 @@ import { import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as os from 'node:os'; +import { fileURLToPath } from 'node:url'; import { loadPoliciesFromToml } from './toml-loader.js'; import type { PolicyLoadResult } from './toml-loader.js'; import { PolicyEngine } from './policy-engine.js'; +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + describe('policy-toml-loader', () => { let tempDir: string; diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 7b43e311176..6ccabd504a5 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -279,6 +279,6 @@ export interface CheckResult { /** * Priority for subagent tools (registered dynamically). - * Resulting transformed priority: 1.050 + * Effective priority matching Tier 1 (Default) read-only tools. */ export const PRIORITY_SUBAGENT_TOOL = 1.05; From 4ee56e2005ed6420ba17612a1f030cac04c0ec7c Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Fri, 6 Feb 2026 17:37:02 -0500 Subject: [PATCH 3/3] fix(cli): update integration test for read-only tool priority --- packages/cli/src/config/policy-engine.integration.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 43c9d391f99..0568aa62bc9 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -434,8 +434,8 @@ describe('Policy Engine Integration Tests', () => { expect(mcpServerRule?.priority).toBe(2.1); // MCP allowed server const readOnlyToolRule = rules.find((r) => r.toolName === 'glob'); - // Priority 50 in default tier → 1.05 - expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5); + // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) + expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5); // Verify the engine applies these priorities correctly expect( @@ -590,8 +590,8 @@ describe('Policy Engine Integration Tests', () => { expect(server1Rule?.priority).toBe(2.1); // Allowed servers (user tier) const globRule = rules.find((r) => r.toolName === 'glob'); - // Priority 50 in default tier → 1.05 - expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only + // Priority 70 in default tier → 1.07 + expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only // The PolicyEngine will sort these by priority when it's created const engine = new PolicyEngine(config);