google-gemini · Durgesh-Kumar-Dewangan · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
@@ -20,16 +20,44 @@ import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core';
  * forbidden. Evals must test against the full, default tool set to ensure
  * realistic behavior.
  */
-interface EvalConfigOverrides {
-  /** Restricting tools via excludeTools in evals is forbidden. */
-  excludeTools?: never;
-  /** Restricting tools via coreTools in evals is forbidden. */
-  coreTools?: never;
-  /** Restricting tools via allowedTools in evals is forbidden. */
-  allowedTools?: never;
-  /** Restricting tools via mainAgentTools in evals is forbidden. */
-  mainAgentTools?: never;
+type ForbiddenToolKeys =
+  | 'excludeTools'
+  | 'coreTools'
+  | 'allowedTools'
+  | 'mainAgentTools';
+
+/**
+ * Intersects `T` with an optional `never`-typed property for every forbidden
+ * key, so supplying a value for one of them fails type-checking.
+ */
+type NoToolRestrictions<T> = T & {
+  [K in ForbiddenToolKeys]?: never;
+};
+
+/**
+ * Arbitrary string-keyed config overrides in which the forbidden
+ * tool-restriction keys are typed as `never`.
+ */
+export type EvalConfigOverrides = NoToolRestrictions<{
   [key: string]: unknown;
+}>;
+
+/**
+ * Returns a shallow copy of `overrides` with every forbidden tool-restriction
+ * key removed; an absent (falsy) argument is returned unchanged.
+ */
+function sanitizeConfigOverrides(
+  overrides?: Record<string, unknown>
+): Record<string, unknown> | undefined {
+  if (!overrides) {
+    return overrides;
+  }
+
+  const blockedKeys: ForbiddenToolKeys[] = [
+    'excludeTools',
+    'coreTools',
+    'allowedTools',
+    'mainAgentTools',
+  ];
+
+  // Copy first so the caller's object is never mutated.
+  const cleaned = { ...overrides };
+  blockedKeys.forEach((blockedKey) => {
+    delete cleaned[blockedKey];
+  });
+  return cleaned;
 }
 
 export interface AppEvalCase {
@@ -51,7 +79,7 @@ export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
     const rig = new AppRig({
       configOverrides: {
         model: DEFAULT_GEMINI_MODEL,
-        ...evalCase.configOverrides,
+        ...sanitizeConfigOverrides(evalCase.configOverrides),
       },
     });
 

@@ -11,7 +11,8 @@ describe('CliHelpAgent Delegation', () => {
         },
       },
     },
-    prompt: 'Help me create a subagent in this project',
+    // Explicit CLI how-to question so the delegation intent is clear.
+    prompt: 'How do I create and configure a subagent using the CLI in this project?',
     timeout: 60000,
     assert: async (rig, _result) => {
       const toolLogs = rig.readToolLogs();

@@ -7,6 +7,19 @@
 import { describe, expect } from 'vitest';
 import { appEvalTest } from './app-test-helper.js';
 
+/**
+ * Compile-time guard to prevent tool restriction usage in eval configs.
+ *
+ * NOTE(review): these aliases look like a local copy of ones declared in the
+ * module that provides `appEvalTest` — confirm, and prefer one shared export.
+ */
+type ForbiddenToolKeys =
+  | 'excludeTools'
+  | 'coreTools'
+  | 'allowedTools'
+  | 'mainAgentTools';
+
+type NoToolRestrictions<T> = T & {
+  [K in ForbiddenToolKeys]?: never;
+};
+
 describe('generalist_delegation', () => {
   // --- Positive Evals (Should Delegate) ---
 
@@ -21,7 +34,7 @@ describe('generalist_delegation', () => {
       experimental: {
         enableAgents: true,
       },
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {
       'file1.ts': 'console.log("no semi")',
       'file2.ts': 'console.log("no semi")',
@@ -64,7 +77,7 @@ describe('generalist_delegation', () => {
       experimental: {
         enableAgents: true,
       },
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {
       'src/a.ts': 'export const a = 1;',
       'src/b.ts': 'export const b = 2;',
@@ -104,7 +117,7 @@ describe('generalist_delegation', () => {
       experimental: {
         enableAgents: true,
       },
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {
       'README.md': 'This is a proyect.',
     },
@@ -138,7 +151,7 @@ describe('generalist_delegation', () => {
       experimental: {
         enableAgents: true,
       },
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {
       'src/VERSION': '1.2.3',
     },

@@ -11,6 +11,20 @@ import {
   checkModelOutputContent,
 } from './test-helper.js';
 
+/**
+ * Normalizes the arguments handed to a tool-call matcher into a key/value map.
+ *
+ * @param args Either an already-parsed object or a JSON string.
+ * @returns The object form of `args`, or `null` when `args` is neither an
+ *   object nor a string that parses to one.
+ */
+function parseToolArgs(args: unknown): Record<string, unknown> | null {
+  let candidate: unknown = args;
+  if (typeof args === 'string') {
+    try {
+      candidate = JSON.parse(args);
+    } catch {
+      // Malformed JSON is reported as "no usable arguments".
+      return null;
+    }
+  }
+  // JSON such as `5`, `"x"` or `null` parses fine but is not an object, so the
+  // same check guards both the direct and the parsed path.
+  if (candidate === null || typeof candidate !== 'object') {
+    return null;
+  }
+  return candidate as Record<string, unknown>;
+}
+
 describe('grep_search_functionality', () => {
   const TEST_PREFIX = 'Grep Search Functionality: ';
 
@@ -44,8 +58,8 @@ describe('grep_search_functionality', () => {
         'grep_search',
         undefined,
         (args) => {
-          const params = JSON.parse(args);
-          return params.case_sensitive === true;
+          const params = parseToolArgs(args);
+          return params?.case_sensitive === true;
         },
       );
       expect(
@@ -74,8 +88,8 @@ describe('grep_search_functionality', () => {
         'grep_search',
         undefined,
         (args) => {
-          const params = JSON.parse(args);
-          return params.names_only === true;
+          const params = parseToolArgs(args);
+          return params?.names_only === true;
         },
       );
       expect(
@@ -104,8 +118,8 @@ describe('grep_search_functionality', () => {
         'grep_search',
         undefined,
         (args) => {
-          const params = JSON.parse(args);
-          return params.include_pattern === '*.js';
+          const params = parseToolArgs(args);
+          return params?.include_pattern === '*.js';
         },
       );
       expect(
@@ -134,8 +148,8 @@ describe('grep_search_functionality', () => {
         'grep_search',
         undefined,
         (args) => {
-          const params = JSON.parse(args);
-          return params.dir_path === 'src';
+          const params = parseToolArgs(args);
+          return params?.dir_path === 'src';
         },
       );
       expect(

@@ -11,12 +11,25 @@ import fs from 'node:fs';
 import { appEvalTest } from './app-test-helper.js';
 import { PolicyDecision } from '@google/gemini-cli-core';
 
+/**
+ * Compile-time guard to prevent tool restriction usage in eval configs.
+ *
+ * NOTE(review): these aliases look like a local copy of ones declared in the
+ * module that provides `appEvalTest` — confirm, and prefer one shared export.
+ */
+type ForbiddenToolKeys =
+  | 'excludeTools'
+  | 'coreTools'
+  | 'allowedTools'
+  | 'mainAgentTools';
+
+type NoToolRestrictions<T> = T & {
+  [K in ForbiddenToolKeys]?: never;
+};
+
 describe('Model Steering Behavioral Evals', () => {
   appEvalTest('USUALLY_PASSES', {
     name: 'Corrective Hint: Model switches task based on hint during tool turn',
     configOverrides: {
       modelSteering: true,
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {
       'README.md':
         '# Gemini CLI\nThis is a tool for developers.\nLicense: Apache-2.0\nLine 4\nLine 5\nLine 6',
@@ -55,7 +68,7 @@ describe('Model Steering Behavioral Evals', () => {
     name: 'Suggestive Hint: Model incorporates user guidance mid-stream',
     configOverrides: {
       modelSteering: true,
-    },
+    } as NoToolRestrictions<Record<string, unknown>>,
     files: {},
     prompt: 'Create a file called "hw.js" with a JS hello world.',
     setup: async (rig) => {

@@ -11,9 +11,23 @@ import {
   checkModelOutputContent,
 } from '../integration-tests/test-helper.js';
 
+/**
+ * Compile-time guard to prevent tool restriction usage in eval configs.
+ *
+ * NOTE(review): no use of these aliases is visible in this file's changed
+ * hunks — confirm they are referenced somewhere, otherwise remove them.
+ */
+type ForbiddenToolKeys =
+  | 'excludeTools'
+  | 'coreTools'
+  | 'allowedTools'
+  | 'mainAgentTools';
+
+type NoToolRestrictions<T> = T & {
+  [K in ForbiddenToolKeys]?: never;
+};
+
 describe('save_memory', () => {
   const TEST_PREFIX = 'Save memory test: ';
   const rememberingFavoriteColor = "Agent remembers user's favorite color";
+
   evalTest('ALWAYS_PASSES', {
     name: rememberingFavoriteColor,
 
@@ -33,7 +47,9 @@ describe('save_memory', () => {
       });
     },
   });
+
   const rememberingCommandRestrictions = 'Agent remembers command restrictions';
+
   evalTest('USUALLY_PASSES', {
     name: rememberingCommandRestrictions,
 
@@ -53,6 +69,7 @@ describe('save_memory', () => {
   });
 
   const rememberingWorkflow = 'Agent remembers workflow preferences';
+
   evalTest('USUALLY_PASSES', {
     name: rememberingWorkflow,
 
@@ -73,6 +90,7 @@ describe('save_memory', () => {
 
   const ignoringTemporaryInformation =
     'Agent ignores temporary conversation details';
+
   evalTest('ALWAYS_PASSES', {
     name: ignoringTemporaryInformation,
 
@@ -82,6 +100,7 @@ describe('save_memory', () => {
       const wasToolCalled = rig
         .readToolLogs()
         .some((log) => log.toolRequest.name === 'save_memory');
+
       expect(
         wasToolCalled,
         'save_memory should not be called for temporary information',
@@ -96,6 +115,7 @@ describe('save_memory', () => {
   });
 
   const rememberingPetName = "Agent remembers user's pet's name";
+
   evalTest('ALWAYS_PASSES', {
     name: rememberingPetName,
 
@@ -115,6 +135,7 @@ describe('save_memory', () => {
   });
 
   const rememberingCommandAlias = 'Agent remembers custom command aliases';
+
   evalTest('ALWAYS_PASSES', {
     name: rememberingCommandAlias,
 
@@ -135,6 +156,7 @@ describe('save_memory', () => {
 
   const ignoringDbSchemaLocation =
     "Agent ignores workspace's database schema location";
+
   evalTest('USUALLY_PASSES', {
     name: ignoringDbSchemaLocation,
     prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
@@ -143,6 +165,7 @@ describe('save_memory', () => {
       const wasToolCalled = rig
         .readToolLogs()
         .some((log) => log.toolRequest.name === 'save_memory');
+
       expect(
         wasToolCalled,
         'save_memory should not be called for workspace-specific information',
@@ -154,6 +177,7 @@ describe('save_memory', () => {
 
   const rememberingCodingStyle =
     "Agent remembers user's coding style preference";
+
   evalTest('ALWAYS_PASSES', {
     name: rememberingCodingStyle,
 
@@ -174,6 +198,7 @@ describe('save_memory', () => {
 
   const ignoringBuildArtifactLocation =
     'Agent ignores workspace build artifact location';
+
   evalTest('USUALLY_PASSES', {
     name: ignoringBuildArtifactLocation,
     prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
@@ -182,6 +207,7 @@ describe('save_memory', () => {
       const wasToolCalled = rig
         .readToolLogs()
         .some((log) => log.toolRequest.name === 'save_memory');
+
       expect(
         wasToolCalled,
         'save_memory should not be called for workspace-specific information',
@@ -191,7 +217,9 @@ describe('save_memory', () => {
     },
   });
 
-  const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
+  const ignoringMainEntryPoint =
+    "Agent ignores workspace's main entry point";
+
   evalTest('USUALLY_PASSES', {
     name: ignoringMainEntryPoint,
     prompt: `The main entry point for this workspace is \`src/index.js\`.`,
@@ -200,6 +228,7 @@ describe('save_memory', () => {
       const wasToolCalled = rig
         .readToolLogs()
         .some((log) => log.toolRequest.name === 'save_memory');
+
       expect(
         wasToolCalled,
         'save_memory should not be called for workspace-specific information',
@@ -210,6 +239,7 @@ describe('save_memory', () => {
   });
 
   const rememberingBirthday = "Agent remembers user's birthday";
+
   evalTest('ALWAYS_PASSES', {
     name: rememberingBirthday,