google-gemini · alisa-alisa · Feb 5, 2026 · Jan 30, 2026 · Feb 2, 2026 · Feb 2, 2026
@@ -6,25 +6,234 @@
 
 import { describe, expect } from 'vitest';
 import { evalTest } from './test-helper.js';
-import { validateModelOutput } from '../integration-tests/test-helper.js';
+import {
+  assertModelHasOutput,
+  checkModelOutputContent,
+} from '../integration-tests/test-helper.js';
 
 describe('save_memory', () => {
+  const TEST_PREFIX = 'Save memory test: ';
+  const rememberingFavoriteColor = "Agent remembers user's favorite color";
   evalTest('ALWAYS_PASSES', {
-    name: 'should be able to save to memory',
+    name: rememberingFavoriteColor,
     params: {
       settings: { tools: { core: ['save_memory'] } },
     },
     prompt: `remember that my favorite color is  blue.
 
     what is my favorite color? tell me that and surround it with $ symbol`,
     assert: async (rig, result) => {
-      const foundToolCall = await rig.waitForToolCall('save_memory');
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+      // Check that the reply has output and scan it for the remembered color.
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        expectedContent: 'blue',
+        testName: `${TEST_PREFIX}${rememberingFavoriteColor}`,
+      });
+    },
+  });
+  // Eval: a "never run npm commands" instruction should trigger save_memory.
+  const rememberingCommandRestrictions = 'Agent remembers command restrictions';
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingCommandRestrictions,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `I don't want you to ever run npm commands.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [/not run npm commands|remember|\bok/i],
+        testName: `${TEST_PREFIX}${rememberingCommandRestrictions}`,
+      });
+    },
+  });
+
+  // Eval: a standing workflow preference should trigger save_memory.
+  const rememberingWorkflow = 'Agent remembers workflow preferences';
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingWorkflow,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `I want you to always lint after building.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [/always|\bok|remember|will do/i],
+        testName: `${TEST_PREFIX}${rememberingWorkflow}`,
+      });
+    },
+  });
+
+  const ignoringTemporaryInformation =
+    'Agent ignores temporary conversation details';
+  evalTest('ALWAYS_PASSES', {
+    name: ignoringTemporaryInformation,
+    params: {
+      settings: { tools: { core: ['save_memory'] } },
+    },
+    prompt: `I'm going to get a coffee.`,
+    assert: async (rig, result) => {
+      await rig.waitForTelemetryReady();
+      const wasToolCalled = rig
+        .readToolLogs()
+        .some((log) => log.toolRequest.name === 'save_memory');
       expect(
-        foundToolCall,
-        'Expected to find a save_memory tool call',
-      ).toBeTruthy();
+        wasToolCalled,
+        'save_memory should not be called for temporary information',
+      ).toBe(false);
+      // Also scan the reply for memory-style acknowledgements (forbidden here).
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        testName: `${TEST_PREFIX}${ignoringTemporaryInformation}`,
+        forbiddenContent: [/remember|will do/i],
+      });
+    },
+  });
+
+  // Eval: a stated pet name should trigger save_memory and be repeated back.
+  const rememberingPetName = "Agent remembers user's pet's name";
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingPetName,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `My dog's name is Buddy. What is my dog's name?`,
+    assert: async (rig, result) => {
+      const sawSaveMemoryCall = await rig.waitForToolCall('save_memory');
+      expect(
+        sawSaveMemoryCall,
+        'Expected save_memory tool to be called',
+      ).toBe(true);
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        expectedContent: [/Buddy/i],
+        testName: `${TEST_PREFIX}${rememberingPetName}`,
+      });
+    },
+  });
+
+  // Eval: a user-defined command alias should trigger save_memory.
+  const rememberingCommandAlias = 'Agent remembers custom command aliases';
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingCommandAlias,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `When I say 'start server', you should run 'npm run dev'.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [/npm run dev|start server|\bok|remember|will do/i],
+        testName: `${TEST_PREFIX}${rememberingCommandAlias}`,
+      });
+    },
+  });
+
+  // Eval: a stated schema file location should trigger save_memory.
+  const rememberingDbSchemaLocation =
+    "Agent remembers project's database schema location";
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingDbSchemaLocation,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [/database schema|\bok|remember|will do/i],
+        testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
+      });
+    },
+  });
+
+  // Eval: a stated indentation preference should trigger save_memory.
+  const rememberingCodingStyle =
+    "Agent remembers user's coding style preference";
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingCodingStyle,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `I prefer to use tabs instead of spaces for indentation.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [/tabs instead of spaces|\bok|remember|will do/i],
+        testName: `${TEST_PREFIX}${rememberingCodingStyle}`,
+      });
+    },
+  });
+
+  // Eval: a stated backend test command should trigger save_memory.
+  const rememberingTestCommand =
+    'Agent remembers specific project test command';
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingTestCommand,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
+
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [
+          /command to run all backend tests|\bok|remember|will do/i,
+        ],
+        testName: `${TEST_PREFIX}${rememberingTestCommand}`,
+      });
+    },
+  });
+
+  // Eval: a stated main entry point should trigger save_memory.
+  const rememberingMainEntryPoint =
+    "Agent remembers project's main entry point";
+  evalTest('ALWAYS_PASSES', {
+    name: rememberingMainEntryPoint,
+    params: { settings: { tools: { core: ['save_memory'] } } },
+    prompt: `The main entry point for this project is \`src/index.js\`.`,
+    assert: async (rig, result) => {
+      const wasToolCalled = await rig.waitForToolCall('save_memory');
+      expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
+        true,
+      );
 
-      validateModelOutput(result, 'blue', 'Save memory test');
+      assertModelHasOutput(result);
+      checkModelOutputContent(result, {
+        // \b stops "ok" from matching inside words such as "token" or "look".
+        expectedContent: [
+          /main entry point for this project|\bok|remember|will do/i,
+        ],
+        testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
+      });
     },
   });
 });
@@ -7,7 +7,12 @@
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import { existsSync } from 'node:fs';
 import * as path from 'node:path';
-import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
+import {
+  TestRig,
+  printDebugInfo,
+  assertModelHasOutput,
+  checkModelOutputContent,
+} from './test-helper.js';
 
 describe('file-system', () => {
   let rig: TestRig;
@@ -43,8 +48,11 @@ describe('file-system', () => {
       'Expected to find a read_file tool call',
     ).toBeTruthy();
 
-    // Validate model output - will throw if no output, warn if missing expected content
-    validateModelOutput(result, 'hello world', 'File read test');
+    assertModelHasOutput(result);
+    checkModelOutputContent(result, {
+      expectedContent: 'hello world',
+      testName: 'File read test',
+    });
   });
 
   it('should be able to write a file', async () => {
@@ -74,8 +82,8 @@ describe('file-system', () => {
       'Expected to find a write_file, edit, or replace tool call',
     ).toBeTruthy();
 
-    // Validate model output - will throw if no output
-    validateModelOutput(result, null, 'File write test');
+    assertModelHasOutput(result);
+    checkModelOutputContent(result, { testName: 'File write test' });
 
     const fileContent = rig.readFile('test.txt');
 

@@ -6,7 +6,12 @@
 
 import { WEB_SEARCH_TOOL_NAME } from '../packages/core/src/tools/tool-names.js';
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
+import {
+  TestRig,
+  printDebugInfo,
+  assertModelHasOutput,
+  checkModelOutputContent,
+} from './test-helper.js';
 
 describe('web search tool', () => {
   let rig: TestRig;
@@ -68,12 +73,11 @@ describe('web search tool', () => {
       `Expected to find a call to ${WEB_SEARCH_TOOL_NAME}`,
     ).toBeTruthy();
 
-    // Validate model output - will throw if no output, warn if missing expected content
-    const hasExpectedContent = validateModelOutput(
-      result,
-      ['weather', 'london'],
-      'Google web search test',
-    );
+    assertModelHasOutput(result);
+    const hasExpectedContent = checkModelOutputContent(result, {
+      expectedContent: ['weather', 'london'],
+      testName: 'Google web search test',
+    });
 
     // If content was missing, log the search queries used
     if (!hasExpectedContent) {

@@ -9,7 +9,8 @@ import {
   TestRig,
   poll,
   printDebugInfo,
-  validateModelOutput,
+  assertModelHasOutput,
+  checkModelOutputContent,
 } from './test-helper.js';
 import { existsSync } from 'node:fs';
 import { join } from 'node:path';
@@ -68,7 +69,10 @@ describe('list_directory', () => {
       throw e;
     }
 
-    // Validate model output - will throw if no output, warn if missing expected content
-    validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
+    assertModelHasOutput(result);
+    checkModelOutputContent(result, {
+      expectedContent: ['file1.txt', 'subdir'],
+      testName: 'List directory test',
+    });
   });
 });
@@ -5,7 +5,12 @@
  */
 
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
+import {
+  TestRig,
+  printDebugInfo,
+  assertModelHasOutput,
+  checkModelOutputContent,
+} from './test-helper.js';
 
 describe('read_many_files', () => {
   let rig: TestRig;
@@ -50,7 +55,7 @@ describe('read_many_files', () => {
       'Expected to find either read_many_files or multiple read_file tool calls',
     ).toBeTruthy();
 
-    // Validate model output - will throw if no output
-    validateModelOutput(result, null, 'Read many files test');
+    assertModelHasOutput(result);
+    checkModelOutputContent(result, { testName: 'Read many files test' });
   });
 });