Skip to content

Commit 947a6ec

Browse files
committed
fix(evals): update save_memory evals and simplify tool description
- Updated behavioral evaluations to verify that workspace-local facts are not saved to memory, using clear examples (schema location, build artifacts, entry point).
- Simplified and consolidated the save_memory tool description and schema to explicitly forbid workspace-specific facts while maintaining token efficiency.
- Ensured the evaluations have the appropriate file tools available to prevent hallucinations or incorrect behavior.
- Verified that the evaluations pass consistently on both Gemini 2.5 and Gemini 3 models.
1 parent 69f562b commit 947a6ec

File tree

2 files changed

+98
-71
lines changed

2 files changed

+98
-71
lines changed

evals/save_memory.eval.ts

Lines changed: 79 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -145,25 +145,34 @@ describe('save_memory', () => {
145145
},
146146
});
147147

148-
const rememberingDbSchemaLocation =
149-
"Agent remembers project's database schema location";
148+
const ignoringDbSchemaLocation =
149+
"Agent ignores workspace's database schema location";
150150
evalTest('ALWAYS_PASSES', {
151-
name: rememberingDbSchemaLocation,
151+
name: ignoringDbSchemaLocation,
152152
params: {
153-
settings: { tools: { core: ['save_memory'] } },
153+
settings: {
154+
tools: {
155+
core: [
156+
'save_memory',
157+
'list_directory',
158+
'read_file',
159+
'run_shell_command',
160+
],
161+
},
162+
},
154163
},
155-
prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
164+
prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
156165
assert: async (rig, result) => {
157-
const wasToolCalled = await rig.waitForToolCall('save_memory');
158-
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
159-
true,
160-
);
166+
await rig.waitForTelemetryReady();
167+
const wasToolCalled = rig
168+
.readToolLogs()
169+
.some((log) => log.toolRequest.name === 'save_memory');
170+
expect(
171+
wasToolCalled,
172+
'save_memory should not be called for workspace-specific information',
173+
).toBe(false);
161174

162175
assertModelHasOutput(result);
163-
checkModelOutputContent(result, {
164-
expectedContent: [/database schema|ok|remember|will do/i],
165-
testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
166-
});
167176
},
168177
});
169178

@@ -189,38 +198,74 @@ describe('save_memory', () => {
189198
},
190199
});
191200

192-
const rememberingTestCommand =
193-
'Agent remembers specific project test command';
201+
const ignoringBuildArtifactLocation =
202+
'Agent ignores workspace build artifact location';
194203
evalTest('ALWAYS_PASSES', {
195-
name: rememberingTestCommand,
204+
name: ignoringBuildArtifactLocation,
196205
params: {
197-
settings: { tools: { core: ['save_memory'] } },
206+
settings: {
207+
tools: {
208+
core: [
209+
'save_memory',
210+
'list_directory',
211+
'read_file',
212+
'run_shell_command',
213+
],
214+
},
215+
},
198216
},
199-
prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
217+
prompt: `Build artifacts are stored in the \`dist/artifacts\` directory.`,
200218
assert: async (rig, result) => {
201-
const wasToolCalled = await rig.waitForToolCall('save_memory');
202-
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
203-
true,
204-
);
219+
await rig.waitForTelemetryReady();
220+
const wasToolCalled = rig
221+
.readToolLogs()
222+
.some((log) => log.toolRequest.name === 'save_memory');
223+
expect(
224+
wasToolCalled,
225+
'save_memory should not be called for workspace-specific information',
226+
).toBe(false);
227+
228+
assertModelHasOutput(result);
229+
},
230+
});
231+
232+
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
233+
evalTest('ALWAYS_PASSES', {
234+
name: ignoringMainEntryPoint,
235+
params: {
236+
settings: {
237+
tools: {
238+
core: [
239+
'save_memory',
240+
'list_directory',
241+
'read_file',
242+
'run_shell_command',
243+
],
244+
},
245+
},
246+
},
247+
prompt: `The main entry point for this workspace is \`src/index.js\`.`,
248+
assert: async (rig, result) => {
249+
await rig.waitForTelemetryReady();
250+
const wasToolCalled = rig
251+
.readToolLogs()
252+
.some((log) => log.toolRequest.name === 'save_memory');
253+
expect(
254+
wasToolCalled,
255+
'save_memory should not be called for workspace-specific information',
256+
).toBe(false);
205257

206258
assertModelHasOutput(result);
207-
checkModelOutputContent(result, {
208-
expectedContent: [
209-
/command to run all backend tests|ok|remember|will do/i,
210-
],
211-
testName: `${TEST_PREFIX}${rememberingTestCommand}`,
212-
});
213259
},
214260
});
215261

216-
const rememberingMainEntryPoint =
217-
"Agent remembers project's main entry point";
262+
const rememberingBirthday = "Agent remembers user's birthday";
218263
evalTest('ALWAYS_PASSES', {
219-
name: rememberingMainEntryPoint,
264+
name: rememberingBirthday,
220265
params: {
221266
settings: { tools: { core: ['save_memory'] } },
222267
},
223-
prompt: `The main entry point for this project is \`src/index.js\`.`,
268+
prompt: `My birthday is on June 15th.`,
224269
assert: async (rig, result) => {
225270
const wasToolCalled = await rig.waitForToolCall('save_memory');
226271
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
@@ -229,10 +274,8 @@ describe('save_memory', () => {
229274

230275
assertModelHasOutput(result);
231276
checkModelOutputContent(result, {
232-
expectedContent: [
233-
/main entry point for this project|ok|remember|will do/i,
234-
],
235-
testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
277+
expectedContent: [/June 15th|ok|remember|will do/i],
278+
testName: `${TEST_PREFIX}${rememberingBirthday}`,
236279
});
237280
},
238281
});

packages/core/src/tools/memoryTool.ts

Lines changed: 19 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import {
1111
Kind,
1212
ToolConfirmationOutcome,
1313
} from './tools.js';
14-
import type { FunctionDeclaration } from '@google/genai';
1514
import * as fs from 'node:fs/promises';
1615
import * as path from 'node:path';
1716
import { Storage } from '../config/storage.js';
@@ -26,41 +25,14 @@ import { ToolErrorType } from './tool-error.js';
2625
import { MEMORY_TOOL_NAME } from './tool-names.js';
2726
import type { MessageBus } from '../confirmation-bus/message-bus.js';
2827

29-
const memoryToolSchemaData: FunctionDeclaration = {
30-
name: MEMORY_TOOL_NAME,
31-
description:
32-
'Saves a specific piece of information, fact, or user preference to your long-term memory. Use this when the user explicitly asks you to remember something, or when they state a clear, concise fact or preference that seems important to retain for future interactions. Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
33-
parametersJsonSchema: {
34-
type: 'object',
35-
properties: {
36-
fact: {
37-
type: 'string',
38-
description:
39-
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
40-
},
41-
},
42-
required: ['fact'],
43-
additionalProperties: false,
44-
},
45-
};
46-
4728
const memoryToolDescription = `
48-
Saves a specific piece of information or fact to your long-term memory.
49-
50-
Use this tool:
51-
52-
- When the user explicitly asks you to remember something (e.g., "Remember that I like pineapple on pizza", "Please save this: my cat's name is Whiskers").
53-
- When the user states a clear, concise fact about themselves, their preferences, or their environment that seems important for you to retain for future interactions to provide a more personalized and effective assistance.
29+
Saves concise global user context (preferences, facts) for use across ALL workspaces.
5430
55-
Do NOT use this tool:
31+
### CRITICAL: GLOBAL CONTEXT ONLY
32+
NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces.
5633
57-
- To remember conversational context that is only relevant for the current session.
58-
- To save long, complex, or rambling pieces of text. The fact should be relatively short and to the point.
59-
- If you are unsure whether the information is a fact worth remembering long-term. If in doubt, you can ask the user, "Should I remember that for you?"
60-
61-
## Parameters
62-
63-
- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".`;
34+
- Use for "Remember X" or clear personal facts.
35+
- Do NOT use for session context.`;
6436

6537
export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md';
6638
export const MEMORY_SECTION_HEADER = '## Gemini Added Memories';
@@ -313,9 +285,21 @@ export class MemoryTool
313285
super(
314286
MemoryTool.Name,
315287
'SaveMemory',
316-
memoryToolDescription,
288+
memoryToolDescription +
289+
' Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
317290
Kind.Think,
318-
memoryToolSchemaData.parametersJsonSchema as Record<string, unknown>,
291+
{
292+
type: 'object',
293+
properties: {
294+
fact: {
295+
type: 'string',
296+
description:
297+
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
298+
},
299+
},
300+
required: ['fact'],
301+
additionalProperties: false,
302+
},
319303
messageBus,
320304
true,
321305
false,

0 commit comments

Comments
 (0)