@@ -109,7 +109,7 @@ describe('save_memory', () => {
109109 params : {
110110 settings : { tools : { core : [ 'save_memory' ] } } ,
111111 } ,
112- prompt : `My dog's name is Buddy. What is my dog's name? ` ,
112+ prompt : `Please remember that my dog's name is Buddy.` ,
113113 assert : async ( rig , result ) => {
114114 const wasToolCalled = await rig . waitForToolCall ( 'save_memory' ) ;
115115 expect ( wasToolCalled , 'Expected save_memory tool to be called' ) . toBe (
@@ -145,25 +145,34 @@ describe('save_memory', () => {
145145 } ,
146146 } ) ;
147147
148- const rememberingDbSchemaLocation =
149- "Agent remembers project's database schema location" ;
148+ const ignoringDbSchemaLocation =
149+ "Agent ignores workspace's database schema location" ;
150150 evalTest ( 'ALWAYS_PASSES' , {
151- name : rememberingDbSchemaLocation ,
151+ name : ignoringDbSchemaLocation ,
152152 params : {
153- settings : { tools : { core : [ 'save_memory' ] } } ,
153+ settings : {
154+ tools : {
155+ core : [
156+ 'save_memory' ,
157+ 'list_directory' ,
158+ 'read_file' ,
159+ 'run_shell_command' ,
160+ ] ,
161+ } ,
162+ } ,
154163 } ,
155- prompt : `The database schema for this project is located in \`db/schema.sql\`.` ,
164+ prompt : `The database schema for this workspace is located in \`db/schema.sql\`.` ,
156165 assert : async ( rig , result ) => {
157- const wasToolCalled = await rig . waitForToolCall ( 'save_memory' ) ;
158- expect ( wasToolCalled , 'Expected save_memory tool to be called' ) . toBe (
159- true ,
160- ) ;
166+ await rig . waitForTelemetryReady ( ) ;
167+ const wasToolCalled = rig
168+ . readToolLogs ( )
169+ . some ( ( log ) => log . toolRequest . name === 'save_memory' ) ;
170+ expect (
171+ wasToolCalled ,
172+ 'save_memory should not be called for workspace-specific information' ,
173+ ) . toBe ( false ) ;
161174
162175 assertModelHasOutput ( result ) ;
163- checkModelOutputContent ( result , {
164- expectedContent : [ /database schema|ok|remember|will do/i ] ,
165- testName : `${ TEST_PREFIX } ${ rememberingDbSchemaLocation } ` ,
166- } ) ;
167176 } ,
168177 } ) ;
169178
@@ -189,38 +198,74 @@ describe('save_memory', () => {
189198 } ,
190199 } ) ;
191200
192- const rememberingTestCommand =
193- 'Agent remembers specific project test command' ;
201+ const ignoringBuildArtifactLocation =
202+ 'Agent ignores workspace build artifact location' ;
194203 evalTest ( 'ALWAYS_PASSES' , {
195- name : rememberingTestCommand ,
204+ name : ignoringBuildArtifactLocation ,
196205 params : {
197- settings : { tools : { core : [ 'save_memory' ] } } ,
206+ settings : {
207+ tools : {
208+ core : [
209+ 'save_memory' ,
210+ 'list_directory' ,
211+ 'read_file' ,
212+ 'run_shell_command' ,
213+ ] ,
214+ } ,
215+ } ,
198216 } ,
199- prompt : `The command to run all backend tests is \`npm run test:backend\`.` ,
217+ prompt : `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.` ,
200218 assert : async ( rig , result ) => {
201- const wasToolCalled = await rig . waitForToolCall ( 'save_memory' ) ;
202- expect ( wasToolCalled , 'Expected save_memory tool to be called' ) . toBe (
203- true ,
204- ) ;
219+ await rig . waitForTelemetryReady ( ) ;
220+ const wasToolCalled = rig
221+ . readToolLogs ( )
222+ . some ( ( log ) => log . toolRequest . name === 'save_memory' ) ;
223+ expect (
224+ wasToolCalled ,
225+ 'save_memory should not be called for workspace-specific information' ,
226+ ) . toBe ( false ) ;
227+
228+ assertModelHasOutput ( result ) ;
229+ } ,
230+ } ) ;
231+
232+ const ignoringMainEntryPoint = "Agent ignores workspace's main entry point" ;
233+ evalTest ( 'ALWAYS_PASSES' , {
234+ name : ignoringMainEntryPoint ,
235+ params : {
236+ settings : {
237+ tools : {
238+ core : [
239+ 'save_memory' ,
240+ 'list_directory' ,
241+ 'read_file' ,
242+ 'run_shell_command' ,
243+ ] ,
244+ } ,
245+ } ,
246+ } ,
247+ prompt : `The main entry point for this workspace is \`src/index.js\`.` ,
248+ assert : async ( rig , result ) => {
249+ await rig . waitForTelemetryReady ( ) ;
250+ const wasToolCalled = rig
251+ . readToolLogs ( )
252+ . some ( ( log ) => log . toolRequest . name === 'save_memory' ) ;
253+ expect (
254+ wasToolCalled ,
255+ 'save_memory should not be called for workspace-specific information' ,
256+ ) . toBe ( false ) ;
205257
206258 assertModelHasOutput ( result ) ;
207- checkModelOutputContent ( result , {
208- expectedContent : [
209- /command to run all backend tests|ok|remember|will do/i,
210- ] ,
211- testName : `${ TEST_PREFIX } ${ rememberingTestCommand } ` ,
212- } ) ;
213259 } ,
214260 } ) ;
215261
216- const rememberingMainEntryPoint =
217- "Agent remembers project's main entry point" ;
262+ const rememberingBirthday = "Agent remembers user's birthday" ;
218263 evalTest ( 'ALWAYS_PASSES' , {
219- name : rememberingMainEntryPoint ,
264+ name : rememberingBirthday ,
220265 params : {
221266 settings : { tools : { core : [ 'save_memory' ] } } ,
222267 } ,
223- prompt : `The main entry point for this project is \`src/index.js\`.` ,
268+ prompt : `My birthday is on June 15th.` ,
224269 assert : async ( rig , result ) => {
225270 const wasToolCalled = await rig . waitForToolCall ( 'save_memory' ) ;
226271 expect ( wasToolCalled , 'Expected save_memory tool to be called' ) . toBe (
@@ -229,10 +274,8 @@ describe('save_memory', () => {
229274
230275 assertModelHasOutput ( result ) ;
231276 checkModelOutputContent ( result , {
232- expectedContent : [
233- /main entry point for this project|ok|remember|will do/i,
234- ] ,
235- testName : `${ TEST_PREFIX } ${ rememberingMainEntryPoint } ` ,
277+ expectedContent : [ /June 15th|ok|remember|will do/i ] ,
278+ testName : `${ TEST_PREFIX } ${ rememberingBirthday } ` ,
236279 } ) ;
237280 } ,
238281 } ) ;
0 commit comments