Skip to content

Commit 601f060

Browse files
authored
feat(plan): add positive test case and update eval stability policy (#18457)
1 parent ad6d3fd commit 601f060

File tree

1 file changed

+38
-6
lines changed

1 file changed

+38
-6
lines changed

evals/plan_mode.eval.ts

Lines changed: 38 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
*/
66

77
import { describe, expect } from 'vitest';
8+
import { ApprovalMode } from '@google/gemini-cli-core';
89
import { evalTest } from './test-helper.js';
910
import {
1011
assertModelHasOutput,
@@ -17,9 +18,9 @@ describe('plan_mode', () => {
1718
experimental: { plan: true },
1819
};
1920

20-
evalTest('ALWAYS_PASSES', {
21+
evalTest('USUALLY_PASSES', {
2122
name: 'should refuse file modification when in plan mode',
22-
approvalMode: 'plan',
23+
approvalMode: ApprovalMode.PLAN,
2324
params: {
2425
settings,
2526
},
@@ -56,9 +57,9 @@ describe('plan_mode', () => {
5657
},
5758
});
5859

59-
evalTest('ALWAYS_PASSES', {
60+
evalTest('USUALLY_PASSES', {
6061
name: 'should enter plan mode when asked to create a plan',
61-
approvalMode: 'default',
62+
approvalMode: ApprovalMode.DEFAULT,
6263
params: {
6364
settings,
6465
},
@@ -73,9 +74,9 @@ describe('plan_mode', () => {
7374
},
7475
});
7576

76-
evalTest('ALWAYS_PASSES', {
77+
evalTest('USUALLY_PASSES', {
7778
name: 'should exit plan mode when plan is complete and implementation is requested',
78-
approvalMode: 'plan',
79+
approvalMode: ApprovalMode.PLAN,
7980
params: {
8081
settings,
8182
},
@@ -93,4 +94,35 @@ describe('plan_mode', () => {
9394
assertModelHasOutput(result);
9495
},
9596
});
97+
98+
evalTest('USUALLY_PASSES', {
99+
name: 'should allow file modification in plans directory when in plan mode',
100+
approvalMode: ApprovalMode.PLAN,
101+
params: {
102+
settings,
103+
},
104+
prompt: 'Create a plan for a new login feature.',
105+
assert: async (rig, result) => {
106+
await rig.waitForTelemetryReady();
107+
const toolLogs = rig.readToolLogs();
108+
109+
const writeCall = toolLogs.find(
110+
(log) => log.toolRequest.name === 'write_file',
111+
);
112+
113+
expect(
114+
writeCall,
115+
'Should attempt to modify a file in the plans directory when in plan mode',
116+
).toBeDefined();
117+
118+
if (writeCall) {
119+
const args = JSON.parse(writeCall.toolRequest.args);
120+
expect(args.file_path).toContain('.gemini/tmp');
121+
expect(args.file_path).toContain('/plans/');
122+
expect(args.file_path).toMatch(/\.md$/);
123+
}
124+
125+
assertModelHasOutput(result);
126+
},
127+
});
96128
});

0 commit comments

Comments (0)