Skip to content

Commit d26323d

Browse files
committed
Split validateModelOutput into assertModelHasOutput and checkModelOutputContent to follow the Single Responsibility Principle.
1 parent 10452b7 commit d26323d

File tree

10 files changed

+135
-106
lines changed

10 files changed

+135
-106
lines changed

evals/save_memory.eval.ts

Lines changed: 58 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
import { describe, expect } from 'vitest';
88
import { evalTest } from './test-helper.js';
9-
import { validateModelOutput } from '../integration-tests/test-helper.js';
9+
import {
10+
assertModelHasOutput,
11+
checkModelOutputContent,
12+
} from '../integration-tests/test-helper.js';
1013

1114
describe('save_memory', () => {
1215
const TEST_PREFIX = 'Save memory test: ';
@@ -25,12 +28,11 @@ describe('save_memory', () => {
2528
true,
2629
);
2730

28-
expect(
29-
validateModelOutput(result, {
30-
expectedContent: 'blue',
31-
testName: `${TEST_PREFIX}${testName}`,
32-
}),
33-
).toBe(true);
31+
assertModelHasOutput(result);
32+
checkModelOutputContent(result, {
33+
expectedContent: 'blue',
34+
testName: `${TEST_PREFIX}${testName}`,
35+
});
3436
},
3537
});
3638
const testName2 = 'Remembering User Preferences - Command Restrictions';
@@ -46,12 +48,11 @@ describe('save_memory', () => {
4648
true,
4749
);
4850

49-
expect(
50-
validateModelOutput(result, {
51-
expectedContent: [/not run npm commands|remember|ok/i],
52-
testName: `${TEST_PREFIX}${testName2}`,
53-
}),
54-
).toBe(true);
51+
assertModelHasOutput(result);
52+
checkModelOutputContent(result, {
53+
expectedContent: [/not run npm commands|remember|ok/i],
54+
testName: `${TEST_PREFIX}${testName2}`,
55+
});
5556
},
5657
});
5758

@@ -68,12 +69,11 @@ describe('save_memory', () => {
6869
true,
6970
);
7071

71-
expect(
72-
validateModelOutput(result, {
73-
expectedContent: [/always|ok|remember|will do/i],
74-
testName: `${TEST_PREFIX}${testName4}`,
75-
}),
76-
).toBe(true);
72+
assertModelHasOutput(result);
73+
checkModelOutputContent(result, {
74+
expectedContent: [/always|ok|remember|will do/i],
75+
testName: `${TEST_PREFIX}${testName4}`,
76+
});
7777
},
7878
});
7979

@@ -92,12 +92,11 @@ describe('save_memory', () => {
9292
);
9393
}
9494

95-
expect(
96-
validateModelOutput(result, {
97-
testName: `${TEST_PREFIX}${testName5}`,
98-
forbiddenContent: [/remember|will do/i],
99-
}),
100-
).toBe(true);
95+
assertModelHasOutput(result);
96+
checkModelOutputContent(result, {
97+
testName: `${TEST_PREFIX}${testName5}`,
98+
forbiddenContent: [/remember|will do/i],
99+
});
101100
},
102101
});
103102

@@ -114,12 +113,11 @@ describe('save_memory', () => {
114113
true,
115114
);
116115

117-
expect(
118-
validateModelOutput(result, {
119-
expectedContent: [/Buddy/i],
120-
testName: `${TEST_PREFIX}${testName6}`,
121-
}),
122-
).toBe(true);
116+
assertModelHasOutput(result);
117+
checkModelOutputContent(result, {
118+
expectedContent: [/Buddy/i],
119+
testName: `${TEST_PREFIX}${testName6}`,
120+
});
123121
},
124122
});
125123

@@ -136,12 +134,11 @@ describe('save_memory', () => {
136134
true,
137135
);
138136

139-
expect(
140-
validateModelOutput(result, {
141-
expectedContent: [/npm run dev|start server|ok|remember|will do/i],
142-
testName: `${TEST_PREFIX}${testName7}`,
143-
}),
144-
).toBe(true);
137+
assertModelHasOutput(result);
138+
checkModelOutputContent(result, {
139+
expectedContent: [/npm run dev|start server|ok|remember|will do/i],
140+
testName: `${TEST_PREFIX}${testName7}`,
141+
});
145142
},
146143
});
147144

@@ -158,12 +155,11 @@ describe('save_memory', () => {
158155
true,
159156
);
160157

161-
expect(
162-
validateModelOutput(result, {
163-
expectedContent: [/database schema|ok|remember|will do/i],
164-
testName: `${TEST_PREFIX}${testName8}`,
165-
}),
166-
).toBe(true);
158+
assertModelHasOutput(result);
159+
checkModelOutputContent(result, {
160+
expectedContent: [/database schema|ok|remember|will do/i],
161+
testName: `${TEST_PREFIX}${testName8}`,
162+
});
167163
},
168164
});
169165

@@ -180,12 +176,11 @@ describe('save_memory', () => {
180176
true,
181177
);
182178

183-
expect(
184-
validateModelOutput(result, {
185-
expectedContent: [/tabs instead of spaces|ok|remember|will do/i],
186-
testName: `${TEST_PREFIX}${testName9}`,
187-
}),
188-
).toBe(true);
179+
assertModelHasOutput(result);
180+
checkModelOutputContent(result, {
181+
expectedContent: [/tabs instead of spaces|ok|remember|will do/i],
182+
testName: `${TEST_PREFIX}${testName9}`,
183+
});
189184
},
190185
});
191186

@@ -202,14 +197,13 @@ describe('save_memory', () => {
202197
true,
203198
);
204199

205-
expect(
206-
validateModelOutput(result, {
207-
expectedContent: [
208-
/command to run all backend tests|ok|remember|will do/i,
209-
],
210-
testName: `${TEST_PREFIX}${testName10}`,
211-
}),
212-
).toBe(true);
200+
assertModelHasOutput(result);
201+
checkModelOutputContent(result, {
202+
expectedContent: [
203+
/command to run all backend tests|ok|remember|will do/i,
204+
],
205+
testName: `${TEST_PREFIX}${testName10}`,
206+
});
213207
},
214208
});
215209

@@ -226,14 +220,13 @@ describe('save_memory', () => {
226220
true,
227221
);
228222

229-
expect(
230-
validateModelOutput(result, {
231-
expectedContent: [
232-
/main entry point for this project|ok|remember|will do/i,
233-
],
234-
testName: `${TEST_PREFIX}${testName11}`,
235-
}),
236-
).toBe(true);
223+
assertModelHasOutput(result);
224+
checkModelOutputContent(result, {
225+
expectedContent: [
226+
/main entry point for this project|ok|remember|will do/i,
227+
],
228+
testName: `${TEST_PREFIX}${testName11}`,
229+
});
237230
},
238231
});
239232
});

integration-tests/file-system.test.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@
77
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
88
import { existsSync } from 'node:fs';
99
import * as path from 'node:path';
10-
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
10+
import {
11+
TestRig,
12+
printDebugInfo,
13+
assertModelHasOutput,
14+
checkModelOutputContent,
15+
} from './test-helper.js';
1116

1217
describe('file-system', () => {
1318
let rig: TestRig;
@@ -43,8 +48,8 @@ describe('file-system', () => {
4348
'Expected to find a read_file tool call',
4449
).toBeTruthy();
4550

46-
// Validate model output - will throw if no output, warn if missing expected content
47-
validateModelOutput(result, {
51+
assertModelHasOutput(result);
52+
checkModelOutputContent(result, {
4853
expectedContent: 'hello world',
4954
testName: 'File read test',
5055
});
@@ -77,8 +82,8 @@ describe('file-system', () => {
7782
'Expected to find a write_file, edit, or replace tool call',
7883
).toBeTruthy();
7984

80-
// Validate model output - will throw if no output
81-
validateModelOutput(result, { testName: 'File write test' });
85+
assertModelHasOutput(result);
86+
checkModelOutputContent(result, { testName: 'File write test' });
8287

8388
const fileContent = rig.readFile('test.txt');
8489

integration-tests/google_web_search.test.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66

77
import { WEB_SEARCH_TOOL_NAME } from '../packages/core/src/tools/tool-names.js';
88
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
9-
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
9+
import {
10+
TestRig,
11+
printDebugInfo,
12+
assertModelHasOutput,
13+
checkModelOutputContent,
14+
} from './test-helper.js';
1015

1116
describe('web search tool', () => {
1217
let rig: TestRig;
@@ -68,8 +73,8 @@ describe('web search tool', () => {
6873
`Expected to find a call to ${WEB_SEARCH_TOOL_NAME}`,
6974
).toBeTruthy();
7075

71-
// Validate model output - will throw if no output, warn if missing expected content
72-
const hasExpectedContent = validateModelOutput(result, {
76+
assertModelHasOutput(result);
77+
const hasExpectedContent = checkModelOutputContent(result, {
7378
expectedContent: ['weather', 'london'],
7479
testName: 'Google web search test',
7580
});

integration-tests/list_directory.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ import {
99
TestRig,
1010
poll,
1111
printDebugInfo,
12-
validateModelOutput,
12+
assertModelHasOutput,
13+
checkModelOutputContent,
1314
} from './test-helper.js';
1415
import { existsSync } from 'node:fs';
1516
import { join } from 'node:path';
@@ -68,8 +69,8 @@ describe('list_directory', () => {
6869
throw e;
6970
}
7071

71-
// Validate model output - will throw if no output, warn if missing expected content
72-
validateModelOutput(result, {
72+
assertModelHasOutput(result);
73+
checkModelOutputContent(result, {
7374
expectedContent: ['file1.txt', 'subdir'],
7475
testName: 'List directory test',
7576
});

integration-tests/read_many_files.test.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
*/
66

77
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
8-
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
8+
import {
9+
TestRig,
10+
printDebugInfo,
11+
assertModelHasOutput,
12+
checkModelOutputContent,
13+
} from './test-helper.js';
914

1015
describe('read_many_files', () => {
1116
let rig: TestRig;
@@ -50,7 +55,7 @@ describe('read_many_files', () => {
5055
'Expected to find either read_many_files or multiple read_file tool calls',
5156
).toBeTruthy();
5257

53-
// Validate model output - will throw if no output
54-
validateModelOutput(result, { testName: 'Read many files test' });
58+
assertModelHasOutput(result);
59+
checkModelOutputContent(result, { testName: 'Read many files test' });
5560
});
5661
});

integration-tests/run_shell_command.test.ts

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
*/
66

77
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
8-
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
8+
import {
9+
TestRig,
10+
printDebugInfo,
11+
assertModelHasOutput,
12+
checkModelOutputContent,
13+
} from './test-helper.js';
914
import { getShellConfiguration } from '../packages/core/src/utils/shell-utils.js';
1015

1116
const { shell } = getShellConfiguration();
@@ -115,9 +120,8 @@ describe('run_shell_command', () => {
115120
'Expected to find a run_shell_command tool call',
116121
).toBeTruthy();
117122

118-
// Validate model output - will throw if no output, warn if missing expected content
119-
// Model often reports exit code instead of showing output
120-
validateModelOutput(result, {
123+
assertModelHasOutput(result);
124+
checkModelOutputContent(result, {
121125
expectedContent: ['hello-world', 'exit code 0'],
122126
testName: 'Shell command test',
123127
});
@@ -148,8 +152,8 @@ describe('run_shell_command', () => {
148152
'Expected to find a run_shell_command tool call',
149153
).toBeTruthy();
150154

151-
// Validate model output - will throw if no output, warn if missing expected content
152-
validateModelOutput(result, {
155+
assertModelHasOutput(result);
156+
checkModelOutputContent(result, {
153157
expectedContent: 'test-stdin',
154158
testName: 'Shell command stdin test',
155159
});
@@ -496,8 +500,8 @@ describe('run_shell_command', () => {
496500
)[0];
497501
expect(toolCall.toolRequest.success).toBe(true);
498502

499-
// Validate model output - will throw if no output, warn if missing expected content
500-
validateModelOutput(result, {
503+
assertModelHasOutput(result);
504+
checkModelOutputContent(result, {
501505
expectedContent: 'test-allow-all',
502506
testName: 'Shell command stdin allow all',
503507
});
@@ -529,7 +533,8 @@ describe('run_shell_command', () => {
529533
foundToolCall,
530534
'Expected to find a run_shell_command tool call',
531535
).toBeTruthy();
532-
validateModelOutput(result, {
536+
assertModelHasOutput(result);
537+
checkModelOutputContent(result, {
533538
expectedContent: varValue,
534539
testName: 'Env var propagation test',
535540
});
@@ -562,7 +567,8 @@ describe('run_shell_command', () => {
562567
'Expected to find a run_shell_command tool call',
563568
).toBeTruthy();
564569

565-
validateModelOutput(result, {
570+
assertModelHasOutput(result);
571+
checkModelOutputContent(result, {
566572
expectedContent: fileName,
567573
testName: 'Platform-specific listing test',
568574
});

integration-tests/simple-mcp-server.test.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@
1111
*/
1212

1313
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
14-
import { TestRig, poll, validateModelOutput } from './test-helper.js';
14+
import {
15+
TestRig,
16+
poll,
17+
assertModelHasOutput,
18+
checkModelOutputContent,
19+
} from './test-helper.js';
1520
import { join } from 'node:path';
1621
import { writeFileSync } from 'node:fs';
1722

@@ -226,8 +231,8 @@ describe.skip('simple-mcp-server', () => {
226231

227232
expect(foundToolCall, 'Expected to find an add tool call').toBeTruthy();
228233

229-
// Validate model output - will throw if no output, fail if missing expected content
230-
validateModelOutput(output, {
234+
assertModelHasOutput(output);
235+
checkModelOutputContent(output, {
231236
expectedContent: '15',
232237
testName: 'MCP server test',
233238
});

0 commit comments

Comments
 (0)