Skip to content

Commit b5ec654

Browse files
committed
test(framework): add test helper utilities
Add factory functions for creating mock evaluation results, aggregates, and claims for testing the evaluation framework.
1 parent 21e9398 commit b5ec654

File tree

1 file changed

+299
-0
lines changed

1 file changed

+299
-0
lines changed
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
/**
2+
* Test Helpers for Framework Tests
3+
*
4+
* Factory functions for creating mock evaluation results, aggregates,
5+
* and claims for testing the evaluation framework.
6+
*/
7+
8+
import type { AggregatedResult, SummaryStats } from "../types/aggregate.js";
9+
import type { ClaimEvaluation, ClaimEvidence, ClaimStatus,EvaluationClaim } from "../types/claims.js";
10+
import type { CorrectnessResult, EvaluationResult, Provenance, ResultMetrics, ResultOutputs,RunContext } from "../types/result.js";
11+
import type { SutRole } from "../types/sut.js";
12+
13+
/**
 * Create a mock EvaluationResult with sensible defaults.
 *
 * Each top-level section (`run`, `correctness`, `outputs`, `metrics`,
 * `provenance`) is merged shallowly with its default. A key present in an
 * override section replaces the default for that key; nested objects such as
 * `metrics.numeric` or `provenance.runtime` are replaced wholesale, not merged
 * key by key. A key explicitly set to `undefined` also replaces its default.
 *
 * The default `provenance.timestamp` is the current time at the moment of the
 * call, formatted as an ISO string.
 *
 * @param overrides - Partial fields to override defaults
 * @returns A complete EvaluationResult
 */
export const createMockResult = (overrides?: Partial<EvaluationResult>): EvaluationResult => {
  const defaultRun: RunContext = {
    runId: "mock-run-001",
    sut: "test-sut-v1.0.0",
    sutRole: "primary",
    caseId: "test-case-001",
    caseClass: "test-class",
  };

  const defaultCorrectness: CorrectnessResult = {
    expectedExists: true,
    producedOutput: true,
    valid: true,
    matchesExpected: true,
  };

  const defaultOutputs: ResultOutputs = {
    summary: {},
  };

  const defaultMetrics: ResultMetrics = {
    numeric: {
      "execution-time": 100,
      "nodes-expanded": 50,
      "path-diversity": 0.75,
    },
  };

  const defaultProvenance: Provenance = {
    runtime: {
      platform: "darwin",
      arch: "arm64",
      nodeVersion: "20.0.0",
    },
    timestamp: new Date().toISOString(),
  };

  // Spreading `undefined` is a no-op, so a missing override section simply
  // yields a copy of the defaults for that section.
  return {
    run: { ...defaultRun, ...overrides?.run },
    correctness: { ...defaultCorrectness, ...overrides?.correctness },
    outputs: { ...defaultOutputs, ...overrides?.outputs },
    metrics: { ...defaultMetrics, ...overrides?.metrics },
    provenance: { ...defaultProvenance, ...overrides?.provenance },
  };
};
64+
65+
/**
 * Create multiple mock results for a specific SUT.
 *
 * Run and case identifiers are derived from the zero-based index, left-padded
 * to three digits (`<sut>-run-000`, `case-000`, ...). Metric values are drawn
 * from `Math.random()` on every call, so results are not reproducible:
 * `execution-time` lies in [100, 150), `nodes-expanded` is an integer in
 * [50, 69], and `path-diversity` lies in [0.5, 0.9).
 *
 * `caseClass` is always passed through to `createMockResult`, so when it is
 * omitted the resulting `run.caseClass` is `undefined` rather than that
 * helper's default.
 *
 * @param count - Number of results to create
 * @param sut - SUT identifier
 * @param role - SUT role (default: "primary")
 * @param caseClass - Optional case class
 * @returns Array of EvaluationResults
 */
export const createMockResults = (count: number, sut: string, role: SutRole = "primary", caseClass?: string): EvaluationResult[] =>
  Array.from({ length: count }, (_, index) => {
    const paddedIndex = index.toString().padStart(3, "0");

    return createMockResult({
      run: {
        runId: `${sut}-run-${paddedIndex}`,
        sut,
        sutRole: role,
        caseId: `case-${paddedIndex}`,
        caseClass,
      },
      metrics: {
        numeric: {
          "execution-time": 100 + Math.random() * 50,
          "nodes-expanded": 50 + Math.floor(Math.random() * 20),
          "path-diversity": 0.5 + Math.random() * 0.4,
        },
      },
    });
  });
92+
93+
/**
 * Create mock SummaryStats.
 *
 * For an empty input, returns `n: 0` with `mean`, `median`, `min` and `max`
 * all `NaN`, and no `std` or `sum` fields. Otherwise returns the count, mean,
 * median, min, max and sum of the values, plus the sample standard deviation
 * (divisor `n - 1`). With a single value, `std` is `undefined`.
 *
 * The input array is not modified.
 *
 * @param values - Array of values to compute stats from
 * @returns SummaryStats
 */
export const createMockSummaryStats = (values: number[]): SummaryStats => {
  if (values.length === 0) {
    return {
      n: 0,
      mean: Number.NaN,
      median: Number.NaN,
      min: Number.NaN,
      max: Number.NaN,
    };
  }

  // Sort a copy numerically; the default sort would compare as strings.
  const sorted = [...values].sort((a, b) => a - b);
  const n = values.length;
  const sum = values.reduce((accumulator, v) => accumulator + v, 0);
  const mean = sum / n;

  // Even count: average the two middle values. Odd count: take the middle one.
  const midIndex = Math.floor(n / 2);
  const median = n % 2 === 0 ? (sorted[midIndex - 1] + sorted[midIndex]) / 2 : sorted[midIndex];

  // Sample standard deviation is undefined for a single observation.
  let std: number | undefined;
  if (n > 1) {
    const variance = values.reduce((accumulator, v) => accumulator + (v - mean) ** 2, 0) / (n - 1);
    std = Math.sqrt(variance);
  }

  return {
    n,
    mean,
    median,
    min: sorted[0],
    max: sorted[n - 1],
    std,
    sum,
  };
};
133+
134+
/**
 * Create mock AggregatedResult.
 *
 * The group counts are fixed at 5 runs and 5 cases and all correctness rates
 * are 1, regardless of the metrics supplied. When `metrics` is omitted, a
 * default map with `execution-time`, `nodes-expanded` and `path-diversity`
 * stats (five samples each) is used; a supplied map replaces the defaults
 * entirely rather than being merged with them.
 *
 * @param sut - SUT identifier
 * @param role - SUT role (default: "primary")
 * @param caseClass - Optional case class
 * @param metrics - Optional metrics map
 * @returns AggregatedResult
 */
export const createMockAggregate = (sut: string, role: SutRole = "primary", caseClass?: string, metrics?: Record<string, SummaryStats>): AggregatedResult => ({
  sut,
  sutRole: role,
  caseClass,
  group: {
    runCount: 5,
    caseCount: 5,
  },
  correctness: {
    validRate: 1,
    producedOutputRate: 1,
    matchesExpectedRate: 1,
  },
  // Defaults are only computed when the caller did not supply metrics.
  metrics: metrics ?? {
    "execution-time": createMockSummaryStats([100, 110, 105, 115, 120]),
    "nodes-expanded": createMockSummaryStats([50, 55, 52, 58, 60]),
    "path-diversity": createMockSummaryStats([0.7, 0.75, 0.72, 0.78, 0.8]),
  },
});
166+
167+
/**
 * Create mock aggregates for testing comparisons.
 *
 * Returns three aggregates, all in the "scale-free" case class: one primary
 * ("degree-prioritised-v1.0.0") followed by two baselines
 * ("standard-bfs-v1.0.0" and "frontier-balanced-v1.0.0"). Each carries only
 * `execution-time` and `nodes-expanded` metrics; the primary's sample values
 * are lower than both baselines' for both metrics.
 *
 * @returns Array with primary and baseline aggregates
 */
export const createMockAggregates = (): AggregatedResult[] => [
  createMockAggregate("degree-prioritised-v1.0.0", "primary", "scale-free", {
    "execution-time": createMockSummaryStats([80, 85, 82, 88, 90]),
    "nodes-expanded": createMockSummaryStats([40, 45, 42, 48, 50]),
  }),
  createMockAggregate("standard-bfs-v1.0.0", "baseline", "scale-free", {
    "execution-time": createMockSummaryStats([120, 125, 122, 128, 130]),
    "nodes-expanded": createMockSummaryStats([70, 75, 72, 78, 80]),
  }),
  createMockAggregate("frontier-balanced-v1.0.0", "baseline", "scale-free", {
    "execution-time": createMockSummaryStats([100, 105, 102, 108, 110]),
    "nodes-expanded": createMockSummaryStats([60, 65, 62, 68, 70]),
  }),
];
186+
187+
/**
 * Create a mock EvaluationClaim.
 *
 * The default claim is "C001": the SUT "degree-prioritised-v1.0.0" against the
 * baseline "standard-bfs-v1.0.0" on the `execution-time` metric, with
 * direction "less" and scope "global". Any field present in `overrides`
 * replaces the corresponding default.
 *
 * @param overrides - Partial fields to override defaults
 * @returns EvaluationClaim
 */
export const createMockClaim = (overrides?: Partial<EvaluationClaim>): EvaluationClaim => ({
  claimId: "C001",
  description: "Primary SUT is faster than baseline",
  sut: "degree-prioritised-v1.0.0",
  baseline: "standard-bfs-v1.0.0",
  metric: "execution-time",
  direction: "less",
  scope: "global",
  // Spread last so caller-supplied fields win over the defaults above.
  ...overrides,
});
203+
204+
/**
 * Create mock ClaimEvidence.
 *
 * The defaults are mutually consistent: `delta` is `primaryValue` minus
 * `baselineValue` (85 - 125 = -40) and `ratio` is their quotient
 * (85 / 125 = 0.68). Overriding one of these fields does not recompute the
 * others; any field present in `overrides` simply replaces its default.
 *
 * @param overrides - Partial fields to override defaults
 * @returns ClaimEvidence
 */
export const createMockEvidence = (overrides?: Partial<ClaimEvidence>): ClaimEvidence => ({
  primaryValue: 85,
  baselineValue: 125,
  delta: -40,
  ratio: 0.68,
  pValue: 0.01,
  effectSize: 1.5,
  n: 10,
  // Spread last so caller-supplied fields win over the defaults above.
  ...overrides,
});
220+
221+
/**
 * Create mock ClaimEvaluation.
 *
 * The claim and evidence are built with `createMockClaim` and
 * `createMockEvidence`, so the optional arguments are partial overrides
 * applied on top of those helpers' defaults, not complete objects.
 *
 * @param status - Claim status (default: "satisfied")
 * @param claim - Optional claim field overrides
 * @param evidence - Optional evidence field overrides
 * @returns ClaimEvaluation
 */
export const createMockClaimEvaluation = (status: ClaimStatus = "satisfied", claim?: Partial<EvaluationClaim>, evidence?: Partial<ClaimEvidence>): ClaimEvaluation => ({
  claim: createMockClaim(claim),
  status,
  evidence: createMockEvidence(evidence),
});
234+
235+
/**
 * Create an array of mock results with varying metrics.
 * Useful for testing aggregation and statistics.
 *
 * One result is produced per entry in `executionTimes`, in order. Run and case
 * identifiers are derived from the zero-based index, left-padded to three
 * digits (`<sut>-run-000`, `case-000`, ...). Each result's `metrics.numeric`
 * contains only `execution-time`: the supplied `numeric` object replaces the
 * default one in `createMockResult` instead of being merged with it.
 *
 * @param executionTimes - Array of execution times
 * @param sut - SUT identifier
 * @param role - SUT role (default: "primary")
 * @returns Array of EvaluationResults
 */
export const createMockResultsWithMetrics = (executionTimes: number[], sut: string, role: SutRole = "primary"): EvaluationResult[] =>
  executionTimes.map((time, index) => {
    const paddedIndex = index.toString().padStart(3, "0");

    return createMockResult({
      run: {
        runId: `${sut}-run-${paddedIndex}`,
        sut,
        sutRole: role,
        caseId: `case-${paddedIndex}`,
      },
      metrics: {
        numeric: {
          "execution-time": time,
        },
      },
    });
  });
259+
260+
/**
 * Create a minimal valid result for validation tests.
 * Returns the bare minimum fields required by the schema.
 *
 * Unlike `createMockResult`, this takes no overrides and returns a fixed
 * literal: no `caseClass`, no output summary, no numeric metrics and no
 * provenance timestamp. `expectedExists` is false and `matchesExpected` is
 * null.
 *
 * @returns A fixed EvaluationResult with only the fields listed above
 */
export const createMinimalValidResult = (): EvaluationResult => ({
  run: {
    runId: "minimal-001",
    sut: "test-sut",
    sutRole: "primary",
    caseId: "test-case",
  },
  correctness: {
    expectedExists: false,
    producedOutput: true,
    valid: true,
    matchesExpected: null,
  },
  outputs: {},
  metrics: {
    numeric: {},
  },
  provenance: {
    runtime: {
      platform: "test",
      arch: "test",
      nodeVersion: "20.0.0",
    },
  },
});
289+
290+
/**
 * Create an invalid result missing required fields.
 * Useful for testing validation logic.
 *
 * The return type is `unknown` because the value deliberately does not
 * conform to EvaluationResult: it contains only an empty `run` object.
 *
 * @returns An object with an empty `run` and no other sections
 */
export const createInvalidResult = (): unknown => ({
  run: {
    // Missing runId, sut, sutRole, caseId
  },
  // Missing correctness, metrics, provenance
});

0 commit comments

Comments
 (0)