Skip to content

Commit d853fe3

Browse files
committed
test(checkpoint): add unit test for merge behavior
Add unit test for checkpoint merge behavior to validate shard combination logic.
1 parent 376fcbd commit d853fe3

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/**
2+
* Diagnostic Unit Tests for Checkpoint Merge Bug
3+
*
4+
* Tests to reproduce the data loss bug where mergeShards() overwrites
5+
* the main checkpoint instead of combining it with shard results.
6+
*/
7+
8+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
9+
import { readFileSync } from "node:fs";
10+
import { rmSync } from "node:fs";
11+
import { randomBytes } from "node:crypto";
12+
import { tmpdir } from "node:os";
13+
import { join } from "node:path";
14+
15+
import type { EvaluationResult } from "../../types/result.js";
16+
import type { CheckpointData } from "../checkpoint-manager.js";
17+
import { CheckpointManager } from "../checkpoint-manager.js";
18+
import { FileStorage } from "../checkpoint-storage.js";
19+
20+
/**
 * Diagnostic suite for the checkpoint merge data-loss bug.
 *
 * Each test seeds the main checkpoint through `saveIncremental`, writes a
 * single worker shard file into the same temp directory, reloads the manager
 * and then calls `mergeShards`. The assertions describe the desired outcome
 * (main runs and shard runs both survive), so they are expected to fail for
 * as long as the bug is present.
 */
describe("Checkpoint Merge Bug Diagnostics", () => {
  let testDir: string;
  let mainCheckpoint: CheckpointManager;
  let mainPath: string;

  beforeEach(() => {
    // A random suffix gives every test its own directory under the OS temp dir.
    testDir = join(tmpdir(), `checkpoint-merge-test-${randomBytes(8).toString("hex")}`);
    mainPath = join(testDir, "checkpoint.json");
    mainCheckpoint = new CheckpointManager({ storage: new FileStorage(mainPath) });
  });

  afterEach(() => {
    // `force` keeps cleanup from throwing when a test never wrote anything.
    rmSync(testDir, { recursive: true, force: true });
  });

  /**
   * Read checkpoint data straight from disk, bypassing CheckpointManager.
   *
   * @param path - Location of the checkpoint JSON file.
   * @returns The parsed file content; it is not validated against CheckpointData.
   */
  function readCheckpointFile(path: string): CheckpointData {
    return JSON.parse(readFileSync(path, "utf-8"));
  }

  /**
   * Write the worker shard file `checkpoint-worker-00.json` into the test directory.
   *
   * @param shard - The run ids, results and planned total to store in the shard.
   * @returns The path of the shard file that was written.
   */
  async function writeWorkerShard(
    shard: Pick<CheckpointData, "completedRunIds" | "results" | "totalPlanned">,
  ): Promise<string> {
    const shardPath = join(testDir, "checkpoint-worker-00.json");
    const shardStorage = new FileStorage(shardPath);
    await shardStorage.save({
      configHash: "test-hash",
      createdAt: new Date().toISOString(),
      updatedAt: new Date().toISOString(),
      completedRunIds: shard.completedRunIds,
      results: shard.results,
      totalPlanned: shard.totalPlanned,
    });
    return shardPath;
  }

  it("diagnostic-1: mergeShards should combine main checkpoint with shards", async () => {
    // Setup: main checkpoint with 3 completed runs, saved one at a time in order.
    const seedRuns: EvaluationResult[] = [
      createMockResult("run-001", "sut-1", "case-1"),
      createMockResult("run-002", "sut-1", "case-2"),
      createMockResult("run-003", "sut-1", "case-3"),
    ];
    for (const run of seedRuns) {
      await mainCheckpoint.saveIncremental(run);
    }

    const mainData = readCheckpointFile(mainPath);
    expect(mainData.completedRunIds).toHaveLength(3);

    // Worker shard contributes 1 additional run.
    const shardPath = await writeWorkerShard({
      completedRunIds: ["run-004"],
      results: {
        "run-004": createMockResult("run-004", "sut-1", "case-4"),
      },
      totalPlanned: 4,
    });

    // Load checkpoint to reset internal state
    await mainCheckpoint.load();

    // BUG: mergeShards will lose the 3 runs from main checkpoint!
    const merged = await mainCheckpoint.mergeShards([shardPath]);

    const mainAfterMerge = readCheckpointFile(mainPath);
    console.log("Main checkpoint runs:", mainAfterMerge.completedRunIds.length);
    console.log("Merged runs:", merged.completedRunIds.length);
    console.log("Expected: 4 runs (3 from main + 1 from shard)");

    // This should be 4, but will fail showing the bug
    expect(merged.completedRunIds).toHaveLength(4);
  });

  it("diagnostic-2: demonstrates mergeShards creates new checkpoint instead of combining", async () => {
    // Setup main checkpoint with data
    await mainCheckpoint.saveIncremental(createMockResult("main-001", "sut-1", "case-1"));

    const mainBeforeMerge = readCheckpointFile(mainPath);
    console.log("Main checkpoint before merge:", mainBeforeMerge.completedRunIds.length);

    // Empty shard, simulating a worker that completed no runs.
    const shardPath = await writeWorkerShard({
      completedRunIds: [],
      results: {},
      totalPlanned: 132,
    });

    await mainCheckpoint.load();

    // Merge empty shard
    await mainCheckpoint.mergeShards([shardPath]);

    const mainAfterMerge = readCheckpointFile(mainPath);
    console.log("Main checkpoint after merge:", mainAfterMerge.completedRunIds.length);

    // BUG: The main checkpoint's run is lost!
    expect(mainAfterMerge.completedRunIds.length).toBeGreaterThan(0);
  });

  it("diagnostic-3: shows mergeShards doesn't preserve main checkpoint data", async () => {
    // Create main checkpoint
    await mainCheckpoint.saveIncremental(createMockResult("main-001", "sut-1", "case-1"));
    await mainCheckpoint.saveIncremental(createMockResult("main-002", "sut-2", "case-2"));

    const mainDataBefore = readCheckpointFile(mainPath);
    const mainRuns = mainDataBefore.completedRunIds;
    console.log("Main checkpoint has runs:", mainRuns);

    // Create worker shard
    const shardPath = await writeWorkerShard({
      completedRunIds: ["shard-001"],
      results: {
        "shard-001": createMockResult("shard-001", "sut-3", "case-3"),
      },
      totalPlanned: 132,
    });

    // Reload main checkpoint
    await mainCheckpoint.load();

    // Merge
    const merged = await mainCheckpoint.mergeShards([shardPath]);

    const mainDataAfter = readCheckpointFile(mainPath);
    console.log("After merge:");
    console.log(" Main checkpoint runs:", mainDataAfter.completedRunIds);
    console.log(" Merged runs:", merged.completedRunIds);
    console.log(" Expected: 3 runs (2 from main + 1 from shard)");
    console.log(" Actual:", merged.completedRunIds.length, "runs");

    // Should have both main runs AND shard run; compared as sets so order is irrelevant.
    const expectedRunIds = new Set([...mainRuns, "shard-001"]);
    const actualRunIds = new Set(merged.completedRunIds);

    console.log(" Missing runs:", [...expectedRunIds].filter(x => !actualRunIds.has(x)));

    expect(actualRunIds).toEqual(expectedRunIds);
  });
});
160+
161+
/**
 * Create a mock evaluation result for testing.
 *
 * Only the three identifiers vary between fixtures; every other field is a
 * fixed placeholder value.
 *
 * @param runId - Value stored in `run.runId`.
 * @param sut - Value stored in `run.sut`.
 * @param caseId - Value stored in `run.caseId`.
 * @returns A newly built result object; nothing is shared between calls.
 */
function createMockResult(runId: string, sut: string, caseId: string): EvaluationResult {
  return {
    run: {
      runId,
      sut,
      // The annotated return type contextually types this literal, so no assertion is needed.
      sutRole: "primary",
      sutVersion: "1.0.0",
      caseId,
      caseClass: "test-class",
      seed: 42,
      repetition: 0,
    },
    correctness: {
      expectedExists: false,
      producedOutput: true,
      valid: true,
      matchesExpected: null,
    },
    outputs: { summary: {} },
    metrics: { numeric: { test: 1 } },
    provenance: {
      runtime: { platform: "linux", arch: "x64", nodeVersion: "v22.0.0" },
    },
  };
}

0 commit comments

Comments
 (0)