feat(executor): add timeout protection and plannedRuns parameter

Mearman · Mearman · commit 83fbfe0004ed · 2026-01-22T15:57:17.000Z
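- Add Promise.race wrapper around sutInstance.run() to enforce per-run
  timeout limits when config.timeoutMs > 0 (300s default for large graphs)
- Add optional plannedRuns parameter to execute() so callers can pass a
  pre-filtered run list (parallel worker run filtering, and skipping
  already-completed runs in single-process mode)
- Use effectivePlannedRuns to avoid shadowing the parameter name
- Forward timeoutMs to parallel workers via --timeout and resolve worker
  paths from the package root instead of process.cwd()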

This stops a long-running graph traversal from holding up a parallel
worker indefinitely (the timed-out run is abandoned, not cancelled) and
allows workers to execute disjoint run subsets.
diff --git a/src/cli-commands/evaluate.ts b/src/cli-commands/evaluate.ts
@@ -616,6 +616,7 @@ const runExecutePhase = async (options: EvaluateOptions, sutRegistry: ExpansionS
 			await executeParallel(remainingRuns, suts, cases, executorConfigWithCallbacks, {
 				workers: options.parallelWorkers,
 				checkpointDir: executeDir,
+				timeoutMs: options.timeoutMs,
 			});
 
 			// Merge worker checkpoints after parallel execution (main process only)
@@ -641,7 +642,8 @@ const runExecutePhase = async (options: EvaluateOptions, sutRegistry: ExpansionS
 			}
 		} else {
 			// Single-process async execution (concurrent but single-threaded)
-			await executor.execute(suts, cases, extractMetrics);
+			// Pass remainingRuns to avoid re-executing completed runs
+			await executor.execute(suts, cases, extractMetrics, remainingRuns);
 		}
 	}
 
diff --git a/src/experiments/framework/executor/__tests__/executor.unit.test.ts b/src/experiments/framework/executor/__tests__/executor.unit.test.ts
@@ -0,0 +1,196 @@
+/**
+ * Unit tests for Executor
+ *
+ * Tests executor behavior including planned runs filtering.
+ */
+
+import { beforeEach, describe, expect, it } from "vitest";
+
+import type { CaseDefinition } from "../../types/case.js";
+import type { SutDefinition } from "../../types/sut.js";
+import { Executor } from "../executor.js";
+
+/**
+ * Expander stub for the executor tests; its expand() performs no work.
+ */
+class MockExpander {
+	/** Returns an already-resolved promise without expanding anything. */
+	expand(): Promise<void> {
+		return Promise.resolve();
+	}
+}
+
+/**
+ * Result type returned by the mock SUT's run() in these tests.
+ */
+interface MockResult {
+	/** Result payload; the mock SUT created in this file sets it to the SUT id. */
+	value: string;
+}
+
+/**
+ * Build a SUT definition whose instances resolve to `{ value: id }` when run.
+ * @param id - Used as the registration id, inside the display name, and as the run result value
+ * @returns A SUT definition with fixed role/version metadata and a stub factory
+ */
+const createMockSut = (id: string): SutDefinition<MockExpander, MockResult> => {
+	// Only run() is stubbed; the cast presumably hides the rest of the SUT
+	// instance interface, which these tests do not exercise.
+	const factory = () =>
+		({
+			async run() {
+				return { value: id };
+			},
+		}) as never;
+
+	return {
+		registration: {
+			id,
+			name: `Mock SUT ${id}`,
+			role: "primary",
+			version: "1.0.0",
+			config: Object.freeze({}),
+			tags: [],
+		},
+		factory,
+	};
+};
+
+/**
+ * Build a case definition with empty inputs, no seeds, and a MockExpander.
+ * @param id - Used as the caseId and inside the display name
+ * @returns A case definition of class "test" at version "1.0.0"
+ */
+const createMockCase = (id: string): CaseDefinition<MockExpander> => {
+	return {
+		case: {
+			caseId: id,
+			caseClass: "test",
+			name: `Test Case ${id}`,
+			version: "1.0.0",
+			inputs: {},
+		},
+		// Each call hands back a brand-new expander stub.
+		async createExpander() {
+			return new MockExpander();
+		},
+		getSeeds() {
+			return [];
+		},
+	};
+};
+
+// Suite for Executor planning and for execute() when a pre-filtered run list is supplied.
+// Every executor is typed as Executor<MockExpander, MockResult>, so the mock SUTs and
+// cases are passed without casts and stay type-checked against the real signatures.
+describe("Executor", () => {
+	let executor: Executor<MockExpander, MockResult>;
+
+	// Each test gets a new executor built with the default configuration.
+	beforeEach(() => {
+		executor = new Executor();
+	});
+
+	describe("plan", () => {
+		it("should generate planned runs for SUTs and cases", () => {
+			const suts = [createMockSut("sut1"), createMockSut("sut2")];
+			const cases = [createMockCase("case1"), createMockCase("case2")];
+
+			const planned = executor.plan(suts, cases);
+
+			expect(planned).toHaveLength(4); // 2 SUTs x 2 cases
+		});
+
+		it("should include repetition in planned runs", () => {
+			const executorWithRep = new Executor<MockExpander, MockResult>({ repetitions: 3 });
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1")];
+
+			const planned = executorWithRep.plan(suts, cases);
+
+			expect(planned).toHaveLength(3); // 1 SUT x 1 case x 3 reps
+			// Compare the whole sequence instead of indexing, so no element access is unchecked.
+			expect(planned.map((run) => run.repetition)).toEqual([0, 1, 2]);
+		});
+
+		it("should generate unique run IDs", () => {
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1"), createMockCase("case2")];
+
+			const planned = executor.plan(suts, cases);
+			const runIds = new Set(planned.map((r) => r.runId));
+
+			expect(runIds.size).toBe(planned.length); // All unique
+		});
+	});
+
+	describe("execute with plannedRuns parameter", () => {
+		it("should use provided plannedRuns instead of planning", async () => {
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1")];
+
+			// Plan all runs
+			const allPlanned = executor.plan(suts, cases);
+
+			// Filter to only run the first one (slice keeps the element type free of undefined)
+			const filteredRuns = allPlanned.slice(0, 1);
+
+			const summary = await executor.execute(suts, cases, () => ({}), filteredRuns);
+
+			expect(summary.totalRuns).toBe(1);
+			expect(summary.successfulRuns).toBe(1);
+		});
+
+		it("should use filtered plannedRuns for single execution", async () => {
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1"), createMockCase("case2")];
+
+			// Plan all runs
+			const allPlanned = executor.plan(suts, cases);
+
+			// Filter to only run half
+			const filteredRuns = allPlanned.slice(0, 1);
+
+			const summary = await executor.execute(suts, cases, () => ({}), filteredRuns);
+
+			expect(summary.totalRuns).toBe(1);
+			expect(summary.successfulRuns).toBe(1);
+		});
+
+		it("should plan all runs when plannedRuns is undefined", async () => {
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1")];
+
+			const summary = await executor.execute(suts, cases, () => ({}));
+
+			expect(summary.totalRuns).toBe(1);
+			expect(summary.successfulRuns).toBe(1);
+		});
+
+		it("should work with empty plannedRuns array", async () => {
+			const suts = [createMockSut("sut1")];
+			const cases = [createMockCase("case1")];
+
+			// An explicit empty list must be honoured rather than replaced by a fresh plan.
+			const summary = await executor.execute(suts, cases, () => ({}), []);
+
+			expect(summary.totalRuns).toBe(0);
+			expect(summary.successfulRuns).toBe(0);
+		});
+	});
+
+	describe("execute with parallel execution", () => {
+		it("should use provided plannedRuns with concurrency > 1", async () => {
+			const executorWithConcurrency = new Executor<MockExpander, MockResult>({ concurrency: 2 });
+			const suts = [createMockSut("sut1"), createMockSut("sut2")];
+			const cases = [createMockCase("case1")];
+
+			// Plan all runs
+			const allPlanned = executorWithConcurrency.plan(suts, cases);
+
+			// Filter to only run one
+			const filteredRuns = allPlanned.slice(0, 1);
+
+			const summary = await executorWithConcurrency.execute(suts, cases, () => ({}), filteredRuns);
+
+			expect(summary.totalRuns).toBe(1);
+			expect(summary.successfulRuns).toBe(1);
+		});
+	});
+});
diff --git a/src/experiments/framework/executor/executor.ts b/src/experiments/framework/executor/executor.ts
@@ -217,16 +217,18 @@ export class Executor<TExpander, TResult> {
 	 * @param suts - SUTs to execute
 	 * @param cases - Cases to run against
 	 * @param metricsExtractor - Function to extract metrics from result
+	 * @param plannedRuns - Optional pre-filtered planned runs (for parallel workers)
 	 * @returns Execution summary with all results
 	 */
 	async execute(
 		suts: SutDefinition<TExpander, TResult>[],
 		cases: CaseDefinition<TExpander>[],
-		metricsExtractor: (result: TResult) => Record<string, number>
+		metricsExtractor: (result: TResult) => Record<string, number>,
+		plannedRuns?: PlannedRun[]
 	): Promise<ExecutionSummary> {
 		const startTime = performance.now();
 
-		const plannedRuns = this.plan(suts, cases);
+		const effectivePlannedRuns = plannedRuns ?? this.plan(suts, cases);
 		const sutMap = new Map(suts.map((s) => [s.registration.id, s]));
 		const caseMap = new Map(cases.map((c) => [c.case.caseId, c]));
 
@@ -236,7 +238,7 @@ export class Executor<TExpander, TResult> {
 		if (concurrency <= 1) {
 			// Sequential execution (original behavior)
 			return this.executeSequential(
-				plannedRuns,
+				effectivePlannedRuns,
 				sutMap,
 				caseMap,
 				metricsExtractor,
@@ -246,7 +248,7 @@ export class Executor<TExpander, TResult> {
 
 		// Parallel execution with concurrency limit
 		return this.executeParallel(
-			plannedRuns,
+			effectivePlannedRuns,
 			sutMap,
 			caseMap,
 			metricsExtractor,
@@ -476,8 +478,14 @@ export class Executor<TExpander, TResult> {
 		// Create SUT instance
 		const sutInstance = sutDef.factory(expander, seeds, run.config);
 
-		// Execute
-		const sutResult = await sutInstance.run();
+		// Execute the SUT, racing it against a timer when a per-run timeout is configured
+		// (timeoutMs > 0). Losing the race only rejects this await: sutInstance.run() is
+		// not cancelled and may keep working in the background.
+		const { timeoutMs } = this.config;
+		let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
+		let sutResult: TResult;
+		try {
+			sutResult = await (timeoutMs > 0
+				? Promise.race([
+						sutInstance.run(),
+						new Promise<never>((_, reject) => {
+							timeoutHandle = setTimeout(() => reject(new Error(`Timeout after ${timeoutMs}ms`)), timeoutMs);
+						}),
+					])
+				: sutInstance.run());
+		} finally {
+			// Always clear the timer. A pending timeout keeps the event loop (and so the
+			// worker process) alive until it fires, even though the run has already settled.
+			clearTimeout(timeoutHandle);
+		}
 
 		const executionTimeMs = performance.now() - runStartTime;
 
diff --git a/src/experiments/framework/executor/parallel-executor.ts b/src/experiments/framework/executor/parallel-executor.ts
@@ -13,11 +13,35 @@
 import { spawn } from "node:child_process";
 import { randomBytes } from "node:crypto";
 import { cpus } from "node:os";
-import { resolve } from "node:path";
+import { dirname, resolve } from "node:path";
 
 import type { EvaluationResult } from "../types/result.js";
 import type { ExecutorConfig, PlannedRun } from "./executor.js";
 
+/**
+ * Get the package root directory by resolving from the entry point script.
+ * The CLI entry point is dist/cli.js, so the root is the parent of that dist/ directory.
+ *
+ * @returns The parent of the entry script's directory when that directory is named
+ *   "dist"; otherwise (including when Node was started without a script) process.cwd().
+ */
+const getPackageRoot = (): string => {
+	// process.argv[1] is the path to the executed script (e.g., /path/to/graphbox/dist/cli.js).
+	// It is missing when Node runs without a script (REPL, `node -e`), and resolve() throws
+	// on a non-string argument, so fall back before touching it.
+	const entryPoint: string | undefined = process.argv[1];
+	if (!entryPoint) {
+		return process.cwd();
+	}
+
+	// Resolve to an absolute path first (handles relative paths like "dist/cli.js")
+	const entryDir = dirname(resolve(entryPoint));
+
+	// If the entry point is in dist/, the package root is one level up. dirname() is used
+	// instead of slicing off the suffix so that a dist/ directly under the filesystem root
+	// yields "/" (or "C:\") rather than "" (or "C:"), which would resolve relative to cwd.
+	if (entryDir.endsWith("/dist") || entryDir.endsWith(String.raw`\dist`)) {
+		return dirname(entryDir);
+	}
+
+	// Fallback: use current directory
+	return process.cwd();
+};
+
+/** Package root computed once at module load; base for the worker script path, default checkpoint dir, and worker cwd. */
+const PACKAGE_ROOT = getPackageRoot();
+
 export interface ParallelExecutorOptions {
 	/** Number of parallel processes (default: CPU count) */
 	workers?: number;
@@ -27,6 +51,9 @@ export interface ParallelExecutorOptions {
 
 	/** Checkpoint directory (defaults to "results/execute") */
 	checkpointDir?: string;
+
+	/** Per-run timeout in milliseconds (0 = no timeout) */
+	timeoutMs?: number;
 }
 
 /**
@@ -89,10 +116,14 @@ export const shardPath = (checkpointDir: string, workerIndex: number): string =>
 export const executeParallel = async (runs: PlannedRun[], suts: unknown, cases: unknown[], config: ExecutorConfig & { onResult?: (result: EvaluationResult) => void }, options: ParallelExecutorOptions = {}): Promise<{ results: EvaluationResult[]; errors: Array<{ runId: string; error: string }> }> => {
 	const numberWorkers = options.workers ?? cpus().length;
 	const nodePath = options.nodePath ?? process.execPath;
-	const checkpointDir = options.checkpointDir ?? resolve(process.cwd(), "results/execute");
+	const checkpointDir = options.checkpointDir ?? resolve(PACKAGE_ROOT, "results/execute");
+	const timeoutMs = options.timeoutMs ?? config.timeoutMs ?? 0;
 
 	console.log(`ParallelExecutor: Spawning ${numberWorkers} processes for ${runs.length} runs`);
 	console.log(`Checkpoint directory: ${checkpointDir}`);
+	if (timeoutMs > 0) {
+		console.log(`Per-run timeout: ${timeoutMs}ms (${Math.round(timeoutMs / 1000)}s)`);
+	}
 
 	// Generate unique names for each worker
 	const workerNames = generateWorkerNames(numberWorkers);
@@ -122,15 +153,21 @@ export const executeParallel = async (runs: PlannedRun[], suts: unknown, cases:
 		const workerCheckpointPath = shardPath(checkpointDir, index);
 
 		const arguments_ = [
-			resolve(process.cwd(), "dist/cli.js"),
+			resolve(PACKAGE_ROOT, "dist/cli.js"),
 			"evaluate",
 			"--phase=execute",
 			"--checkpoint-mode=file",
 			`--run-filter=${runFilter}`, // JSON array - needs to be quoted in shell but spawn() handles this
 		];
 
+		// Add timeout if specified
+		if (timeoutMs > 0) {
+			arguments_.push(`--timeout=${timeoutMs}`);
+		}
+
 		return spawn(nodePath, arguments_, {
 			stdio: "inherit",
+			cwd: PACKAGE_ROOT, // Ensure workers use the package root as working directory
 			env: {
 				...process.env,
 				NODE_OPTIONS: "--max-old-space-size=4096",

Original file line number	Diff line number	Diff line change
`@@ -616,6 +616,7 @@ const runExecutePhase = async (options: EvaluateOptions, sutRegistry: ExpansionS`
`616`	`616`	`await executeParallel(remainingRuns, suts, cases, executorConfigWithCallbacks, {`
`617`	`617`	`workers: options.parallelWorkers,`
`618`	`618`	`checkpointDir: executeDir,`
	`619`	`+ timeoutMs: options.timeoutMs,`
`619`	`620`	`});`
`620`	`621`
`621`	`622`	`// Merge worker checkpoints after parallel execution (main process only)`
`@@ -641,7 +642,8 @@ const runExecutePhase = async (options: EvaluateOptions, sutRegistry: ExpansionS`
`641`	`642`	`}`
`642`	`643`	`} else {`
`643`	`644`	`// Single-process async execution (concurrent but single-threaded)`
`644`		`- await executor.execute(suts, cases, extractMetrics);`
	`645`	`+ // Pass remainingRuns to avoid re-executing completed runs`
	`646`	`+ await executor.execute(suts, cases, extractMetrics, remainingRuns);`
`645`	`647`	`}`
`646`	`648`	`}`
`647`	`649`