Skip to content

Commit 4ee9f10

Browse files
committed
feat(agent-challenge): seed platform sdk execution
1 parent a00cb05 commit 4ee9f10

5 files changed

Lines changed: 197 additions & 7 deletions

File tree

docs/operations/validator.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,25 @@ Kubernetes mode requires PostgreSQL control-plane state. The installer provides
9898
registry-scoped `PLATFORM_BROKER_ALLOWED_IMAGES`. SQLite URLs, wildcards, and
9999
broad prefixes such as `platformnetwork/` fail settings validation.
100100

101+
102+
## Agent Challenge Platform SDK Execution Checks
103+
104+
The Agent Challenge production Terminal-Bench rollout uses the `platform_sdk` backend through the generic Platform broker. The public proxy must still expose only the challenge's public routes and must block `/internal/*`, `POST /internal/v1/submissions/{submission_id}/launch`, and generic benchmark-execution-shaped routes such as `/benchmark-executions`. The broker is an internal execution substrate, not a public miner API.
105+
106+
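The commands below use placeholders: substitute your own values for each `<...>` and avoid printing token values. Lines prefixed with `!` are negative checks that pass only when `rg` finds no match: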
107+
108+
```bash
109+
kubectl -n <validator-namespace> get pods -l app.kubernetes.io/name=agent-challenge
110+
kubectl -n <validator-namespace> logs deployment/<agent-challenge-deployment> --since=30m | rg 'terminal_bench|platform_sdk|tb_running'
111+
kubectl -n <validator-namespace> logs deployment/<platform-broker-deployment> --since=30m | rg 'run request|created job|agent-challenge-terminal-bench-runner'
112+
kubectl -n <validator-namespace> logs deployment/<agent-challenge-deployment> --since=30m | rg -- '--environment-import-path agent_challenge_runner.platform_environment:PlatformEnvironment'
113+
! kubectl -n <validator-namespace> logs deployment/<agent-challenge-deployment> --since=30m | rg --fixed-strings -- '--env daytona'
114+
! kubectl -n <validator-namespace> logs deployment/<agent-challenge-deployment> --since=30m | rg --fixed-strings -- '--env platform'
115+
curl -sS '<api-base-url>/submissions/<submission-id>/status' | rg '"status":"evaluating"|"phase":"evaluation"|"status":"valid"|"status":"error"'
116+
```
117+
118+
Safe Agent Challenge knobs are `CHALLENGE_TERMINAL_BENCH_EXECUTION_BACKEND=platform_sdk`, the broker URL and token file (`CHALLENGE_DOCKER_BROKER_URL`, `CHALLENGE_DOCKER_BROKER_TOKEN_FILE`), `CHALLENGE_PLATFORM_SDK_RUNNER_IMAGE=ghcr.io/platformnetwork/agent-challenge-terminal-bench-runner:latest`, `CHALLENGE_PLATFORM_SDK_ENVIRONMENT_IMPORT_PATH=agent_challenge_runner.platform_environment:PlatformEnvironment`, and a scoped allowed-image policy. Platform SDK Harbor commands use `--environment-import-path`, not `--env platform`, and production does not require Daytona credentials. Roll back to `harbor` only for non-production testing or for an explicitly credentialed legacy Harbor environment; production remains `platform_sdk` after rollout.
119+
101120
## Validation
102121

103122
```bash

src/platform_network/cli_app/main.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
ChallengeResources,
4545
ChallengeSpec,
4646
DockerOrchestrator,
47+
worker_command_from_metadata,
4748
)
4849
from platform_network.master.kubernetes_broker import (
4950
KubernetesBrokerRouterService,
@@ -184,6 +185,9 @@ async def _spec(self, slug: str) -> ChallengeSpec:
184185
env=record.env,
185186
resources=ChallengeResources.from_mapping(record.resources),
186187
required_capabilities=tuple(record.required_capabilities),
188+
worker_command=worker_command_from_metadata(
189+
getattr(record, "metadata", {}) or {}
190+
),
187191
)
188192

189193
async def pull(self, slug: str):
@@ -373,6 +377,12 @@ async def _run_master_weight_epoch_response(
373377

374378
PRISM_SLUG = "prism"
375379
AGENT_CHALLENGE_SLUG = "agent-challenge"
380+
AGENT_CHALLENGE_TERMINAL_BENCH_RUNNER_IMAGE = (
381+
"ghcr.io/platformnetwork/agent-challenge-terminal-bench-runner:latest"
382+
)
383+
AGENT_CHALLENGE_PLATFORM_ENVIRONMENT_IMPORT_PATH = (
384+
"agent_challenge_runner.platform_environment:PlatformEnvironment"
385+
)
376386
PRISM_IMAGE = "ghcr.io/platformnetwork/prism:latest"
377387
PRISM_EVALUATOR_IMAGE = "ghcr.io/platformnetwork/prism-evaluator:latest"
378388
PRISM_VERSION = "0.1.0"
@@ -401,6 +411,28 @@ def _prism_image_for_settings(image: str, settings: Any | None) -> str:
401411
return reference.pinned(resolve_remote_digest(reference))
402412

403413

414+
def _agent_challenge_platform_sdk_env(settings: Any | None) -> dict[str, str]:
    """Return the env overrides that select Platform SDK Terminal-Bench execution.

    Args:
        settings: Loaded platform settings (or ``None``); only used to
            resolve the docker broker URL via ``_settings_docker_broker_url``.

    Returns:
        A fresh mapping of ``CHALLENGE_*`` variables pointing the Agent
        Challenge at the docker broker and the Terminal-Bench runner image.
    """
    resolved_broker_url = _settings_docker_broker_url(settings)
    # The broker token is read from the shared secret mount, never inlined.
    token_path = f"{DEFAULT_SECRET_MOUNT_DIR}/docker_broker_token"

    overrides: dict[str, str] = {"CHALLENGE_BENCHMARK_BACKEND": "terminal_bench"}

    # Docker execution is routed through the broker.
    overrides.update(
        {
            "CHALLENGE_DOCKER_ENABLED": "true",
            "CHALLENGE_DOCKER_BACKEND": "broker",
            "CHALLENGE_DOCKER_BROKER_URL": resolved_broker_url,
            "CHALLENGE_DOCKER_BROKER_TOKEN_FILE": token_path,
            "CHALLENGE_DOCKER_NETWORK": "default",
        }
    )

    # NOTE(review): the Harbor env is explicitly blanked, presumably so no
    # `--env` value is passed alongside the environment import path -- confirm.
    overrides.update(
        {
            "CHALLENGE_HARBOR_ENV": "",
            "CHALLENGE_HARBOR_INSTALL_MODE": "prebuilt",
        }
    )

    # Platform SDK runner configuration.
    overrides.update(
        {
            "CHALLENGE_PLATFORM_SDK_ENVIRONMENT_IMPORT_PATH": (
                AGENT_CHALLENGE_PLATFORM_ENVIRONMENT_IMPORT_PATH
            ),
            "CHALLENGE_PLATFORM_SDK_RUNNER_IMAGE": (
                AGENT_CHALLENGE_TERMINAL_BENCH_RUNNER_IMAGE
            ),
            "CHALLENGE_TERMINAL_BENCH_EXECUTION_BACKEND": "platform_sdk",
        }
    )
    return overrides
434+
435+
404436
def prism_challenge_create(settings: Any | None = None) -> ChallengeCreate:
405437
challenge_token_file = f"{DEFAULT_SECRET_MOUNT_DIR}/challenge_token"
406438
docker_broker_token_file = f"{DEFAULT_SECRET_MOUNT_DIR}/docker_broker_token"
@@ -475,10 +507,23 @@ async def seed_prism_challenges(
475507
except (ChallengeNotFoundError, KeyError):
476508
result[AGENT_CHALLENGE_SLUG] = "missing"
477509
else:
510+
record = await _resolve(registry.get(AGENT_CHALLENGE_SLUG))
511+
metadata = dict(getattr(record, "metadata", {}) or {})
512+
metadata["worker_command"] = ["agent-challenge-worker"]
513+
env = dict(getattr(record, "env", {}) or {})
514+
env.update(_agent_challenge_platform_sdk_env(settings))
515+
required_capabilities = set(getattr(record, "required_capabilities", []) or [])
516+
required_capabilities.update({"docker_executor", "get_weights", "proxy_routes"})
478517
await _resolve(
479518
registry.update(
480519
AGENT_CHALLENGE_SLUG,
481-
ChallengeUpdate(emission_percent=AGENT_CHALLENGE_EMISSION_PERCENT),
520+
ChallengeUpdate(
521+
emission_percent=AGENT_CHALLENGE_EMISSION_PERCENT,
522+
env=env,
523+
metadata=metadata,
524+
required_capabilities=sorted(required_capabilities),
525+
secrets=["challenge_token", "docker_broker_token"],
526+
),
482527
)
483528
)
484529
result[AGENT_CHALLENGE_SLUG] = "updated"
@@ -923,12 +968,18 @@ async def refresh() -> None:
923968
if settings.kubernetes.challenge_mode == "statefulset"
924969
else "Deployment"
925970
)
926-
kube_client.patch_workload_image(
927-
kind=workload_kind,
928-
name=workload_name,
929-
container="challenge",
930-
image=desired,
931-
)
971+
containers = ["challenge"]
972+
if worker_command_from_metadata(
973+
getattr(record, "metadata", {}) or {}
974+
):
975+
containers.append("worker")
976+
for container in containers:
977+
kube_client.patch_workload_image(
978+
kind=workload_kind,
979+
name=workload_name,
980+
container=container,
981+
image=desired,
982+
)
932983
typer.echo(
933984
f"{record.slug}: patched {workload_kind}/{workload_name}"
934985
)

src/platform_network/master/docker_orchestrator.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import re
1515
import stat
1616
import time
17+
from collections.abc import Mapping, Sequence
1718
from dataclasses import dataclass, field
1819
from pathlib import Path
1920
from typing import Any
@@ -141,6 +142,7 @@ class ChallengeSpec:
141142
required_capabilities: tuple[str, ...] = ("get_weights", "proxy_routes")
142143
expected_api_version: str = DEFAULT_API_VERSION
143144
port: int = DEFAULT_CHALLENGE_PORT
145+
worker_command: tuple[str, ...] = ()
144146

145147
@property
146148
def safe_slug(self) -> str:
@@ -180,6 +182,20 @@ def all_secrets(self) -> dict[str, str]:
180182
return secrets
181183

182184

185+
def worker_command_from_metadata(metadata: Mapping[str, Any]) -> tuple[str, ...]:
    """Read the optional ``worker_command`` entry from challenge metadata.

    Args:
        metadata: Challenge metadata mapping; the ``"worker_command"`` key is
            optional.

    Returns:
        The command as a tuple of strings, or ``()`` when the key is absent
        or set to ``None``.

    Raises:
        DockerOrchestrationError: If the value is a bare string or not a
            sequence, or if it is empty or contains an empty or non-string
            item.
    """
    value = metadata.get("worker_command")
    if value is None:
        return ()

    # A bare string is itself a Sequence, so it has to be rejected explicitly
    # instead of being split into single characters.
    is_list_like = isinstance(value, Sequence) and not isinstance(value, str)
    if not is_list_like:
        raise DockerOrchestrationError("worker_command metadata must be a string list")

    parts = tuple(value)
    every_part_valid = all(isinstance(part, str) and part for part in parts)
    if not (parts and every_part_valid):
        raise DockerOrchestrationError(
            "worker_command metadata must be a non-empty string list"
        )
    return parts
197+
198+
183199
@dataclass(frozen=True)
184200
class ChallengeRuntime:
185201
"""In-memory runtime state for a started challenge."""

src/platform_network/validator/normal_runner.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from platform_network.master.docker_orchestrator import (
1313
ChallengeResources,
1414
ChallengeSpec,
15+
worker_command_from_metadata,
1516
)
1617
from platform_network.schemas.challenge import ChallengeStatus
1718
from platform_network.schemas.weights import MasterWeightsResponse
@@ -53,6 +54,9 @@ async def run_once(self) -> None:
5354
env=challenge.env,
5455
resources=ChallengeResources.from_mapping(challenge.resources),
5556
required_capabilities=tuple(challenge.required_capabilities),
57+
worker_command=worker_command_from_metadata(
58+
getattr(challenge, "metadata", {}) or {}
59+
),
5660
)
5761
self.orchestrator.start_challenge(spec)
5862

tests/unit/test_client_service_cli_config.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ async def get_weights(self, **kwargs: object) -> ChallengeWeightsResult:
263263
volumes={},
264264
env={},
265265
secrets=[],
266+
metadata={"worker_command": ["agent-challenge-worker"]},
266267
)
267268
setter = Setter()
268269
service = MasterWeightService(
@@ -293,6 +294,7 @@ def start_challenge(self, spec):
293294
assert orchestrator.specs[0].slug == "demo"
294295
assert orchestrator.specs[0].resources.cpu == 2.0
295296
assert orchestrator.specs[0].resources.memory == "1g"
297+
assert orchestrator.specs[0].worker_command == ("agent-challenge-worker",)
296298

297299

298300
@pytest.mark.asyncio
@@ -527,6 +529,7 @@ def test_cli_create_and_runtime_controller(tmp_path: Path) -> None:
527529
image="ghcr.io/o/demo:1",
528530
version="1",
529531
resources={"cpus": "1.5", "memory": "2g"},
532+
metadata={"worker_command": ["agent-challenge-worker"]},
530533
)
531534
)
532535

@@ -549,6 +552,7 @@ def restart_challenge(self, spec):
549552
assert asyncio.run(controller.restart("demo"))["detail"] == "challenge-demo"
550553
assert orchestrator.specs[0].resources.cpu == 1.5
551554
assert orchestrator.specs[0].resources.memory == "2g"
555+
assert orchestrator.specs[0].worker_command == ("agent-challenge-worker",)
552556
assert asyncio.run(controller.status("demo"))["status"] == "unknown"
553557

554558

@@ -1441,6 +1445,30 @@ def test_seed_prism_challenges_is_idempotent_and_preserves_tokens() -> None:
14411445
assert "token" not in prism.metadata
14421446
assert "database_url" not in prism.metadata
14431447
assert agent.emission_percent == Decimal("15")
1448+
assert agent.metadata["worker_command"] == ["agent-challenge-worker"]
1449+
assert agent.required_capabilities == [
1450+
"docker_executor",
1451+
"get_weights",
1452+
"proxy_routes",
1453+
]
1454+
assert agent.secrets == ["challenge_token", "docker_broker_token"]
1455+
assert agent.env["CHALLENGE_BENCHMARK_BACKEND"] == "terminal_bench"
1456+
assert agent.env["CHALLENGE_DOCKER_ENABLED"] == "true"
1457+
assert agent.env["CHALLENGE_DOCKER_BACKEND"] == "broker"
1458+
assert agent.env["CHALLENGE_DOCKER_BROKER_URL"] == "http://platform-broker:8082"
1459+
assert agent.env["CHALLENGE_DOCKER_BROKER_TOKEN_FILE"] == (
1460+
"/run/secrets/platform/docker_broker_token"
1461+
)
1462+
assert agent.env["CHALLENGE_DOCKER_NETWORK"] == "default"
1463+
assert agent.env["CHALLENGE_HARBOR_ENV"] == ""
1464+
assert agent.env["CHALLENGE_HARBOR_INSTALL_MODE"] == "prebuilt"
1465+
assert agent.env["CHALLENGE_PLATFORM_SDK_ENVIRONMENT_IMPORT_PATH"] == (
1466+
"agent_challenge_runner.platform_environment:PlatformEnvironment"
1467+
)
1468+
assert agent.env["CHALLENGE_PLATFORM_SDK_RUNNER_IMAGE"] == (
1469+
"ghcr.io/platformnetwork/agent-challenge-terminal-bench-runner:latest"
1470+
)
1471+
assert agent.env["CHALLENGE_TERMINAL_BENCH_EXECUTION_BACKEND"] == "platform_sdk"
14441472

14451473

14461474
def test_seed_prism_challenges_pins_images_for_production_policy(
@@ -1588,6 +1616,78 @@ def patch_workload_image(self, **kwargs: object) -> None:
15881616
assert "demo: patched StatefulSet/challenge-demo" in result.output
15891617

15901618

1619+
def test_master_refresh_challenge_images_patches_worker_container_when_configured(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that image refresh also patches the worker container.

    The fake registry returns an active ``agent-challenge`` record whose
    metadata defines ``worker_command`` and whose image already carries the
    digest returned by the stubbed resolver. The CLI must patch both the
    ``challenge`` and ``worker`` containers of the StatefulSet, and must not
    call ``registry.update``.
    """
    import platform_network.kubernetes.client as kube_module
    import platform_network.validator.image_updater as image_updater_module

    digest = f"sha256:{'a' * 64}"
    image = f"ghcr.io/platformnetwork/agent-challenge:latest@{digest}"
    # Every KubeClient interaction (constructor kwargs and patch calls) lands here.
    patches: list[dict[str, object]] = []

    class Registry:
        async def list(self):
            record = SimpleNamespace(
                slug="agent-challenge",
                image=image,
                status=ChallengeStatus.ACTIVE,
                metadata={"worker_command": ["agent-challenge-worker"]},
            )
            return [record]

        async def update(self, slug: str, update: object) -> None:
            raise AssertionError("already-current image must not update registry")

    class KubeClient:
        def __init__(self, **kwargs: object) -> None:
            patches.append({"init": kwargs})

        def patch_workload_image(self, **kwargs: object) -> None:
            patches.append(kwargs)

    runtime_settings = SimpleNamespace(backend="kubernetes")
    kubernetes_settings = SimpleNamespace(
        namespace="platform-master",
        in_cluster=True,
        kubeconfig=None,
        challenge_mode="statefulset",
    )
    settings = SimpleNamespace(
        runtime=runtime_settings,
        kubernetes=kubernetes_settings,
    )

    monkeypatch.setattr(cli_module, "load_settings", lambda config: settings)
    monkeypatch.setattr(cli_module, "_master_registry", lambda settings: Registry())
    monkeypatch.setattr(
        cli_module, "_challenge_orchestrator", lambda settings: object()
    )
    monkeypatch.setattr(kube_module, "KubernetesClient", KubeClient)
    monkeypatch.setattr(
        image_updater_module,
        "resolve_remote_digest",
        lambda image_reference: digest,
    )

    cli_args = ["master", "refresh-challenge-images", "--config", "unused.yaml"]
    result = CliRunner().invoke(app, cli_args)

    assert result.exit_code == 0, result.output
    # Both containers of the same workload must receive the same image.
    for container in ("challenge", "worker"):
        expected_patch = {
            "kind": "StatefulSet",
            "name": "challenge-agent-challenge",
            "container": container,
            "image": image,
        }
        assert expected_patch in patches
1689+
1690+
15911691
def test_registry_client_with_asgi_transport(monkeypatch: pytest.MonkeyPatch) -> None:
15921692
async def handler(request: httpx.Request) -> httpx.Response:
15931693
return httpx.Response(

0 commit comments

Comments
 (0)