Skip to content

Commit eaef7ba

Browse files
authored
Allow Docker-out-of-docker in AGBench (#6047)
This PR allows docker-out-of-docker scenarios to be run in agbench (e.g., agent teams that rely on the DockerCommandLineExecutor). This is becoming increasingly important for benchmarking and testing, since the behaviors of running local executors can diverge in important ways.
1 parent ff847cc commit eaef7ba

File tree

2 files changed

+25
-6
lines changed

2 files changed

+25
-6
lines changed

python/packages/agbench/src/agbench/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33

44
from typing_extensions import TypedDict
55

6+
from .linter.cli import lint_cli
67
from .remove_missing_cmd import remove_missing_cli
78
from .run_cmd import run_cli
89
from .tabulate_cmd import tabulate_cli
910
from .version import __version__
10-
from .linter.cli import lint_cli
1111

1212

1313
class CommandSpec(TypedDict):

python/packages/agbench/src/agbench/run_cmd.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
import random
88
import re
99
import shutil
10+
import stat
1011
import subprocess
1112
import sys
1213
import time
1314
import traceback
1415
from multiprocessing import Pool
15-
from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
16+
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast
1617

1718
import docker
1819
import yaml
@@ -375,7 +376,7 @@ def replace_in_list(lst: List[Any]) -> None:
375376
replace_in_list(cast(List[Any], json_data)) # type: ignore
376377

377378

378-
def run_scenario_natively(work_dir: str, env: Mapping[str, str], timeout: int = TASK_TIMEOUT) -> None:
379+
def run_scenario_natively(work_dir: str, env: Dict[str, str], timeout: int = TASK_TIMEOUT) -> None:
379380
"""
380381
Run a scenario in the native environment.
381382
@@ -479,7 +480,7 @@ def run_scenario_natively(work_dir: str, env: Mapping[str, str], timeout: int =
479480

480481

481482
def run_scenario_in_docker(
482-
work_dir: str, env: Mapping[str, str], timeout: int = TASK_TIMEOUT, docker_image: Optional[str] = None
483+
work_dir: str, env: Dict[str, str], timeout: int = TASK_TIMEOUT, docker_image: Optional[str] = None
483484
) -> None:
484485
"""
485486
Run a scenario in a Docker environment.
@@ -594,8 +595,26 @@ def run_scenario_in_docker(
594595
autogen_repo_base = os.path.join(autogen_repo_base, "python")
595596
volumes[str(pathlib.Path(autogen_repo_base).absolute())] = {"bind": "/autogen_python", "mode": "rw"}
596597

598+
# Add the Docker socket if DOCKER_HOST points to a local Unix socket (e.g., when running on Linux)
599+
# This allows docker-out-of-docker to work, but provides access to the Docker daemon on the host.
600+
# This maintains good isolation for experiment purposes (e.g., ensuring consistent initial conditions),
601+
# but diminishes the security benefits of using Docker (e.g., when facing a deliberately malicious agent),
602+
# since it would allow clients to mount privileged images, volumes, etc.
603+
docker_host = os.environ.get("DOCKER_HOST", "unix:///var/run/docker.sock")
604+
if docker_host.startswith("unix://"):
605+
docker_socket = os.path.abspath(docker_host[7:])
606+
if os.path.exists(docker_socket):
607+
st_mode = os.stat(docker_socket).st_mode
608+
if stat.S_ISSOCK(st_mode):
609+
volumes[docker_socket] = {"bind": "/var/run/docker.sock", "mode": "rw"}
610+
611+
# Update the environment variables so that the inner docker client can
612+
# mount the workspace
613+
env = {k: v for k, v in env.items()}
614+
env["HOST_WORKSPACE"] = str(pathlib.Path(work_dir).absolute())
615+
597616
print("Mounting:")
598-
for k in volumes:
617+
for k in volumes.keys():
599618
bind = volumes[k]["bind"]
600619
mode = volumes[k]["mode"].upper()
601620
if bind == "/workspace":
@@ -609,7 +628,7 @@ def run_scenario_in_docker(
609628
image,
610629
command=["sh", "run.sh"],
611630
working_dir="/workspace",
612-
environment=dict(env),
631+
environment=env,
613632
detach=True,
614633
remove=True,
615634
auto_remove=True,

0 commit comments

Comments
 (0)