chore: update port configuration and improve connection logic

rebootyang · rebootyang · commit 5a01fe4ebbe4 · 2025-10-24T18:41:57.000+08:00
- Change default port from 5000 to 5001 to avoid conflicts with macOS services (e.g., AirTunes)
- Integrate new DroidAgent config API using config_manager
- Improve device connection logic to properly handle emulators and network devices
- Add timeout and error handling
- Support custom API base URL configuration via --api-base flag
- Update README documentation with additional configuration options and examples
- Update dependency lock file
diff --git a/README.md b/README.md
@@ -93,7 +93,7 @@ droidworld check
 Execute from `droidrun-android-world` directory:
 ```bash
 # Example: add contact task
-droidworld run --tasks ContactsAddContact
+droidworld run --task ContactsAddContact
 ```
 
 ---
@@ -179,6 +179,12 @@ Run a specific task by name:
 droidworld run --task ContactsAddContact
 ```
 
+Run multiple specific tasks:
+
+```bash
+droidworld run --task ContactsAddContact --task ContactsDeleteContact
+```
+
 ### List Available Tasks
 
 View all available tasks with their IDs:
@@ -189,17 +195,66 @@ droidworld list-tasks
 
 ### Customizing the Benchmark
 
+#### LLM Provider Configuration
+
 ```bash
-# Run with a different LLM provider and model
-droidworld run --llm-provider Anthropic --llm-model claude-3-sonnet-20240229
+# Use Anthropic Claude
+droidworld run --task ContactsAddContact \
+  --llm-provider Anthropic \
+  --llm-model claude-3-sonnet-20240229
+
+# Use OpenAI-compatible API (e.g., third-party proxy)
+droidworld run --task ContactsAddContact \
+  --llm-provider OpenAILike \
+  --llm-model gemini-2.5-pro \
+  --api-base http://your-api-endpoint/v1
+
+# Enable vision and reasoning modes
+droidworld run --task ContactsAddContact \
+  --vision \
+  --reasoning
+```
+
+#### Task Family Selection
+
+Choose from different task families:
+- `android_world` (default): Full Android World task suite
+- `android`: Android-specific tasks
+- `miniwob`: MiniWoB tasks
+- `information_retrieval`: Information retrieval tasks
 
+```bash
+droidworld run --task-family android --min-task-idx 0 --max-task-idx 5
+```
+
+#### Performance Tuning
+
+```bash
 # Set maximum steps per task: multiplier * task complexity
-droidworld run --max-step-multiplier 15
+droidworld run --task ContactsAddContact --max-steps-multiplier 15
+
+# Set the per-task timeout multiplier (in seconds)
+droidworld run --task ContactsAddContact --timeout-multiplier 300
+
+# Adjust LLM temperature
+droidworld run --task ContactsAddContact --temperature 0.7
+```
+
+#### Advanced Options
 
+```bash
 # Run multiple parameter combinations per task
-droidworld run --n-task-combinations 3
+droidworld run --task ContactsAddContact --n-task-combinations 3
+
+# Enable debug mode and tracing
+droidworld run --task ContactsAddContact --debug --tracing
+
+# Use custom environment URL and device serial
+droidworld run --task ContactsAddContact \
+  --env-url http://localhost:5001 \
+  --env-serial emulator-5554
 
-# Check all available configuration options with
+# Check all available configuration options
 droidworld run --help
 ```
 
diff --git a/droidrun b/droidrun
@@ -1 +1 @@
-Subproject commit d7bd2d5da856e865cf1d298085cbcbd165fcdc7f
+Subproject commit ec44159e8b8d00c7fb8efb0ddd4f7556cdff0484
diff --git a/eval/cli.py b/eval/cli.py
@@ -67,7 +67,7 @@ def version():
 @cli.command()
 @click.option(
     "--env-url",
-    default="http://localhost:5000",
+    default="http://localhost:5001",
     help="Android World Environment URL to use.",
 )
 def list_tasks(env_url):
@@ -81,7 +81,7 @@ def list_tasks(env_url):
 @cli.command()
 @click.option(
     "--env-url",
-    default="http://localhost:5000",
+    default="http://localhost:5001",
     help="Android World Environment URL to use.",
 )
 @click.option("--env-serial", default="emulator-5554", help="Device serial to use.")
@@ -110,7 +110,7 @@ def disable_overlay(env_serial):
 @cli.command()
 @click.option(
     "--env-url",
-    default="http://localhost:5000",
+    default="http://localhost:5001",
     help="Android World Environment URL to use.",
 )
 @click.option("--env-serial", default="emulator-5554", help="Device serial to use.")
@@ -124,6 +124,7 @@ def disable_overlay(env_serial):
 )
 @click.option("--llm-provider", default="Gemini", help="LLM provider to use.")
 @click.option("--llm-model", default="gemini-2.5-pro", help="LLM model to use.")
+@click.option("--api-base", default=None, help="Base URL for API (e.g., OpenAI-compatible API).")
 @click.option("--vision", is_flag=True, help="Enable vision.")
 @click.option("--reasoning", is_flag=True, help="Enable reasoning.")
 @click.option("--reflection", is_flag=True, help="Enable reflection.")
@@ -144,6 +145,7 @@ async def run(
     n_task_combinations,
     llm_provider,
     llm_model,
+    api_base,
     vision,
     reasoning,
     reflection,
@@ -185,7 +187,11 @@ async def run(
     logger.info(f"Found tasks: {', '.join(task_list)} ({len(task_list)})")
 
     logger.debug(f"Loading LLM: {llm_provider} {llm_model} {temperature}")
-    llm = load_llm(llm_provider, model=llm_model, temperature=temperature)
+    llm_kwargs = {"model": llm_model, "temperature": temperature}
+    if api_base:
+        llm_kwargs["api_base"] = api_base
+        logger.debug(f"Using custom API base: {api_base}")
+    llm = load_llm(llm_provider, **llm_kwargs)
     logger.debug("LLM loaded successfully")
 
     for task_name in task_list:
diff --git a/eval/env/boot.py b/eval/env/boot.py
@@ -24,13 +24,20 @@
 
 def ensure_connected(serial: str) -> AdbDevice:
     try:
-        res = adb.connect(serial)
-        if res.count("failed") > 0 or res.count("unable") > 0:
-            raise res
+        # Emulator serials (emulator-*) are already connected locally, so run
+        # `adb connect` only for serials that contain ":" or do not start with "emulator-"
+        if ":" in serial or not serial.startswith("emulator-"):
+            res = adb.connect(serial)
+            if res.count("failed") > 0 or res.count("unable") > 0:
+                raise RuntimeError(f"Failed to connect: {res}")
+        
+        # Verify device is available
+        device = adb.device(serial)
+        # Test if device is accessible
+        device.shell("echo test")
+        return device
     except Exception as e:
         raise RuntimeError(f"Device {serial} is not connected: {e}")
-    
-    return adb.device(serial)
 
 
 def install_portal(device: AdbDevice):
diff --git a/eval/env/client.py b/eval/env/client.py
@@ -61,7 +61,7 @@ def parse_element(data: dict[str, Any]) -> representation_utils.UIElement:
 class AndroidEnvClient:
     """Client for interacting with the Android environment server."""
 
-    def __init__(self, base_url: str = "http://localhost:5000"):
+    def __init__(self, base_url: str = "http://localhost:5001"):
         logger.info(
             "Setting up Android environment using Docker - Initial setup may take"
             " 5-10 minutes. Please wait..."
diff --git a/eval/runner.py b/eval/runner.py
@@ -57,18 +57,41 @@ async def run_task_on_env(
     )
 
     tools = AndroidWorldTools(device_serial, env)
+    
+    # Import new config classes for DroidAgent
+    from droidrun.config_manager.config_manager import (
+        DroidrunConfig,
+        AgentConfig,
+        DeviceConfig,
+        LoggingConfig,
+        TracingConfig,
+        ManagerConfig,
+        ExecutorConfig,
+        CodeActConfig,
+    )
+    
+    # Build config for new DroidAgent API
+    agent_config = AgentConfig(
+        reasoning=reasoning,
+        max_steps=max_steps,
+        manager=ManagerConfig(vision=vision),
+        executor=ExecutorConfig(vision=vision),
+        codeact=CodeActConfig(vision=vision),
+    )
+    
+    config = DroidrunConfig(
+        agent=agent_config,
+        device=DeviceConfig(),
+        logging=LoggingConfig(debug=debug, save_trajectory="none"),
+        tracing=TracingConfig(enabled=tracing),
+    )
+    
     agent = DroidAgent(
         goal=task_goal,
-        llm=llm,
+        config=config,
+        llms=llm,  # New API accepts single LLM for all agents
         tools=tools,
-        reasoning=reasoning,
-        enable_tracing=tracing,
-        debug=debug,
-        max_steps=max_steps,
         timeout=timeout,
-        save_trajectories="none",
-        reflection=reflection,
-        vision=vision,
     )
 
     logger.debug("DroidAgent initialized successfully")
@@ -95,15 +118,25 @@ async def run_task_on_env(
         logger.warn(f"Droidrun timed out for task {task_name} {task_idx}: {e}")
         score = env.get_task_score(task_name, task_idx)
         logger.info(f"Task {task_name} {task_idx} score: {score}")
+        
+        # Create a simple result object for timeout
+        class TimeoutResult:
+            def __init__(self):
+                self.success = False
+                self.reason = f"Timeout after {timeout} seconds"
+                # Handle both old and new API for step counter
+                if hasattr(agent, 'step_counter'):
+                    self.steps = agent.step_counter
+                elif hasattr(agent, 'shared_state') and hasattr(agent.shared_state, 'step_number'):
+                    self.steps = agent.shared_state.step_number
+                else:
+                    self.steps = 0
+        
         result = get_task_result(
             task_result,
             agent,
             score=score,
-            agent_result={
-                "steps": agent.step_counter,
-                "success": False,
-                "reason": f"Timeout after {timeout} seconds",
-            },
+            agent_result=TimeoutResult(),
             device=device_serial,
         )
     except Exception as e:
diff --git a/eval/tracker.py b/eval/tracker.py
@@ -111,9 +111,17 @@ def get_task_result(
     task_result.success = score
 
     if agent_result is not None:
-        task_result.agent_success = agent_result["success"]
-        task_result.steps_taken = agent_result["steps"]
-        task_result.final_thought = agent_result["reason"]
+        # Handle both old dict format and new ResultEvent format
+        if hasattr(agent_result, 'success'):
+            # New API: ResultEvent object
+            task_result.agent_success = agent_result.success
+            task_result.steps_taken = agent_result.steps
+            task_result.final_thought = agent_result.reason
+        else:
+            # Old API: dict
+            task_result.agent_success = agent_result["success"]
+            task_result.steps_taken = agent_result["steps"]
+            task_result.final_thought = agent_result["reason"]
 
     if error is not None:
         task_result.error = error
@@ -126,7 +134,14 @@ def get_task_result(
     task_result.trajectory_stats = TrajectoryStats(
         **get_trajectory_statistics(task_result.trajectory)
     )
-    task_result.reasoning = agent.reasoning
+    # Handle both old and new API for reasoning attribute
+    if hasattr(agent, 'reasoning'):
+        task_result.reasoning = agent.reasoning
+    elif hasattr(agent, 'config') and hasattr(agent.config, 'agent'):
+        task_result.reasoning = agent.config.agent.reasoning
+    else:
+        task_result.reasoning = False
+        
     if device is not None:
         task_result.device = device
 
diff --git a/uv.lock b/uv.lock