Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# .github/workflows/ci.yml
# CI pipeline: lint, format-check, and run tests on every push / PR to main.
name: CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  lint_and_test:
    runs-on: ubuntu-latest

    steps:
      # --- 1. Checkout Repository ---
      - name: Checkout repository
        uses: actions/checkout@v4

      # --- 2. Set up Python ---
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      # --- 3. Install uv ---
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh
      - name: Add uv to PATH
        # uv's installer drops the binary into ~/.cargo/bin; expose it to later steps.
        run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH

      # --- 4. Create Virtual Environment using uv ---
      # uv pip install below detects and uses the .venv created here.
      - name: Create virtual environment
        run: uv venv

      # --- 5. Install Dependencies using uv ---
      - name: Install dependencies
        run: uv pip install -e ".[test]"

      # --- 6. Lint and Format Check with Ruff (via uv) ---
      - name: Lint with Ruff
        run: uv run ruff check .
      - name: Check formatting with Ruff
        run: uv run ruff format --check .

      # --- 7. Run Tests with Pytest (via uv) ---
      - name: Run tests
        env:
          # Dummy key so import-time API-key checks pass without a real secret.
          ANTHROPIC_API_KEY: "ci_dummy_key"
        run: uv run pytest tests/
195 changes: 195 additions & 0 deletions demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# demo.py
from typing import List
import os
import time

from omnimcp.synthetic_ui import (
generate_login_screen,
# generate_logged_in_screen,
simulate_action,
draw_highlight,
)
from omnimcp.core import plan_action_for_ui
from omnimcp.utils import logger

# --- Configuration ---
OUTPUT_DIR = "demo_output_multistep"  # Directory where step/final screenshots are written.
SAVE_IMAGES = True  # Set False to skip writing images to disk.
MAX_STEPS = 6  # Hard cap on plan/simulate iterations before giving up.


def run_multi_step_demo():
    """Run the multi-step OmniMCP demo against a synthetic login UI.

    Each iteration: plan an action with the LLM, visualize the planned action,
    simulate it to obtain the next UI state, and repeat until the LLM flags the
    goal complete, an invalid element is chosen, an error occurs, or MAX_STEPS
    is reached. Screenshots for each step are written to OUTPUT_DIR when
    SAVE_IMAGES is set.
    """
    logger.info("Starting OmniMCP Multi-Step Demo...")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # 1. Initial state & goal.
    logger.info("Generating initial login screen...")
    image, elements = generate_login_screen(
        save_path=os.path.join(OUTPUT_DIR, "step_0_state_initial.png")
    )
    user_goal = "Log in using username 'testuser' and password 'password123'"
    logger.info(f"User Goal: '{user_goal}'")

    action_history: List[str] = []
    goal_achieved_flag = False  # Set when the LLM predicts goal completion.
    # Track the last executed step explicitly so the post-loop summary does not
    # reference an undefined loop variable if MAX_STEPS <= 0.
    last_step = -1

    # --- Main Loop ---
    for step in range(MAX_STEPS):
        last_step = step
        logger.info(f"\n--- Step {step + 1}/{MAX_STEPS} ---")

        # Persist the current state *before* planning/highlighting.
        current_state_img_path = os.path.join(OUTPUT_DIR, f"step_{step}_state.png")
        if SAVE_IMAGES:
            image.save(current_state_img_path)
            logger.info(f"Saved current state to {current_state_img_path}")

        # 2. Plan the next action.
        logger.info("Planning action with LLM...")
        try:
            llm_plan, target_element = plan_action_for_ui(
                elements, user_goal, action_history
            )

            logger.info(f"LLM Reasoning: {llm_plan.reasoning}")
            logger.info(
                f"LLM Proposed Action: {llm_plan.action} on Element ID: {llm_plan.element_id}"
            )
            if llm_plan.text_to_type:
                logger.info(f"Text to Type: '{llm_plan.text_to_type}'")
            logger.info(f"LLM Goal Complete Assessment: {llm_plan.is_goal_complete}")

            # 3. Record the completion flag but do NOT break yet: the planned
            # action must still be simulated so the final state reflects it.
            if llm_plan.is_goal_complete:
                logger.info(
                    "LLM flag indicates goal should be complete after this action."
                )
                goal_achieved_flag = True

            # A valid target is required even when the goal is complete, for
            # logging/visualization of the final action.
            if not target_element:
                logger.error(
                    f"LLM chose an invalid element ID ({llm_plan.element_id}). Stopping execution."
                )
                break

            # 4. Visualize the planned action.
            highlight_img_path = os.path.join(OUTPUT_DIR, f"step_{step}_highlight.png")
            highlighted_image = draw_highlight(
                image,
                target_element,
                plan=llm_plan,  # Lets the overlay show the planned text/action.
                color="lime",
                width=4,
                dim_factor=0.5,
            )
            if SAVE_IMAGES:
                highlighted_image.save(highlight_img_path)
                logger.info(f"Saved highlighted action with text to {highlight_img_path}")

            # Record the action in history *before* simulation mutates state.
            action_desc = f"Action: {llm_plan.action}"
            if llm_plan.text_to_type:
                action_desc += f" '{llm_plan.text_to_type}'"
            action_desc += (
                f" on Element ID {target_element.id} ('{target_element.content}')"
            )
            action_history.append(action_desc)

            # 5. Simulate the action -> next state (always runs for the planned step).
            logger.info("Simulating action...")
            # Capture the username now, in case this step completes the login.
            # NOTE(review): assumes element id 0 is the username field — matches
            # the synthetic login screen layout; confirm if that changes.
            username = next(
                (
                    el.content
                    for el in elements
                    if el.id == 0 and el.type == "text_field"
                ),
                "User",
            )

            new_image, new_elements = simulate_action(
                image, elements, llm_plan, username_for_login=username
            )

            # Detect whether the action actually changed anything. Element
            # *content* is authoritative: simulate_action may deep-copy, so new
            # object identities alone do not imply a new state. (Image identity
            # is still used as a cheap signal for pixel changes.)
            elements_unchanged = len(elements) == len(new_elements) and all(
                e1.to_dict() == e2.to_dict()
                for e1, e2 in zip(elements, new_elements)
            )
            state_changed = (new_image is not image) or not elements_unchanged

            # Advance state regardless, so the next iteration sees the new UI.
            image, elements = new_image, new_elements

            if state_changed:
                logger.info(
                    f"State updated for next step. New element count: {len(elements)}"
                )
            else:
                logger.warning(
                    "Simulation did not result in a detectable state change."
                )
                # Deliberately continue; add 'break' here to stop on stagnation.

            # 6. NOW honor the completion flag, *after* simulation.
            if goal_achieved_flag:
                logger.success(
                    "Goal completion flag was set, ending loop after simulation."
                )
                break

            # Pause briefly between steps.
            time.sleep(1)

        except Exception:
            # logger is loguru (see logger.success above); logger.exception
            # attaches the traceback, unlike stdlib-style exc_info=True.
            logger.exception(f"Error during step {step + 1}")
            break  # Stop on error.

    # --- End of Loop ---
    logger.info("\n--- Multi-Step Demo Finished ---")
    if goal_achieved_flag:
        logger.success("Overall goal marked as achieved by LLM during execution.")
    elif last_step == MAX_STEPS - 1:
        logger.warning(
            f"Reached maximum steps ({MAX_STEPS}) without goal completion flag being set."
        )
    else:
        logger.error(
            "Execution stopped prematurely (check logs for errors or lack of state change)."
        )

    # Save the final state (state *after* the last successful simulation).
    final_state_img_path = os.path.join(OUTPUT_DIR, "final_state.png")
    if SAVE_IMAGES:
        image.save(final_state_img_path)
        logger.info(f"Saved final state to {final_state_img_path}")


# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    run_multi_step_demo()
Binary file added demo_output/login_screen.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output/login_screen_highlighted.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/final_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_state_initial.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_1_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_1_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_2_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_2_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
139 changes: 139 additions & 0 deletions omnimcp/completions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# omnimcp/completions.py
import json
import time
from typing import Dict, List, Type, TypeVar
import anthropic
from pydantic import BaseModel, ValidationError
from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_random_exponential,
)

from .config import config # Assuming config has ANTHROPIC_API_KEY
from .utils import logger # Reuse logger from utils

# Check for API key.
# Fail fast at import time: every call in this module needs the Anthropic client.
if not config.ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY not found in environment or .env file.")

# Initialize Anthropic client (module-level singleton, reused across calls).
client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)

# Type variable for the Pydantic response model returned by call_llm_api.
T = TypeVar("T", bound=BaseModel)

# Transient failures worth retrying; other APIErrors are surfaced immediately.
RETRYABLE_ERRORS = (
    anthropic.RateLimitError,
    anthropic.APIConnectionError,
    anthropic.InternalServerError,
)

MAX_RETRIES = 3  # Total attempts per call, including the first.
DEFAULT_MODEL = "claude-3-haiku-20240307"  # Or use Opus/Sonnet if needed


@retry(
    retry=retry_if_exception_type(RETRYABLE_ERRORS),
    wait=wait_random_exponential(min=1, max=30),
    stop=stop_after_attempt(MAX_RETRIES),
    before_sleep=lambda retry_state: logger.warning(
        f"LLM API Error (Attempt {retry_state.attempt_number}/{MAX_RETRIES}): "
        f"{retry_state.outcome.exception()}. Retrying...",
    ),
)
def call_llm_api(
    messages: List[Dict[str, str]],
    response_format: Type[T],
    model: str = DEFAULT_MODEL,
    temperature: float = 0.1,  # Low temperature for more deterministic output.
    system_prompt: str | None = None,
) -> T:
    """
    Calls the Anthropic API, expecting a JSON response conforming to the pydantic model.

    Retries automatically (exponential backoff, up to MAX_RETRIES attempts) on
    rate limits, connection failures, and server errors via the @retry decorator.

    Args:
        messages: Conversation messages (role/content dicts) for the API call.
            The system prompt is NOT part of this list; pass it separately.
        response_format: The Pydantic model class for the expected JSON structure.
        model: The Anthropic model to use.
        temperature: The sampling temperature.
        system_prompt: Optional system prompt; omitted from the request when None.

    Returns:
        An instance of the response_format Pydantic model.

    Raises:
        anthropic.APIError: If a non-retryable API error occurs.
        ValueError: If the response is empty, not valid JSON, or doesn't match the schema.
        Exception: After exceeding retry attempts for retryable errors.
    """
    logger.debug(
        f"Calling Anthropic API (model: {model}) with {len(messages)} messages."
    )
    if system_prompt:
        logger.debug(
            f"System Prompt: {system_prompt[:100]}..."
        )  # Log beginning of system prompt.
    start_time = time.time()

    try:
        # Build kwargs so `system` is only sent when provided: the SDK expects
        # a string (or its NOT_GIVEN sentinel) for `system`, not None.
        request_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": 1024,  # Adjust as needed.
            "temperature": temperature,
        }
        if system_prompt:
            request_kwargs["system"] = system_prompt

        response = client.messages.create(**request_kwargs)

        duration_ms = int((time.time() - start_time) * 1000)
        logger.debug(f"LLM API call completed in {duration_ms}ms.")

        # Extract the text content.
        if not response.content:
            logger.error("Received empty content list from API.")
            raise ValueError("LLM response content is empty.")
        response_text = response.content[0].text.strip()
        logger.debug(f"Raw LLM response text:\n{response_text}")

        # Clean potential markdown code fences; handle both ```json and bare
        # ``` openers (removeprefix is a no-op when the prefix is absent).
        response_text = response_text.removeprefix("```json").removeprefix("```")
        response_text = response_text.removesuffix("```").strip()

        # Parse and validate the JSON response using the Pydantic model.
        try:
            parsed_response = response_format.model_validate_json(response_text)
            logger.info(
                f"Successfully parsed LLM response into {response_format.__name__}."
            )
            return parsed_response
        except ValidationError as e:
            logger.error(
                f"Failed to validate LLM JSON response against schema {response_format.__name__}."
            )
            logger.error(f"Validation Errors: {e}")
            logger.error(f"Raw response was: {response_text}")
            raise ValueError(
                f"LLM response did not match the expected format: {e}"
            ) from e
        except json.JSONDecodeError as e:
            # Defensive: pydantic v2 usually wraps JSON errors in ValidationError.
            logger.error("Failed to decode LLM response as JSON.")
            logger.error(f"Raw response was: {response_text}")
            raise ValueError(f"LLM response was not valid JSON: {e}") from e

    except RETRYABLE_ERRORS as e:
        # Re-raise so the @retry decorator can back off and try again.
        logger.warning(f"Encountered retryable API error: {type(e).__name__} - {e}")
        raise
    except anthropic.APIError as e:
        logger.error(f"Non-retryable Anthropic API error: {type(e).__name__} - {e}")
        raise
    except Exception as e:
        logger.error(
            f"Unexpected error during LLM API call: {type(e).__name__} - {e}",
            exc_info=True,
        )
        raise
Loading