Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# .github/workflows/ci.yml
# CI pipeline: lint, format-check, and run tests on every push / PR to main.
name: CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  lint_and_test:
    runs-on: ubuntu-latest

    steps:
      # --- 1. Checkout Repository ---
      - name: Checkout repository
        uses: actions/checkout@v4

      # --- 2. Set up Python ---
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      # --- 3. Install uv ---
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh
      - name: Add uv to PATH
        # uv's installer drops the binary into ~/.cargo/bin; expose it to later steps.
        run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH

      # --- 4. Create Virtual Environment using uv ---
      # uv pip install below detects and uses the .venv created here.
      - name: Create virtual environment
        run: uv venv

      # --- 5. Install Dependencies using uv ---
      - name: Install dependencies
        run: uv pip install -e ".[test]"

      # --- 6. Lint and Format Check with Ruff (via uv) ---
      - name: Lint with Ruff
        run: uv run ruff check .
      - name: Check formatting with Ruff
        run: uv run ruff format --check .

      # --- 7. Run Tests with Pytest (via uv) ---
      - name: Run tests
        env:
          # Dummy key so import-time API-key checks pass without a real secret.
          ANTHROPIC_API_KEY: "ci_dummy_key"
        run: uv run pytest tests/
195 changes: 195 additions & 0 deletions demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# demo.py
from typing import List
import os
import time

from omnimcp.synthetic_ui import (
generate_login_screen,
# generate_logged_in_screen,
simulate_action,
draw_highlight,
)
from omnimcp.core import plan_action_for_ui
from omnimcp.utils import logger

# --- Configuration ---
OUTPUT_DIR = "demo_output_multistep"  # Directory where step/final screenshots are written.
SAVE_IMAGES = True  # Set False to skip writing images to disk.
MAX_STEPS = 6  # Hard cap on plan/simulate iterations before giving up.


def run_multi_step_demo():
    """Run the multi-step OmniMCP demo against a synthetic login UI.

    Each iteration: plan an action with the LLM, visualize the planned action,
    simulate it to obtain the next UI state, and repeat until the LLM flags the
    goal complete, an invalid element is chosen, an error occurs, or MAX_STEPS
    is reached. Screenshots for each step are written to OUTPUT_DIR when
    SAVE_IMAGES is set.
    """
    logger.info("Starting OmniMCP Multi-Step Demo...")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # 1. Initial state & goal.
    logger.info("Generating initial login screen...")
    image, elements = generate_login_screen(
        save_path=os.path.join(OUTPUT_DIR, "step_0_state_initial.png")
    )
    user_goal = "Log in using username 'testuser' and password 'password123'"
    logger.info(f"User Goal: '{user_goal}'")

    action_history: List[str] = []
    goal_achieved_flag = False  # Set when the LLM predicts goal completion.
    # Track the last executed step explicitly so the post-loop summary does not
    # reference an undefined loop variable if MAX_STEPS <= 0.
    last_step = -1

    # --- Main Loop ---
    for step in range(MAX_STEPS):
        last_step = step
        logger.info(f"\n--- Step {step + 1}/{MAX_STEPS} ---")

        # Persist the current state *before* planning/highlighting.
        current_state_img_path = os.path.join(OUTPUT_DIR, f"step_{step}_state.png")
        if SAVE_IMAGES:
            image.save(current_state_img_path)
            logger.info(f"Saved current state to {current_state_img_path}")

        # 2. Plan the next action.
        logger.info("Planning action with LLM...")
        try:
            llm_plan, target_element = plan_action_for_ui(
                elements, user_goal, action_history
            )

            logger.info(f"LLM Reasoning: {llm_plan.reasoning}")
            logger.info(
                f"LLM Proposed Action: {llm_plan.action} on Element ID: {llm_plan.element_id}"
            )
            if llm_plan.text_to_type:
                logger.info(f"Text to Type: '{llm_plan.text_to_type}'")
            logger.info(f"LLM Goal Complete Assessment: {llm_plan.is_goal_complete}")

            # 3. Record the completion flag but do NOT break yet: the planned
            # action must still be simulated so the final state reflects it.
            if llm_plan.is_goal_complete:
                logger.info(
                    "LLM flag indicates goal should be complete after this action."
                )
                goal_achieved_flag = True

            # A valid target is required even when the goal is complete, for
            # logging/visualization of the final action.
            if not target_element:
                logger.error(
                    f"LLM chose an invalid element ID ({llm_plan.element_id}). Stopping execution."
                )
                break

            # 4. Visualize the planned action.
            highlight_img_path = os.path.join(OUTPUT_DIR, f"step_{step}_highlight.png")
            highlighted_image = draw_highlight(
                image,
                target_element,
                plan=llm_plan,  # Lets the overlay show the planned text/action.
                color="lime",
                width=4,
                dim_factor=0.5,
            )
            if SAVE_IMAGES:
                highlighted_image.save(highlight_img_path)
                logger.info(f"Saved highlighted action with text to {highlight_img_path}")

            # Record the action in history *before* simulation mutates state.
            action_desc = f"Action: {llm_plan.action}"
            if llm_plan.text_to_type:
                action_desc += f" '{llm_plan.text_to_type}'"
            action_desc += (
                f" on Element ID {target_element.id} ('{target_element.content}')"
            )
            action_history.append(action_desc)

            # 5. Simulate the action -> next state (always runs for the planned step).
            logger.info("Simulating action...")
            # Capture the username now, in case this step completes the login.
            # NOTE(review): assumes element id 0 is the username field — matches
            # the synthetic login screen layout; confirm if that changes.
            username = next(
                (
                    el.content
                    for el in elements
                    if el.id == 0 and el.type == "text_field"
                ),
                "User",
            )

            new_image, new_elements = simulate_action(
                image, elements, llm_plan, username_for_login=username
            )

            # Detect whether the action actually changed anything. Element
            # *content* is authoritative: simulate_action may deep-copy, so new
            # object identities alone do not imply a new state. (Image identity
            # is still used as a cheap signal for pixel changes.)
            elements_unchanged = len(elements) == len(new_elements) and all(
                e1.to_dict() == e2.to_dict()
                for e1, e2 in zip(elements, new_elements)
            )
            state_changed = (new_image is not image) or not elements_unchanged

            # Advance state regardless, so the next iteration sees the new UI.
            image, elements = new_image, new_elements

            if state_changed:
                logger.info(
                    f"State updated for next step. New element count: {len(elements)}"
                )
            else:
                logger.warning(
                    "Simulation did not result in a detectable state change."
                )
                # Deliberately continue; add 'break' here to stop on stagnation.

            # 6. NOW honor the completion flag, *after* simulation.
            if goal_achieved_flag:
                logger.success(
                    "Goal completion flag was set, ending loop after simulation."
                )
                break

            # Pause briefly between steps.
            time.sleep(1)

        except Exception:
            # logger is loguru (see logger.success above); logger.exception
            # attaches the traceback, unlike stdlib-style exc_info=True.
            logger.exception(f"Error during step {step + 1}")
            break  # Stop on error.

    # --- End of Loop ---
    logger.info("\n--- Multi-Step Demo Finished ---")
    if goal_achieved_flag:
        logger.success("Overall goal marked as achieved by LLM during execution.")
    elif last_step == MAX_STEPS - 1:
        logger.warning(
            f"Reached maximum steps ({MAX_STEPS}) without goal completion flag being set."
        )
    else:
        logger.error(
            "Execution stopped prematurely (check logs for errors or lack of state change)."
        )

    # Save the final state (state *after* the last successful simulation).
    final_state_img_path = os.path.join(OUTPUT_DIR, "final_state.png")
    if SAVE_IMAGES:
        image.save(final_state_img_path)
        logger.info(f"Saved final state to {final_state_img_path}")


# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    run_multi_step_demo()
Binary file added demo_output/login_screen.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output/login_screen_highlighted.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/final_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_0_state_initial.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_1_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_1_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_2_highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demo_output_multistep/step_2_state.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
139 changes: 139 additions & 0 deletions omnimcp/completions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# omnimcp/completions.py
import json
import time
from typing import Dict, List, Type, TypeVar
import anthropic
from pydantic import BaseModel, ValidationError
from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_random_exponential,
)

from .config import config # Assuming config has ANTHROPIC_API_KEY
from .utils import logger # Reuse logger from utils

# Check for API key.
# Fail fast at import time: every call in this module needs the Anthropic client.
if not config.ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY not found in environment or .env file.")

# Initialize Anthropic client (module-level singleton, reused across calls).
client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)

# Type variable for the Pydantic response model returned by call_llm_api.
T = TypeVar("T", bound=BaseModel)

# Transient failures worth retrying; other APIErrors are surfaced immediately.
RETRYABLE_ERRORS = (
    anthropic.RateLimitError,
    anthropic.APIConnectionError,
    anthropic.InternalServerError,
)

MAX_RETRIES = 3  # Total attempts per call, including the first.
DEFAULT_MODEL = "claude-3-haiku-20240307"  # Or use Opus/Sonnet if needed


@retry(
    retry=retry_if_exception_type(RETRYABLE_ERRORS),
    wait=wait_random_exponential(min=1, max=30),
    stop=stop_after_attempt(MAX_RETRIES),
    before_sleep=lambda retry_state: logger.warning(
        f"LLM API Error (Attempt {retry_state.attempt_number}/{MAX_RETRIES}): "
        f"{retry_state.outcome.exception()}. Retrying...",
    ),
)
def call_llm_api(
    messages: List[Dict[str, str]],
    response_format: Type[T],
    model: str = DEFAULT_MODEL,
    temperature: float = 0.1,  # Low temperature for more deterministic output.
    system_prompt: str | None = None,
) -> T:
    """
    Calls the Anthropic API, expecting a JSON response conforming to the pydantic model.

    Retries automatically (exponential backoff, up to MAX_RETRIES attempts) on
    rate limits, connection failures, and server errors via the @retry decorator.

    Args:
        messages: Conversation messages (role/content dicts) for the API call.
            The system prompt is NOT part of this list; pass it separately.
        response_format: The Pydantic model class for the expected JSON structure.
        model: The Anthropic model to use.
        temperature: The sampling temperature.
        system_prompt: Optional system prompt; omitted from the request when None.

    Returns:
        An instance of the response_format Pydantic model.

    Raises:
        anthropic.APIError: If a non-retryable API error occurs.
        ValueError: If the response is empty, not valid JSON, or doesn't match the schema.
        Exception: After exceeding retry attempts for retryable errors.
    """
    logger.debug(
        f"Calling Anthropic API (model: {model}) with {len(messages)} messages."
    )
    if system_prompt:
        logger.debug(
            f"System Prompt: {system_prompt[:100]}..."
        )  # Log beginning of system prompt.
    start_time = time.time()

    try:
        # Build kwargs so `system` is only sent when provided: the SDK expects
        # a string (or its NOT_GIVEN sentinel) for `system`, not None.
        request_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": 1024,  # Adjust as needed.
            "temperature": temperature,
        }
        if system_prompt:
            request_kwargs["system"] = system_prompt

        response = client.messages.create(**request_kwargs)

        duration_ms = int((time.time() - start_time) * 1000)
        logger.debug(f"LLM API call completed in {duration_ms}ms.")

        # Extract the text content.
        if not response.content:
            logger.error("Received empty content list from API.")
            raise ValueError("LLM response content is empty.")
        response_text = response.content[0].text.strip()
        logger.debug(f"Raw LLM response text:\n{response_text}")

        # Clean potential markdown code fences; handle both ```json and bare
        # ``` openers (removeprefix is a no-op when the prefix is absent).
        response_text = response_text.removeprefix("```json").removeprefix("```")
        response_text = response_text.removesuffix("```").strip()

        # Parse and validate the JSON response using the Pydantic model.
        try:
            parsed_response = response_format.model_validate_json(response_text)
            logger.info(
                f"Successfully parsed LLM response into {response_format.__name__}."
            )
            return parsed_response
        except ValidationError as e:
            logger.error(
                f"Failed to validate LLM JSON response against schema {response_format.__name__}."
            )
            logger.error(f"Validation Errors: {e}")
            logger.error(f"Raw response was: {response_text}")
            raise ValueError(
                f"LLM response did not match the expected format: {e}"
            ) from e
        except json.JSONDecodeError as e:
            # Defensive: pydantic v2 usually wraps JSON errors in ValidationError.
            logger.error("Failed to decode LLM response as JSON.")
            logger.error(f"Raw response was: {response_text}")
            raise ValueError(f"LLM response was not valid JSON: {e}") from e

    except RETRYABLE_ERRORS as e:
        # Re-raise so the @retry decorator can back off and try again.
        logger.warning(f"Encountered retryable API error: {type(e).__name__} - {e}")
        raise
    except anthropic.APIError as e:
        logger.error(f"Non-retryable Anthropic API error: {type(e).__name__} - {e}")
        raise
    except Exception as e:
        logger.error(
            f"Unexpected error during LLM API call: {type(e).__name__} - {e}",
            exc_info=True,
        )
        raise
Loading