Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 0 additions & 97 deletions test_deploy_and_parse.py

This file was deleted.

71 changes: 71 additions & 0 deletions tests/test_deploy_and_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# tests/test_deploy_and_parse.py

"""
A simple script to test OmniParser deployment, screenshotting,
parsing, and mapping to UIElements using VisualState.
"""

import sys
import asyncio # Needed for async VisualState.update()

# Import the shared logger first.
# NOTE(review): presumably importing omnimcp also triggers config/.env loading — confirm.
from omnimcp.utils import logger # Assuming logger is configured

# Import the classes needed
from omnimcp.omniparser.client import OmniParserClient
from omnimcp.omnimcp import VisualState # Import VisualState

def _preview(content, limit=50):
    """Return *content* as text, cut to *limit* characters.

    The ``...`` suffix is appended only when text was actually cut off, so a
    short value is not shown as if it had been truncated. ``None`` is rendered
    as an empty string instead of raising on the slice.
    """
    text = "" if content is None else str(content)
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def main() -> int:
    """Run the OmniParser integration check end to end.

    Steps: create an ``OmniParserClient`` with ``auto_deploy=True``, wrap it in
    a ``VisualState``, run one ``VisualState.update()`` and report the mapped
    elements (the first five are printed).

    Returns:
        0 when the update completed (even if no elements were mapped),
        1 when client initialization or the update raised.
    """
    logger.info("--- Starting OmniParser Integration Test ---")

    # 1. Initialize Client (Triggers auto-deploy/discovery)
    logger.info("Initializing OmniParserClient...")
    try:
        parser_client = OmniParserClient(auto_deploy=True)
        logger.success(
            f"OmniParserClient ready. Server URL: {parser_client.server_url}"
        )
    except Exception as e:
        # logger.exception attaches the active traceback. Passing
        # exc_info=True to a loguru-style logger would instead be treated as
        # a str.format() argument: no traceback, and a formatting error if
        # the exception text contains braces.
        logger.exception(f"Failed to initialize OmniParserClient: {e}")
        return 1

    # 2. Initialize VisualState
    logger.info("Initializing VisualState...")
    visual_state_manager = VisualState(parser_client=parser_client)

    # 3. Update Visual State (Takes screenshot, parses, maps)
    logger.info(
        "Updating visual state (this takes screenshot, calls parser, maps results)..."
    )
    try:
        # update() is a coroutine, so drive it with a fresh event loop
        asyncio.run(visual_state_manager.update())

        elements = visual_state_manager.elements
        if not elements:
            logger.warning("VisualState update completed, but no elements were mapped.")
            logger.warning(
                "Check OmniParser logs on the server or previous log messages for parser errors."
            )
        else:
            logger.success(
                f"VisualState update successful. Mapped {len(elements)} elements."
            )
            logger.info("First 5 mapped UI Elements:")
            for i, element in enumerate(elements[:5]):
                print(
                    f"  {i}: ID={element.id}, Type={element.type}, "
                    f"Content='{_preview(element.content)}', Bounds={element.bounds}"
                )

            # You could now potentially pass visual_state_manager.elements to a planner
            # logger.info("Next step would be to call the planner with these elements.")

    except Exception as e:
        logger.exception(f"Error during VisualState update: {e}")
        return 1

    logger.info("--- Test Finished ---")
    logger.info(
        "Reminder: Run 'python omnimcp/omniparser/server.py stop' to shut down the EC2 instance."
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
79 changes: 79 additions & 0 deletions tests/test_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# tests/test_mapper.py

import pytest

from omnimcp.omniparser.mapper import map_omniparser_to_uielements
from omnimcp.types import Bounds

# Sample based on partial output from previous run
# Fixture entries modelled on partial output from a previous parser run.
# Every bbox is [x_min, y_min, x_max, y_max].
_ENTRY_TEXTBOX = {
    "type": "textbox",  # Example type
    "bbox": [0.1, 0.1, 0.5, 0.2],
    "content": "Some Text",
    "confidence": 0.95,
    "attributes": {},
}

# Deliberately omits "confidence" and "attributes"
_ENTRY_BUTTON = {
    "type": "button",
    "bbox": [0.4, 0.4, 0.6, 0.5],
    "content": "Click Me",
}

# Example with invalid bounds
_ENTRY_BAD_BOUNDS = {
    "type": "icon",
    "bbox": [1.1, 0.1, 1.2, 0.2],
    "content": "Bad Icon",
}

# Example with missing bbox
_ENTRY_NO_BBOX = {
    "type": "text",
    "content": "Text with no box",
}

SAMPLE_OMNIPARSER_JSON = {
    "parsed_content_list": [
        _ENTRY_TEXTBOX,
        _ENTRY_BUTTON,
        _ENTRY_BAD_BOUNDS,
        _ENTRY_NO_BBOX,
    ],
    # Add other top-level keys if they exist in real output
}

# Image dimensions passed to the mapper alongside the sample payload
IMG_WIDTH, IMG_HEIGHT = 1000, 800


def test_mapper_basic():
    """Map the sample payload and check the two entries that should survive.

    The sample holds four entries; the one with out-of-range bounds and the
    one without a bbox are expected to be dropped by the mapper.
    """
    mapped = map_omniparser_to_uielements(
        SAMPLE_OMNIPARSER_JSON, IMG_WIDTH, IMG_HEIGHT
    )

    # Only the textbox and the button are expected to come back
    assert len(mapped) == 2

    # --- textbox ---
    textbox = mapped[0]
    assert textbox.id == 0
    assert textbox.type == "textbox"
    assert textbox.content == "Some Text"
    assert textbox.confidence == 0.95
    # Bounds are compared as (x, y, w, h); approx absorbs float rounding
    textbox_bounds: Bounds = (0.1, 0.1, 0.5 - 0.1, 0.2 - 0.1)
    assert textbox.bounds == pytest.approx(textbox_bounds)

    # --- button ---
    button = mapped[1]
    assert button.id == 1
    assert button.type == "button"
    assert button.content == "Click Me"
    # No confidence in the sample entry, so the default is expected
    assert button.confidence == 0.0
    button_bounds: Bounds = (0.4, 0.4, 0.6 - 0.4, 0.5 - 0.4)
    assert button.bounds == pytest.approx(button_bounds)


# Add more tests for edge cases, different types, etc.
def test_mapper_empty_input():
    """A payload with no entries, or no ``parsed_content_list`` key, maps to nothing."""
    empty_payloads = ({}, {"parsed_content_list": []})
    for payload in empty_payloads:
        mapped = map_omniparser_to_uielements(payload, IMG_WIDTH, IMG_HEIGHT)
        assert len(mapped) == 0


# TODO: more test cases