diff --git a/test_deploy_and_parse.py b/test_deploy_and_parse.py
deleted file mode 100644
index 8a77f3e..0000000
--- a/test_deploy_and_parse.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# test_deploy_and_parse.py
-
-"""
-A simple script to test OmniParser deployment and basic image parsing.
-Reuses config loading from omnimcp.config.
-"""
-
-import sys
-import json
-from PIL import Image
-
-# Import config first to trigger .env loading
-from omnimcp.config import config
-from omnimcp.utils import logger, take_screenshot
-from omnimcp.omniparser.client import OmniParserClient
-
-
-if __name__ == "__main__":
-    logger.info("--- Starting OmniParser Deployment and Parse Test ---")
-
-    # Optional: Check if config loaded AWS keys (for user feedback)
-    # Note: boto3 might still find credentials via ~/.aws/credentials even if not in .env/env vars
-    if config.AWS_ACCESS_KEY_ID and config.AWS_SECRET_ACCESS_KEY and config.AWS_REGION:
-        logger.info(
-            f"AWS config loaded via pydantic-settings (Region: {config.AWS_REGION})."
-        )
-    else:
-        logger.warning(
-            "AWS credentials/region not found via config (env vars or .env)."
-        )
-        logger.warning(
-            "Ensure credentials are configured where boto3 can find them (e.g., ~/.aws/credentials, env vars)."
-        )
-
-    # 1. Initialize Client (Triggers auto-deploy/discovery)
-    logger.info(
-        "Initializing OmniParserClient (this may take several minutes if deploying)..."
-    )
-    try:
-        parser_client = OmniParserClient(
-            auto_deploy=True
-        )  # auto_deploy=True is default
-        logger.success(
-            f"OmniParserClient ready. Connected to server: {parser_client.server_url}"
-        )
-    except Exception as e:
-        logger.error(f"Failed to initialize OmniParserClient: {e}", exc_info=True)
-        logger.error(
-            "Please check AWS credentials configuration and network connectivity."
-        )
-        sys.exit(1)
-
-    # 2. Take Screenshot
-    logger.info("Taking screenshot...")
-    try:
-        screenshot: Image.Image = take_screenshot()
-        logger.success("Screenshot taken successfully.")
-        try:
-            screenshot_path = "test_deploy_screenshot.png"
-            screenshot.save(screenshot_path)
-            logger.info(f"Saved screenshot for debugging to: {screenshot_path}")
-        except Exception as save_e:
-            logger.warning(f"Could not save debug screenshot: {save_e}")
-    except Exception as e:
-        logger.error(f"Failed to take screenshot: {e}", exc_info=True)
-        sys.exit(1)
-
-    # 3. Parse Image
-    logger.info(f"Sending screenshot to OmniParser at {parser_client.server_url}...")
-    results = None
-    try:
-        results = parser_client.parse_image(screenshot)
-        logger.success("Received response from OmniParser.")
-    except Exception as e:
-        logger.error(
-            f"Unexpected error during client.parse_image call: {e}", exc_info=True
-        )
-        sys.exit(1)
-
-    # 4. Print Results
-    if isinstance(results, dict) and "error" in results:
-        logger.error(f"OmniParser server returned an error: {results['error']}")
-    elif isinstance(results, dict):
-        logger.success("OmniParser returned a successful response.")
-        logger.info("Raw JSON Result:")
-        try:
-            print(json.dumps(results, indent=2))
-        except Exception as json_e:
-            logger.error(f"Could not format result as JSON: {json_e}")
-            print(results)
-    else:
-        logger.warning(
-            f"Received unexpected result format from OmniParser client: {type(results)}"
-        )
-        print(results)
-
-    logger.info("--- Test Finished ---")
diff --git a/tests/test_deploy_and_parse.py b/tests/test_deploy_and_parse.py
new file mode 100644
index 0000000..946fefb
--- /dev/null
+++ b/tests/test_deploy_and_parse.py
@@ -0,0 +1,72 @@
+# tests/test_deploy_and_parse.py
+
+"""
+A simple script to test OmniParser deployment, screenshotting,
+parsing, and mapping to UIElements using VisualState.
+"""
+
+import sys
+import asyncio  # Needed for async VisualState.update()
+
+# NOTE(review): config is no longer imported here; confirm omnimcp.utils still loads .env
+from omnimcp.utils import logger  # Assuming logger is configured
+
+# Import the classes needed
+from omnimcp.omniparser.client import OmniParserClient
+from omnimcp.omnimcp import VisualState  # Import VisualState
+
+if __name__ == "__main__":
+    logger.info("--- Starting OmniParser Integration Test ---")
+
+    # 1. Initialize Client (Triggers auto-deploy/discovery)
+    logger.info("Initializing OmniParserClient...")
+    parser_client = None
+    try:
+        parser_client = OmniParserClient(auto_deploy=True)
+        logger.success(
+            f"OmniParserClient ready. Server URL: {parser_client.server_url}"
+        )
+    except Exception as e:
+        # NOTE(review): logger has .success(), so presumably loguru, which ignores exc_info= - confirm
+        logger.exception(f"Failed to initialize OmniParserClient: {e}")
+        sys.exit(1)
+
+    # 2. Initialize VisualState
+    logger.info("Initializing VisualState...")
+    visual_state_manager = VisualState(parser_client=parser_client)
+
+    # 3. Update Visual State (Takes screenshot, parses, maps)
+    logger.info(
+        "Updating visual state (this takes screenshot, calls parser, maps results)..."
+    )
+    try:
+        # Run the async update function
+        asyncio.run(visual_state_manager.update())
+
+        if not visual_state_manager.elements:
+            logger.warning("VisualState update completed, but no elements were mapped.")
+            logger.warning(
+                "Check OmniParser logs on the server or previous log messages for parser errors."
+            )
+        else:
+            logger.success(
+                f"VisualState update successful. Mapped {len(visual_state_manager.elements)} elements."
+            )
+            logger.info("First 5 mapped UI Elements:")
+            for i, element in enumerate(visual_state_manager.elements[:5]):
+                # Use a more readable format, perhaps to_prompt_repr or just key attributes
+                print(
+                    f" {i}: ID={element.id}, Type={element.type}, Content='{element.content[:50]}...', Bounds={element.bounds}"
+                )
+
+        # You could now potentially pass visual_state_manager.elements to a planner
+        # logger.info("Next step would be to call the planner with these elements.")
+
+    except Exception as e:
+        logger.exception(f"Error during VisualState update: {e}")
+        sys.exit(1)
+
+    logger.info("--- Test Finished ---")
+    logger.info(
+        "Reminder: Run 'python omnimcp/omniparser/server.py stop' to shut down the EC2 instance."
+    )
diff --git a/tests/test_mapper.py b/tests/test_mapper.py
new file mode 100644
index 0000000..cba72b3
--- /dev/null
+++ b/tests/test_mapper.py
@@ -0,0 +1,81 @@
+# tests/test_mapper.py
+
+import pytest
+
+from omnimcp.omniparser.mapper import map_omniparser_to_uielements
+from omnimcp.types import Bounds
+
+# Sample based on partial output from previous run
+SAMPLE_OMNIPARSER_JSON = {
+    "parsed_content_list": [
+        {
+            "type": "textbox",  # Example type
+            "bbox": [0.1, 0.1, 0.5, 0.2],  # x_min, y_min, x_max, y_max
+            "content": "Some Text",
+            "confidence": 0.95,
+            "attributes": {},
+        },
+        {
+            "type": "button",
+            "bbox": [0.4, 0.4, 0.6, 0.5],
+            "content": "Click Me",
+            # Missing confidence/attributes
+        },
+        {  # Example with invalid bounds
+            "type": "icon",
+            "bbox": [1.1, 0.1, 1.2, 0.2],
+            "content": "Bad Icon",
+        },
+        {  # Example with missing bbox
+            "type": "text",
+            "content": "Text with no box",
+        },
+    ]
+    # Add other top-level keys if they exist in real output
+}
+
+IMG_WIDTH = 1000
+IMG_HEIGHT = 800
+
+
+def test_mapper_basic():
+    """The two well-formed sample entries are mapped; the two malformed ones are skipped."""
+    elements = map_omniparser_to_uielements(
+        SAMPLE_OMNIPARSER_JSON, IMG_WIDTH, IMG_HEIGHT
+    )
+
+    # Expect 2 valid elements (textbox, button), the others skipped
+    assert len(elements) == 2
+
+    # Check first element (textbox)
+    assert elements[0].id == 0
+    assert elements[0].type == "textbox"
+    assert elements[0].content == "Some Text"
+    assert elements[0].confidence == 0.95
+    # Check calculated bounds (x, y, w, h)
+    expected_bounds_0: Bounds = (0.1, 0.1, 0.5 - 0.1, 0.2 - 0.1)
+    assert elements[0].bounds == pytest.approx(
+        expected_bounds_0
+    )  # Use approx for float comparison
+
+    # Check second element (button)
+    assert elements[1].id == 1
+    assert elements[1].type == "button"
+    assert elements[1].content == "Click Me"
+    assert elements[1].confidence == 0.0  # Default confidence
+    expected_bounds_1: Bounds = (0.4, 0.4, 0.6 - 0.4, 0.5 - 0.4)
+    assert elements[1].bounds == pytest.approx(expected_bounds_1)
+
+
+# Add more tests for edge cases, different types, etc.
+def test_mapper_empty_input():
+    """An empty payload or an empty parsed_content_list yields no elements."""
+    elements = map_omniparser_to_uielements({}, IMG_WIDTH, IMG_HEIGHT)
+    assert len(elements) == 0
+    elements = map_omniparser_to_uielements(
+        {"parsed_content_list": []}, IMG_WIDTH, IMG_HEIGHT
+    )
+    assert len(elements) == 0
+
+
+# TODO: more test cases