Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions omnimcp/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# omnimcp/config.py

"""Configuration management for OmniMCP."""

import os
Expand All @@ -9,18 +11,21 @@

class OmniMCPConfig(BaseSettings):
"""Configuration settings for OmniMCP."""

# Claude API configuration
ANTHROPIC_API_KEY: Optional[str] = None


# Auto-shutdown OmniParser after 60 minutes of inactivity
INACTIVITY_TIMEOUT_MINUTES: int = 60

# OmniParser configuration
OMNIPARSER_URL: Optional[str] = None

# AWS deployment settings (for remote OmniParser)
AWS_ACCESS_KEY_ID: Optional[str] = None
AWS_SECRET_ACCESS_KEY: Optional[str] = None
AWS_REGION: Optional[str] = "us-west-2"

# OmniParser deployment configuration
PROJECT_NAME: str = "omniparser"
REPO_URL: str = "https://github.com/microsoft/OmniParser.git"
Expand All @@ -30,19 +35,20 @@ class OmniMCPConfig(BaseSettings):
AWS_EC2_USER: str = "ubuntu"
PORT: int = 8000 # FastAPI port
COMMAND_TIMEOUT: int = 600 # 10 minutes

# Debug settings
DEBUG: bool = False
LOG_LEVEL: str = "INFO"

class Config:
"""Pydantic settings configuration."""

env_file = ".env"
env_file_encoding = "utf-8"

# Allow extra fields in the settings
extra = "ignore"

# Properties for OmniParser deployment
@property
def CONTAINER_NAME(self) -> str:
Expand All @@ -68,4 +74,4 @@ def AWS_EC2_SECURITY_GROUP(self) -> str:


# Create a global config instance
config = OmniMCPConfig()
config = OmniMCPConfig()
2 changes: 2 additions & 0 deletions omnimcp/omniparser/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# omnimcp/omniparser/Dockerfile

FROM nvidia/cuda:12.3.1-devel-ubuntu22.04

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
Expand Down
26 changes: 13 additions & 13 deletions omnimcp/omniparser/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# omnimcp/omniparser/client.py

"""Client module for interacting with the OmniParser server."""

import base64
Expand Down Expand Up @@ -36,14 +38,15 @@ def _ensure_server(self) -> None:

# Check if any instances are running
import boto3
ec2 = boto3.resource('ec2')

ec2 = boto3.resource("ec2")
instances = ec2.instances.filter(
Filters=[
{'Name': 'tag:Name', 'Values': ['omniparser']},
{'Name': 'instance-state-name', 'Values': ['running']}
{"Name": "tag:Name", "Values": ["omniparser"]},
{"Name": "instance-state-name", "Values": ["running"]},
]
)

instance = next(iter(instances), None)
if instance and instance.public_ip_address:
self.server_url = f"http://{instance.public_ip_address}:8000"
Expand All @@ -57,8 +60,8 @@ def _ensure_server(self) -> None:
for i in range(max_retries):
instances = ec2.instances.filter(
Filters=[
{'Name': 'tag:Name', 'Values': ['omniparser']},
{'Name': 'instance-state-name', 'Values': ['running']}
{"Name": "tag:Name", "Values": ["omniparser"]},
{"Name": "instance-state-name", "Values": ["running"]},
]
)
instance = next(iter(instances), None)
Expand All @@ -69,9 +72,7 @@ def _ensure_server(self) -> None:
else:
raise RuntimeError("Failed to deploy server")
else:
raise RuntimeError(
"No server URL provided and auto_deploy is disabled"
)
raise RuntimeError("No server URL provided and auto_deploy is disabled")

# Verify server is responsive
self._check_server()
Expand Down Expand Up @@ -101,7 +102,7 @@ def parse_image(self, image: Image.Image) -> Dict:
response = requests.post(
f"{self.server_url}/parse/",
json={"base64_image": image_bytes},
timeout=30
timeout=30,
)
response.raise_for_status()
return response.json()
Expand All @@ -112,14 +113,13 @@ def parse_image(self, image: Image.Image) -> Dict:
def _image_to_base64(image: Image.Image) -> str:
"""Convert PIL Image to base64 string."""
import io

buffered = io.BytesIO()
image.save(buffered, format="PNG")
return base64.b64encode(buffered.getvalue()).decode()

def visualize_results(
self,
image: Image.Image,
parsed_content: List[Dict]
self, image: Image.Image, parsed_content: List[Dict]
) -> Image.Image:
"""Visualize parsing results on the image.

Expand Down
Loading