Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +0,0 @@
from src.engine.ov_genai.llm import OVGenAI_LLM
from src.engine.ov_genai.vlm import OVGenAI_VLM
from src.engine.ov_genai.whisper import OVGenAI_Whisper
from src.engine.openvino.kokoro import OV_Kokoro

__all__ = ["OVGenAI_LLM", "OVGenAI_VLM", "OVGenAI_Whisper", "OV_Kokoro"]
76 changes: 51 additions & 25 deletions src/cli/openarc_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,43 @@
from rich.text import Text
from rich.progress import Progress, SpinnerColumn, TextColumn

from .launch_server import start_server
from .server_config import ServerConfig
from .device_query import DeviceDataQuery, DeviceDiagnosticQuery
from .benchmark import OpenArcBenchmarks, BenchmarkDB

click.rich_click.STYLE_OPTIONS_TABLE_LEADING = 1
click.rich_click.STYLE_OPTIONS_TABLE_BOX = "SIMPLE"
click.rich_click.STYLE_COMMANDS_TABLE_SHOW_LINES = True
click.rich_click.STYLE_COMMANDS_TABLE_BORDER_STYLE = "red"
click.rich_click.STYLE_COMMANDS_TABLE_ROW_STYLES = ["magenta", "yellow", "cyan", "green"]

console = Console()
# Initialize managers
server_config = ServerConfig()
benchmark_db = BenchmarkDB()


class CLIContext:
    """Holds lazily-constructed service objects shared by CLI commands.

    Importing ServerConfig / BenchmarkDB is deferred until a command
    actually touches the corresponding property, so commands that need
    neither (e.g. --help) start without paying for those imports.
    """

    __slots__ = ('_server_config', '_benchmark_db')

    def __init__(self):
        # Nothing is built up front; the properties below fill these in.
        self._server_config = None
        self._benchmark_db = None

    @property
    def server_config(self):
        """ServerConfig instance, created on first access and cached."""
        if self._server_config is not None:
            return self._server_config
        from .server_config import ServerConfig
        self._server_config = ServerConfig()
        return self._server_config

    @property
    def benchmark_db(self):
        """BenchmarkDB instance, created on first access and cached."""
        if self._benchmark_db is not None:
            return self._benchmark_db
        from .benchmark import BenchmarkDB
        self._benchmark_db = BenchmarkDB()
        return self._benchmark_db


class OpenArcCLI:
def __init__(self, base_url=None, api_key=None):
if base_url is None:
def __init__(self, base_url=None, api_key=None, server_config=None):
if base_url is None and server_config is not None:
base_url = server_config.get_base_url()
self.base_url = base_url
self.api_key = api_key or os.getenv('OPENARC_API_KEY')
Expand Down Expand Up @@ -83,7 +100,8 @@ def get_help(self, ctx):
return super().get_help(ctx)

@click.group(cls=ColoredAsciiArtGroup)
def cli():
@click.pass_context
def cli(ctx):
"""
Use this application to interface with the OpenArc server.

Expand All @@ -108,6 +126,7 @@ def cli():

To get started add --help to one of the commands below to view its documentation.
"""
ctx.ensure_object(CLIContext)

@cli.command()
@click.option('--model-name', '--mn',
Expand Down Expand Up @@ -150,7 +169,7 @@ def add(ctx, model_path, model_name, engine, model_type, device, runtime_config,
"vlm_type": vlm_type if vlm_type else None
}

server_config.save_model_config(model_name, load_config)
ctx.obj.server_config.save_model_config(model_name, load_config)
console.print(f"[green]Model configuration saved:[/green] {model_name}")
console.print(f"[dim]Use 'openarc load {model_name}' to load this model.[/dim]")

Expand All @@ -164,7 +183,7 @@ def load(ctx, model_names):
openarc load model1
openarc load Dolphin-X1 kokoro whisper
"""
cli_instance = OpenArcCLI()
cli_instance = OpenArcCLI(server_config=ctx.obj.server_config)

model_names = list(model_names)

Expand All @@ -185,7 +204,7 @@ def load(ctx, model_names):
console.print(f"[blue]loading[/blue] {name}")

# Get saved configuration
saved_config = server_config.get_model_config(name)
saved_config = ctx.obj.server_config.get_model_config(name)

if not saved_config:
console.print(f"[red]Model configuration not found:[/red] {name}")
Expand Down Expand Up @@ -243,7 +262,7 @@ def unload(ctx, model_names):
openarc unload model1
openarc unload Dolphin-X1 kokoro whisper
"""
cli_instance = OpenArcCLI()
cli_instance = OpenArcCLI(server_config=ctx.obj.server_config)

model_names = list(model_names)

Expand Down Expand Up @@ -317,20 +336,20 @@ def list_configs(ctx, remove, model_name):
ctx.exit(1)

# Check if model exists before trying to remove
if not server_config.model_exists(model_name):
if not ctx.obj.server_config.model_exists(model_name):
console.print(f"{model_name}[red] not found:[/red]")
console.print("[yellow]Use 'openarc list' to see available configurations.[/yellow]")
ctx.exit(1)

# Remove the configuration
if server_config.remove_model_config(model_name):
if ctx.obj.server_config.remove_model_config(model_name):
console.print(f"[green]Model configuration removed:[/green] {model_name}")
else:
console.print(f"[red]Failed to remove model configuration:[/red] {model_name}")
ctx.exit(1)
return

models = server_config.get_all_models()
models = ctx.obj.server_config.get_all_models()

if not models:
console.print("[yellow]No model configurations found.[/yellow]")
Expand Down Expand Up @@ -370,7 +389,7 @@ def list_configs(ctx, remove, model_name):
@click.pass_context
def status(ctx):
"""- GET Status of loaded models."""
cli_instance = OpenArcCLI()
cli_instance = OpenArcCLI(server_config=ctx.obj.server_config)

url = f"{cli_instance.base_url}/openarc/status"

Expand Down Expand Up @@ -442,7 +461,7 @@ def bench(ctx, model_name, input_tokens, max_tokens, runs):
openarc bench Dolphin-X1 --p 16,32,64 --n 128,256
openarc bench Dolphin-X1 -p 16 -p 32 -n 128 -n 256
"""
cli_instance = OpenArcCLI()
cli_instance = OpenArcCLI(server_config=ctx.obj.server_config)

# Parse input_tokens and max_tokens (handle comma-separated and multiple invocations)
p_values = []
Expand Down Expand Up @@ -478,7 +497,7 @@ def bench(ctx, model_name, input_tokens, max_tokens, runs):
ctx.exit(1)

# Get model path from config to generate input tokens
model_config = server_config.get_model_config(model_name)
model_config = ctx.obj.server_config.get_model_config(model_name)
if not model_config:
console.print(f"[red]Model configuration not found for '{model_name}'[/red]")
console.print("[yellow]Cannot generate random tokens without model path.[/yellow]")
Expand All @@ -495,6 +514,9 @@ def bench(ctx, model_name, input_tokens, max_tokens, runs):
console.print(f"max tokens: {n_values}")
console.print(f"runs: {runs}\n")

# Lazy load OpenArcBenchmarks
from .benchmark import OpenArcBenchmarks

# Generate unique run_id for this benchmark session
run_id = str(uuid.uuid4())

Expand Down Expand Up @@ -555,7 +577,7 @@ def bench(ctx, model_name, input_tokens, max_tokens, runs):
results.append(result)

# Save result to database
benchmark_db.save_result(model_name, result, run_id)
ctx.obj.benchmark_db.save_result(model_name, result, run_id)

except Exception as e:
console.print(f"\n[yellow]Error in run {r+1}: {e}[/yellow]")
Expand Down Expand Up @@ -619,6 +641,7 @@ def device_properties(ctx):
"""

try:
from .device_query import DeviceDataQuery
console.print("[blue]Querying device data for all devices...[/blue]")
device_query = DeviceDataQuery()
available_devices = device_query.get_available_devices()
Expand Down Expand Up @@ -656,6 +679,7 @@ def device_detect(ctx):
"""

try:
from .device_query import DeviceDiagnosticQuery
console.print("[blue]Detecting OpenVINO devices...[/blue]")
diagnostic = DeviceDiagnosticQuery()
available_devices = diagnostic.get_available_devices()
Expand Down Expand Up @@ -701,7 +725,8 @@ def serve():
required=False,
help="Load models on startup. Specify once followed by space-separated model names.")
@click.argument('startup_models', nargs=-1, required=False)
def start(host, openarc_port, load_models, startup_models):
@click.pass_context
def start(ctx, host, openarc_port, load_models, startup_models):
"""
- 'start' reads --host and --openarc-port from config or defaults to 0.0.0.0:8000

Expand All @@ -711,7 +736,7 @@ def start(host, openarc_port, load_models, startup_models):
openarc serve start --lm Dolphin-X1 kokoro whisper
"""
# Save server configuration for other CLI commands to use
config_path = server_config.save_server_config(host, openarc_port)
config_path = ctx.obj.server_config.save_server_config(host, openarc_port)
console.print(f"[dim]Configuration saved to: {config_path}[/dim]")

# Handle startup models
Expand All @@ -722,7 +747,7 @@ def start(host, openarc_port, load_models, startup_models):
models_to_load.extend(startup_models)

if models_to_load:
saved_model_names = server_config.get_model_names()
saved_model_names = ctx.obj.server_config.get_model_names()
missing = [m for m in models_to_load if m not in saved_model_names]

if missing:
Expand All @@ -735,6 +760,7 @@ def start(host, openarc_port, load_models, startup_models):
console.print(f"[blue]Models to load on startup:[/blue] {', '.join(models_to_load)}\n")

console.print(f"[green]Starting OpenArc server on {host}:{openarc_port}[/green]")
from .launch_server import start_server
start_server(host=host, openarc_port=openarc_port)


Expand Down