2 changes: 1 addition & 1 deletion Dockerfile

@@ -127,7 +127,7 @@ echo ""
 echo "================================================"
 
 # Start server in background
-openarc serve start --host 0.0.0.0 --openarc-port 8000 &
+openarc serve start --host 0.0.0.0 --port 8000 &
 SERVER_PID=$!
 
 # Auto-load model if specified
11 changes: 8 additions & 3 deletions README.md

@@ -351,14 +351,19 @@ openarc add --model-name <model-name> --model-path <path/to/model> --engine ovge
 
 Reads added configurations from `openarc_config.json`.
 
-Display all saved configurations:
+Display all added models:
 ```
 openarc list
 ```
 
+Display config metadata for a specific model:
+```
+openarc list <model-name> -v
+```
+
 Remove a configuration:
 ```
-openarc list --remove --model-name <model-name>
+openarc list --remove <model-name>
 ```
 
 </details>

@@ -378,7 +383,7 @@ openarc serve start # defaults to 0.0.0.0:8000
 
 Configure host and port
 
 ```
-openarc serve start --host --openarc-port
+openarc serve start --host --port
 ```
 
 To load models on startup:
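
Taken together, the README changes standardize the CLI surface: `--openarc-port` becomes `--port`, `openarc list` gains a per-model verbose view, and `--remove` now takes the model name positionally. A quick sketch of the resulting commands (the model name is a placeholder):

```
openarc serve start --host 0.0.0.0 --port 8000
openarc list                       # all added models
openarc list my-model -v           # config metadata for one model
openarc list --remove my-model     # remove a saved configuration
```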
6 changes: 3 additions & 3 deletions examples/openvino_genai/ov_genai_AutoTokenizer.py

@@ -2,17 +2,17 @@
 from openvino_genai import GenerationConfig, LLMPipeline
 from transformers import AutoTokenizer
 
-model_dir = "/mnt/Ironwolf-4TB/Models/OpenVINO/Qwen/Qwen3-REAP-25B-A3B-int4_asym-ov"
+model_dir = "/mnt/Ironwolf-4TB/Models/OpenVINO/DeepSeek-V2-Lite-Chat-int4_asym-ov"
 
 pipe = LLMPipeline(
     model_dir,  # Path to the model directory. Remember this will not pull from hub like in transformers
-    device="GPU.0"
+    device="CPU"
 
 )
 tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
 generation_config = GenerationConfig(
-    max_new_tokens=128
+    max_new_tokens=24
 )
 
 prompt = "You're the fastest Llama this side of the equator. What's your favorite food? try to imagine"
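
The diff collapses the tail of this example script. Given the AutoTokenizer import, the prompt is presumably chat-templated before generation; a hedged sketch of that continuation (not the file's actual contents):

```python
# Hypothetical continuation of ov_genai_AutoTokenizer.py; the diff hides the rest.
# LLMPipeline.generate accepts a prompt string plus a GenerationConfig.
text = tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    tokenize=False,
    add_generation_prompt=True,
)
print(pipe.generate(text, generation_config))
```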
2 changes: 1 addition & 1 deletion pyproject.toml

@@ -45,7 +45,7 @@ build-backend = "setuptools.build_meta"
 packages = ["src"]
 
 [project.scripts]
-openarc = "src.cli.openarc_cli:cli"
+openarc = "src.cli:cli"
 
 [tool.uv]
 dev-dependencies = []
6 changes: 6 additions & 0 deletions src/cli/__init__.py

@@ -0,0 +1,6 @@
+"""
+OpenArc CLI - Command-line interface for OpenArc server operations.
+"""
+from .main import cli
+
+__all__ = ['cli']
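
`src/cli/main.py`, which the new entry point `src.cli:cli` and this `__init__.py` both resolve to, is not shown in the diff. A minimal sketch of what it presumably provides, inferred only from the imports elsewhere in this PR (everything beyond the `cli` and `console` names is a guess):

```python
# Hypothetical sketch of src/cli/main.py (not part of the visible diff).
# add.py does `from ..main import cli, console` and calls ctx.obj.server_config,
# so the group presumably wires a Rich console and a config-backed context object.
import click
from rich.console import Console

console = Console()  # shared console used by the command modules


@click.group()
@click.pass_context
def cli(ctx):
    """OpenArc command-line interface."""
    # Real code presumably attaches an object exposing .server_config here.
```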
3 changes: 3 additions & 0 deletions src/cli/groups/__init__.py

@@ -0,0 +1,3 @@
+"""
+Command groups for OpenArc CLI.
+"""
102 changes: 102 additions & 0 deletions src/cli/groups/add.py

@@ -0,0 +1,102 @@
+"""
+Add command - Add a model configuration to the config file.
+"""
+import json
+
+import click
+
+from ..main import cli, console
+from ..utils import validate_model_path
+
+
+@cli.command()
+@click.option('--model-name', '--mn',
+              required=True,
+              help='Public facing name of the model.')
+@click.option('--model-path', '--m',
+              required=True,
+              help='Path to OpenVINO IR converted model.')
+@click.option('--engine', '--en',
+              type=click.Choice(['ovgenai', 'openvino', 'optimum']),
+              required=True,
+              help='Engine used to load the model (ovgenai, openvino, optimum)')
+@click.option('--model-type', '--mt',
+              type=click.Choice(['llm', 'vlm', 'whisper', 'kokoro', 'emb', 'rerank']),
+              required=True,
+              help='Model type (llm, vlm, whisper, kokoro, emb, rerank)')
+@click.option('--device', '--d',
+              required=True,
+              help='Device(s) to load the model on.')
+@click.option("--runtime-config", "--rtc",
+              default=None,
+              help='OpenVINO runtime configuration as JSON string (e.g., \'{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}\').')
+@click.option('--vlm-type', '--vt',
+              type=click.Choice(['internvl2', 'llava15', 'llavanext', 'minicpmv26', 'phi3vision', 'phi4mm', 'qwen2vl', 'qwen25vl', 'gemma3']),
+              required=False,
+              default=None,
+              help='Vision model type. Used to map correct vision tokens.')
+@click.option('--draft-model-path', '--dmp',
+              required=False,
+              default=None,
+              help='Path to draft model for speculative decoding.')
+@click.option('--draft-device', '--dd',
+              required=False,
+              default=None,
+              help='Draft model device.')
+@click.option('--num-assistant-tokens', '--nat',
+              required=False,
+              default=None,
+              type=int,
+              help='Number of tokens draft model generates per step.')
+@click.option('--assistant-confidence-threshold', '--act',
+              required=False,
+              default=None,
+              type=float,
+              help='Confidence threshold for accepting draft tokens.')
+@click.pass_context
+def add(ctx, model_path, model_name, engine, model_type, device, runtime_config, vlm_type, draft_model_path, draft_device, num_assistant_tokens, assistant_confidence_threshold):
+    """- Add a model configuration to the config file."""
+
+    # Validate model path
+    if not validate_model_path(model_path):
+        console.print(f"[red]Model file check failed! {model_path} does not contain OpenVINO model files, or the path is malformed. Verify that the path is correct and that the model files match the source on the hub or the output of your conversion.[/red]")
+        ctx.exit(1)
+
+    # Parse runtime_config if provided
+    parsed_runtime_config = {}
+    if runtime_config:
+        try:
+            parsed_runtime_config = json.loads(runtime_config)
+            if not isinstance(parsed_runtime_config, dict):
+                console.print(f"[red]Error: runtime_config must be a JSON object (dictionary), got {type(parsed_runtime_config).__name__}[/red]")
+                console.print('[yellow]Example format: \'{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}\'[/yellow]')
+                ctx.exit(1)
+        except json.JSONDecodeError as e:
+            console.print(f"[red]Error parsing runtime_config JSON:[/red] {e}")
+            console.print('[yellow]Example format: \'{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}\'[/yellow]')
+            ctx.exit(1)
+
+    # Build and save configuration
+    load_config = {
+        "model_name": model_name,
+        "model_path": model_path,
+        "model_type": model_type,
+        "engine": engine,
+        "device": device,
+        "runtime_config": parsed_runtime_config,
+        "vlm_type": vlm_type if vlm_type else None
+    }
+
+    # Add speculative decoding options if provided
+    if draft_model_path:
+        load_config["draft_model_path"] = draft_model_path
+    if draft_device:
+        load_config["draft_device"] = draft_device
+    if num_assistant_tokens is not None:
+        load_config["num_assistant_tokens"] = num_assistant_tokens
+    if assistant_confidence_threshold is not None:
+        load_config["assistant_confidence_threshold"] = assistant_confidence_threshold
+
+    ctx.obj.server_config.save_model_config(model_name, load_config)
+    console.print(f"[green]Model configuration saved:[/green] {model_name}")
+    console.print(f"[dim]Use 'openarc load {model_name}' to load this model.[/dim]")
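
Finally, an end-to-end invocation of the new command (the name is a placeholder; the path mirrors the example script above):

```
openarc add \
  --model-name DeepSeek-V2-Lite-Chat-int4 \
  --model-path /mnt/Ironwolf-4TB/Models/OpenVINO/DeepSeek-V2-Lite-Chat-int4_asym-ov \
  --engine ovgenai \
  --model-type llm \
  --device CPU \
  --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}'
```

On success the command reports the saved configuration and suggests `openarc load <model-name>` to load it.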