From de9faccdfa562ec0d27d676ec5d613fa4b2eff7d Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 19:18:25 +0000 Subject: [PATCH 01/20] initial file structure created. Populated with unimplemented files Signed-off-by: Madison Evans --- comps/router/deployment/docker_compose/compose.yaml | 0 .../router/deployment/docker_compose/configs/routellm_config.yaml | 0 comps/router/deployment/docker_compose/configs/router.yaml | 0 .../deployment/docker_compose/configs/semantic_router_config.yaml | 0 comps/router/deployment/docker_compose/deploy_router.sh | 0 comps/router/src/Dockerfile | 0 comps/router/src/README.md | 0 comps/router/src/integrations/controllers/base_controller.py | 0 comps/router/src/integrations/controllers/controller_factory.py | 0 .../controllers/routellm_controller/routellm_controller.py | 0 .../semantic_router_controller/semantic_router_controller.py | 0 comps/router/src/opea_router_microservice.py | 0 comps/router/src/requirements.txt | 0 13 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 comps/router/deployment/docker_compose/compose.yaml create mode 100644 comps/router/deployment/docker_compose/configs/routellm_config.yaml create mode 100644 comps/router/deployment/docker_compose/configs/router.yaml create mode 100644 comps/router/deployment/docker_compose/configs/semantic_router_config.yaml create mode 100644 comps/router/deployment/docker_compose/deploy_router.sh create mode 100644 comps/router/src/Dockerfile create mode 100644 comps/router/src/README.md create mode 100644 comps/router/src/integrations/controllers/base_controller.py create mode 100644 comps/router/src/integrations/controllers/controller_factory.py create mode 100644 comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py create mode 100644 comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py create mode 100644 comps/router/src/opea_router_microservice.py create mode 100644 
comps/router/src/requirements.txt diff --git a/comps/router/deployment/docker_compose/compose.yaml b/comps/router/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/deployment/docker_compose/configs/routellm_config.yaml b/comps/router/deployment/docker_compose/configs/routellm_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/deployment/docker_compose/configs/router.yaml b/comps/router/deployment/docker_compose/configs/router.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/Dockerfile b/comps/router/src/Dockerfile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/README.md b/comps/router/src/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/integrations/controllers/base_controller.py b/comps/router/src/integrations/controllers/base_controller.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/integrations/controllers/controller_factory.py b/comps/router/src/integrations/controllers/controller_factory.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py b/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py 
b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/opea_router_microservice.py b/comps/router/src/opea_router_microservice.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt new file mode 100644 index 0000000000..e69de29bb2 From 2763712e6de5782f185cba5aab2f8912e6a4eec8 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:02:49 +0000 Subject: [PATCH 02/20] added relevant code to files within comps/router/deployment Signed-off-by: Madison Evans --- .../deployment/docker_compose/compose.yaml | 32 +++++++++++++ .../configs/routellm_config.yaml | 28 +++++++++++ .../docker_compose/configs/router.yaml | 11 +++++ .../configs/semantic_router_config.yaml | 17 +++++++ .../docker_compose/deploy_router.sh | 47 +++++++++++++++++++ 5 files changed, 135 insertions(+) diff --git a/comps/router/deployment/docker_compose/compose.yaml b/comps/router/deployment/docker_compose/compose.yaml index e69de29bb2..6444723dd3 100644 --- a/comps/router/deployment/docker_compose/compose.yaml +++ b/comps/router/deployment/docker_compose/compose.yaml @@ -0,0 +1,32 @@ +services: + router_service: + build: + context: ../../../.. 
+ dockerfile: comps/router/src/Dockerfile + + image: "${REGISTRY_AND_REPO:-opea/router}:${TAG:-latest}" + container_name: opea_router + + volumes: + - ./configs:/app/configs + + environment: + CONFIG_PATH: /app/configs/router.yaml + + WEAK_ENDPOINT: ${WEAK_ENDPOINT:-http://opea_router:8000/weak} + STRONG_ENDPOINT: ${STRONG_ENDPOINT:-http://opea_router:8000/strong} + WEAK_MODEL_ID: ${WEAK_MODEL_ID:-openai/gpt-3.5-turbo} + STRONG_MODEL_ID: ${STRONG_MODEL_ID:-openai/gpt-4} + + HF_TOKEN: ${HF_TOKEN:?set HF_TOKEN} + OPENAI_API_KEY: ${OPENAI_API_KEY:?set OPENAI_API_KEY} + + CONTROLLER_TYPE: ${CONTROLLER_TYPE:-routellm} + + ports: + - "6000:6000" + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/router/deployment/docker_compose/configs/routellm_config.yaml b/comps/router/deployment/docker_compose/configs/routellm_config.yaml index e69de29bb2..166faefb7e 100644 --- a/comps/router/deployment/docker_compose/configs/routellm_config.yaml +++ b/comps/router/deployment/docker_compose/configs/routellm_config.yaml @@ -0,0 +1,28 @@ +# which embedder backend to use ("huggingface" or "openai") +embedding_provider: "huggingface" + +# export ROUTELLM_EMBEDDING_MODEL_NAME="your-org/other-embed" +embedding_model_name: "intfloat/e5-base-v2" + +routing_algorithm: "mf" +threshold: 0.3 + +config: + sw_ranking: + arena_battle_datasets: + - "lmsys/lmsys-arena-human-preference-55k" + - "routellm/gpt4_judge_battles" + arena_embedding_datasets: + - "routellm/arena_battles_embeddings" + - "routellm/gpt4_judge_battles_embeddings" + + causal_llm: + checkpoint_path: "routellm/causal_llm_gpt4_augmented" + + bert: + checkpoint_path: "routellm/bert_gpt4_augmented" + + mf: + checkpoint_path: "madison-evans/routellm-e5-base-v2" + use_openai_embeddings: false + diff --git a/comps/router/deployment/docker_compose/configs/router.yaml b/comps/router/deployment/docker_compose/configs/router.yaml index e69de29bb2..96ee065174 100644 --- 
a/comps/router/deployment/docker_compose/configs/router.yaml +++ b/comps/router/deployment/docker_compose/configs/router.yaml @@ -0,0 +1,11 @@ +model_map: + weak: + endpoint: "${WEAK_ENDPOINT:-http://opea_router:8000/weak}" + model_id: "${WEAK_MODEL_ID}" + strong: + endpoint: "${STRONG_ENDPOINT:-http://opea_router:8000/strong}" + model_id: "${STRONG_MODEL_ID}" + +controller_config_paths: + routellm: "/app/configs/routellm_config.yaml" + semantic_router: "/app/configs/semantic_router_config.yaml" diff --git a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml index e69de29bb2..c98b9f7a4d 100644 --- a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml +++ b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml @@ -0,0 +1,17 @@ +embedding_provider: "huggingface" + +embedding_models: + huggingface: "BAAI/bge-base-en-v1.5" + openai: "text-embedding-ada-002" + +routes: + - name: "strong" + utterances: + - "Prove the Pythagorean theorem using geometric arguments..." + - "Explain the Calvin cycle..." + - "Discuss the ethical implications of deploying AI..." + - name: "weak" + utterances: + - "Hello, how are you?" + - "What's 2 + 2?" + - "Can you tell me a funny joke?" 
diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh index e69de29bb2..f3e03476a1 100644 --- a/comps/router/deployment/docker_compose/deploy_router.sh +++ b/comps/router/deployment/docker_compose/deploy_router.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# ======================== +# OPEA Router Deploy Script +# ======================== + +# Load environment variables from a .env file if present +if [ -f .env ]; then + echo "[INFO] Loading environment variables from .env" + export $(grep -v '^#' .env | xargs) +fi + +# Required variables +REQUIRED_VARS=("HF_TOKEN" "OPENAI_API_KEY") + +# Validate that all required variables are set +for VAR in "${REQUIRED_VARS[@]}"; do + if [ -z "${!VAR}" ]; then + echo "[ERROR] $VAR is not set. Please set it in your environment or .env file." + exit 1 + fi +done + +# Default values for Docker image +REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} +TAG=${TAG:-latest} + +# Export them so Docker Compose can see them +export REGISTRY_AND_REPO +export TAG + +# Print summary +echo "[INFO] Starting deployment with the following config:" +echo " Image: ${REGISTRY_AND_REPO}:${TAG}" +echo " HF_TOKEN: ***${HF_TOKEN: -4}" +echo " OPENAI_API_KEY: ***${OPENAI_API_KEY: -4}" +echo "" + +# Compose up +echo "[INFO] Launching Docker Compose service..." 
+docker compose -f compose.yaml up --build + +# Wait a moment then check status +sleep 2 +docker ps --filter "name=opea_router" + +echo "[SUCCESS] Router service deployed and running on http://localhost:6000" From 358e027d1f21137fabb2154b0db147492781c9d0 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:11:22 +0000 Subject: [PATCH 03/20] added Dockerfile, opea_router_microservice.py, README.md, and requirements.txt contents Signed-off-by: Madison Evans --- comps/router/src/Dockerfile | 31 ++++ comps/router/src/README.md | 94 ++++++++++++ comps/router/src/opea_router_microservice.py | 96 ++++++++++++ comps/router/src/requirements.txt | 146 +++++++++++++++++++ 4 files changed, 367 insertions(+) diff --git a/comps/router/src/Dockerfile b/comps/router/src/Dockerfile index e69de29bb2..3bafc90c3d 100644 --- a/comps/router/src/Dockerfile +++ b/comps/router/src/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.10-slim + +# Install git +RUN apt-get update \ + && apt-get install -y git \ + && rm -rf /var/lib/apt/lists/* + +# Add a non-root user +RUN useradd -m -s /bin/bash user \ + && chown -R user /home/user + +# Copy the *entire* comps/ package +WORKDIR /home/user +COPY comps /home/user/comps + +# Install deps from the router’s requirements.txt +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r /home/user/comps/router/src/requirements.txt + +# Make imports work +ENV PYTHONPATH=/home/user + +# Switch to non-root +USER user + +# Expose the port +EXPOSE 6000 + +# Run the microservice +WORKDIR /home/user/comps/router/src +CMD ["python", "opea_router_microservice.py"] diff --git a/comps/router/src/README.md b/comps/router/src/README.md index e69de29bb2..bdcb6c3e52 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -0,0 +1,94 @@ +> Location: comps/router/src/README.md + +A lightweight HTTP service that routes incoming text prompts to the most appropriate LLM back‑end (e.g. 
strong vs weak) and returns the target inference endpoint. It is built on the OPEA micro‑service SDK and can switch between two controller back‑ends: + +- RouteLLM (matrix‑factorisation, dataset‑driven) +- Semantic‑Router (encoder‑based semantic similarity) + +The router is stateless; it inspects the prompt, consults the configured controller, and replies with a single URL such as http://opea_router:8000/strong. + +## Build + +``` +# From repo root 📂 +# Build the container image directly +$ docker build -t opea/router:latest -f comps/router/src/Dockerfile . +``` + +Alternatively, the Docker Compose workflow below will build the image for you. + +``` +# Navigate to the compose bundle +$ cd comps/router/deployment/docker_compose + +# Populate required secrets (or create a .env file) +$ export HF_TOKEN="" +$ export OPENAI_API_KEY="" + +# Optional: point to custom inference endpoints / models +$ export WEAK_ENDPOINT=http://my‑llm‑gateway:8000/weak +$ export STRONG_ENDPOINT=http://my‑llm‑gateway:8000/strong +$ export CONTROLLER_TYPE=routellm # or semantic_router + +# Launch (using the helper script) +$ ./deploy_router.sh +``` + +*The service listens on http://localhost:6000 (host‑mapped from container port 6000). 
Logs stream to STDOUT; use Ctrl‑C to stop or docker compose down to clean up.* + +## API Usage + +| Method | URL | Body schema | Success response | +|--------|------------|------------------------------------|----------------------------------------------| +| `POST` | `/v1/route`| `{ "text": "" }` | `200 OK` → `{ "url": "" }` | + + +**Example** + +``` +curl -X POST http://localhost:6000/v1/route \ + -H "Content-Type: application/json" \ + -d '{"text": "Explain the Calvin cycle in photosynthesis."}' +``` + +Expected JSON *(assuming the strong model wins the routing decision)*: + +``` +{ + "url": "http://opea_router:8000/strong" +} +``` + +## Configuration Reference + +| Variable / file | Purpose | Default | Where set | +|------------------------------------------|---------------------------------------------------|-------------------------------------------|--------------------| +| `HF_TOKEN` | Hugging Face auth token for encoder models | — | `.env` / shell | +| `OPENAI_API_KEY` | OpenAI key (only if `embedding_provider: openai`) | — | `.env` / shell | +| `CONTROLLER_TYPE` | `routellm` or `semantic_router` | `routellm` | env / `router.yaml`| +| `CONFIG_PATH` | Path to global router YAML | `/app/configs/router.yaml` | Compose env | +| `WEAK_ENDPOINT` / `STRONG_ENDPOINT` | Final inference URLs | container DNS | Compose env | +| `WEAK_MODEL_ID` / `STRONG_MODEL_ID` | Model IDs forwarded to controllers | `openai/gpt-3.5-turbo`, `openai/gpt-4` | Compose env | + + +## Troubleshooting + +`HF_TOKEN` is not set – export the token or place it in a .env file next to compose.yaml. + +Unknown controller type – `CONTROLLER_TYPE` must be either routellm or semantic_router and a matching entry must exist in controller_config_paths. + +Routed model `` not in `model_map` – make sure model_map in router.yaml lists both strong and weak with the correct model_id values. + +Use docker compose logs -f router_service for real‑time debugging. 
+ + +## Testing + +Includes an end-to-end script for the RouteLLM controller: + +```bash +chmod +x tests/router/test_router_routellm_on_xeon.sh +export HF_TOKEN="" +export OPENAI_API_KEY="" +tests/router/test_router_routellm_on_xeon.sh +``` \ No newline at end of file diff --git a/comps/router/src/opea_router_microservice.py b/comps/router/src/opea_router_microservice.py index e69de29bb2..062011d737 100644 --- a/comps/router/src/opea_router_microservice.py +++ b/comps/router/src/opea_router_microservice.py @@ -0,0 +1,96 @@ +import os +import logging +import yaml +from comps import ( + CustomLogger, + TextDoc, + ServiceType, + register_microservice, + opea_microservices, +) +from comps.router.src.integrations.controllers.controller_factory import ControllerFactory +from pydantic import BaseModel, Field + +# Data model for endpoint response +class RouteEndpointDoc(BaseModel): + url: str = Field(..., description="URL of the chosen inference endpoint") + +# Set up logging +logger = CustomLogger("opea_router_microservice") +logflag = os.getenv("LOGFLAG", False) + +CONFIG_PATH = os.getenv("CONFIG_PATH") + +_config_data = {} +_controller_factory = None +_controller = None + +def _load_config(): + global _config_data, _controller_factory, _controller + + try: + with open(CONFIG_PATH, "r") as f: + new_data = yaml.safe_load(f) or {} + except Exception as e: + logger.error(f"Failed to load config: {e}") + raise RuntimeError(f"Failed to load config: {e}") + + _config_data = new_data + logger.info(f"[Router] Loaded config data from: {CONFIG_PATH}") + + if _controller_factory is None: + _controller_factory = ControllerFactory() + + model_map = _config_data.get("model_map", {}) + controller_type = os.getenv("CONTROLLER_TYPE") or _config_data.get("controller_type", "routellm") + + # look up the correct controller-config path + try: + controller_config_path = _config_data["controller_config_paths"][controller_type] + except KeyError: + raise RuntimeError(f"No config path for 
controller_type='{controller_type}' in global config") + + + _controller = _controller_factory.factory( + controller_config=controller_config_path, + model_map=model_map + ) + + logger.info("[Router] Controller re-initialized successfully.") + +# Initial config load at startup +_load_config() + +@register_microservice( + name="opea_service@router", + service_type=ServiceType.LLM, + endpoint="/v1/route", + host="0.0.0.0", + port=6000, + input_datatype=TextDoc, + output_datatype=RouteEndpointDoc, +) +def route_microservice(input: TextDoc) -> RouteEndpointDoc: + """ + Microservice that decides which model endpoint is best for the given text input. + Returns only the route URL (does not forward). + """ + if not _controller: + raise RuntimeError("Controller is not initialized — config load failed?") + + query_content = input.text + messages = [{"content": query_content}] + + try: + endpoint = _controller.route(messages) + if not endpoint: + raise ValueError("No suitable model endpoint found.") + return RouteEndpointDoc(url=endpoint) + + except Exception as e: + logger.error(f"[Router] Error during model routing: {e}") + raise + +if __name__ == "__main__": + logger.info("OPEA Router Microservice is starting...") + opea_microservices["opea_service@router"].start() diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index e69de29bb2..5c943fcdd9 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -0,0 +1,146 @@ +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.11.18 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +aurelio-sdk==0.0.19 +av==14.4.0 +cachetools==5.5.2 +certifi==2025.4.26 +charset-normalizer==3.4.2 +click==8.2.0 +colorama==0.4.6 +colorlog==6.9.0 +dataclasses-json==0.6.7 +Deprecated==1.2.18 +distro==1.9.0 +docarray==0.41.0 +docx2txt==0.9 +durationpy==0.10 +embreex==2.17.7.post6 +fastapi==0.115.12 +filelock==3.18.0 +frozenlist==1.6.0 +fsspec==2025.5.0 
+google-auth==2.40.1 +googleapis-common-protos==1.70.0 +greenlet==3.2.2 +grpcio==1.71.0 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.6.4 +httpx==0.28.1 +httpx-sse==0.4.0 +huggingface-hub==0.31.4 +idna==3.10 +importlib_metadata==8.6.1 +jax==0.6.0 +jaxlib==0.6.0 +Jinja2==3.1.6 +jiter==0.10.0 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-specifications==2025.4.1 +kubernetes==32.0.1 +langchain==0.3.25 +langchain-community==0.3.24 +langchain-core==0.3.60 +langchain-text-splitters==0.3.8 +langsmith==0.3.42 +litellm==1.70.0 +lxml==5.4.0 +lz4==4.4.4 +manifold3d==3.1.0 +mapbox_earcut==1.0.3 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +mdurl==0.1.2 +ml_dtypes==0.5.1 +multidict==6.4.4 +mypy_extensions==1.1.0 +networkx==3.4.2 +numpy==2.2.6 +oauthlib==3.2.2 +openai==1.75.0 +opentelemetry-api==1.33.1 +opentelemetry-exporter-otlp==1.33.1 +opentelemetry-exporter-otlp-proto-common==1.33.1 +opentelemetry-exporter-otlp-proto-grpc==1.33.1 +opentelemetry-exporter-otlp-proto-http==1.33.1 +opentelemetry-proto==1.33.1 +opentelemetry-sdk==1.33.1 +opentelemetry-semantic-conventions==0.54b1 +opt_einsum==3.4.0 +orjson==3.10.18 +packaging==24.2 +pandas==2.2.3 +pillow==11.2.1 +prometheus-fastapi-instrumentator==7.1.0 +prometheus_client==0.22.0 +propcache==0.3.1 +protobuf==5.29.4 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycollada==0.9 +pydantic==2.11.4 +pydantic-settings==2.9.1 +pydantic_core==2.33.2 +pydub==0.25.1 +Pygments==2.19.1 +pypdf==5.5.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.0 +python-multipart==0.0.20 +pytz==2025.2 +PyYAML==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rich==14.0.0 +rpds-py==0.25.0 +rsa==4.9.1 +rtree==1.4.0 +scipy==1.15.3 +semantic-router==0.1.8 +shapely==2.1.1 +shortuuid==1.0.13 +six==1.17.0 +sniffio==1.3.1 +SQLAlchemy==2.0.41 +starlette==0.46.2 +svg.path==6.3 +tenacity==9.1.2 +tiktoken==0.9.0 +tokenizers==0.21.1 +tornado==6.5 +tqdm==4.67.1 
+trimesh==4.6.10 +types-Pillow==10.2.0.20240822 +types-requests==2.31.0.6 +types-urllib3==1.26.25.14 +typing-inspect==0.9.0 +typing-inspection==0.4.0 +typing_extensions==4.13.2 +tzdata==2025.2 +urllib3==1.26.20 +uvicorn==0.34.2 +uvloop==0.21.0 +vhacdx==0.0.8.post2 +watchfiles==1.0.5 +websocket-client==1.8.0 +websockets==15.0.1 +wrapt==1.17.2 +xxhash==3.5.0 +yarl==1.20.0 +zipp==3.21.0 +zstandard==0.23.0 + +# Include the forked RouteLLM repository +git+https://github.com/SAPD-Intel/RouteLLM.git + From 4fc96906df2021a5f41ee4343922692cba3c02cd Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:13:26 +0000 Subject: [PATCH 04/20] added controller components for router instances Signed-off-by: Madison Evans --- .../controllers/base_controller.py | 15 ++++ .../controllers/controller_factory.py | 43 ++++++++++ .../routellm_controller.py | 72 ++++++++++++++++ .../semantic_router_controller.py | 86 +++++++++++++++++++ 4 files changed, 216 insertions(+) diff --git a/comps/router/src/integrations/controllers/base_controller.py b/comps/router/src/integrations/controllers/base_controller.py index e69de29bb2..2aeb4a4384 100644 --- a/comps/router/src/integrations/controllers/base_controller.py +++ b/comps/router/src/integrations/controllers/base_controller.py @@ -0,0 +1,15 @@ +from abc import ABC, abstractmethod + + +class BaseController(ABC): + """ + An abstract base controller class providing a framework for routing and + endpoint retrieval functionality. + """ + + @abstractmethod + def route(self, messages, **kwargs): + """ + Determines the appropriate routing based on input messages. 
+ """ + pass diff --git a/comps/router/src/integrations/controllers/controller_factory.py b/comps/router/src/integrations/controllers/controller_factory.py index e69de29bb2..f8ac523e4a 100644 --- a/comps/router/src/integrations/controllers/controller_factory.py +++ b/comps/router/src/integrations/controllers/controller_factory.py @@ -0,0 +1,43 @@ +from typing import Dict +import yaml +from comps.router.src.integrations.controllers.routellm_controller.routellm_controller import RouteLLMController +from comps.router.src.integrations.controllers.semantic_router_controller.semantic_router_controller import SemanticRouterController +import os +from dotenv import load_dotenv + +load_dotenv() + +HF_TOKEN = os.getenv('HF_TOKEN', '') +OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '') +CONTROLLER_TYPE = os.getenv('CONTROLLER_TYPE', None) + +class ControllerFactory: + + @staticmethod + def get_controller_config(config_filename: str) -> Dict: + try: + with open(config_filename, "r") as file: + config = yaml.safe_load(file) + return config + except FileNotFoundError as e: + raise FileNotFoundError(f"Configuration file '{config_filename}' not found.") from e + except yaml.YAMLError as e: + raise ValueError(f"Error parsing the configuration file: {e}") from e + + @staticmethod + def factory(controller_config: str, model_map: Dict): + """Returns an instance of the appropriate controller based on the controller_type.""" + + config = ControllerFactory.get_controller_config(controller_config) + + if CONTROLLER_TYPE == "routellm": + return RouteLLMController(config=config, api_key=OPENAI_API_KEY, hf_token=HF_TOKEN, model_map=model_map) + + elif CONTROLLER_TYPE == "semantic_router": + return SemanticRouterController( + config=config, + api_key=OPENAI_API_KEY, + model_map=model_map + ) + else: + raise ValueError(f"Unknown controller type: {CONTROLLER_TYPE}") diff --git a/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py 
b/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py index e69de29bb2..8f5ee99971 100644 --- a/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py +++ b/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py @@ -0,0 +1,72 @@ +import logging +import os +from comps.router.src.integrations.controllers.base_controller import BaseController +from routellm.controller import Controller as RouteLLM_Controller + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + +class RouteLLMController(BaseController): + def __init__(self, config, hf_token=None, api_key=None, model_map=None): + self.config = config + self.model_map = model_map or {} + + # Determine embedding provider + provider = config.get("embedding_provider", "huggingface").lower() + + # Resolve embedding model: env override ↔️ config default + env_var = "ROUTELLM_EMBEDDING_MODEL_NAME" + default_model = config.get("embedding_model_name") + self.embedding_model = os.getenv(env_var, default_model) + if not self.embedding_model: + raise ValueError(f"No embedding_model_name in config and {env_var} not set") + logging.info(f"[RouteLLM] using {provider} embedding model: {self.embedding_model}") + + # Inject into nested mf config + nested = self.config.setdefault("config", {}) + mf = nested.setdefault("mf", {}) + mf["embedding_model_name"] = self.embedding_model + + # Validate routing settings + self.routing_algorithm = config.get("routing_algorithm") + if not self.routing_algorithm: + raise ValueError("routing_algorithm must be specified in configuration") + self.threshold = config.get("threshold", 0.2) + + # Extract strong/weak model IDs + strong_model = self.model_map.get("strong", {}).get("model_id") + weak_model = self.model_map.get("weak", {}).get("model_id") + if not strong_model or not weak_model: + raise ValueError("model_map must include both 'strong' and 
'weak' entries") + + # Prepare Env for OpenAI if needed + if provider == "openai": + if not api_key: + raise ValueError("api_key is required for OpenAI embeddings") + os.environ["OPENAI_API_KEY"] = api_key + + # Initialize the underlying controller (keyword args to match signature) + self.controller = RouteLLM_Controller( + routers=[self.routing_algorithm], + strong_model=strong_model, + weak_model=weak_model, + config=nested, + hf_token=hf_token if provider == "huggingface" else None, + api_key= api_key if provider == "openai" else None, + ) + + def route(self, messages): + routed_name = self.controller.get_routed_model( + messages, + router=self.routing_algorithm, + threshold=self.threshold, + ) + endpoint_key = next( + (k for k, v in self.model_map.items() if v.get("model_id") == routed_name), + None + ) + if not endpoint_key: + raise ValueError(f"Routed model '{routed_name}' not in model_map") + return self.model_map[endpoint_key]["endpoint"] diff --git a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py index e69de29bb2..80000c7012 100644 --- a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py +++ b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py @@ -0,0 +1,86 @@ +import logging +import os +from comps.cores.telemetry.opea_telemetry import opea_telemetry +from comps.router.src.integrations.controllers.base_controller import BaseController +from semantic_router.routers import SemanticRouter +from semantic_router.encoders import OpenAIEncoder, HuggingFaceEncoder +from semantic_router import Route +# from decorators import log_latency +from dotenv import load_dotenv + +load_dotenv() +hf_token = os.getenv("HF_TOKEN", "") +openai_api_key = os.getenv("OPENAI_API_KEY", "") + +logging.basicConfig( + level=logging.INFO, + 
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + +class SemanticRouterController(BaseController): + def __init__(self, config, api_key=None, model_map=None): + self.config = config + self.model_map = model_map or {} + + # 1) grab provider + model mapping + provider = config.get("embedding_provider", "").lower() + models = config.get("embedding_models", {}) + + if provider not in {"huggingface", "openai"}: + raise ValueError(f"Unsupported embedding_provider: '{provider}'") + if provider not in models: + raise ValueError(f"No embedding_models entry for provider '{provider}'") + + model_name = models[provider] + logging.info(f"SemanticRouter using {provider} encoder '{model_name}'") + + if provider == "huggingface": + hf_token = os.getenv("HF_TOKEN", "") + self.encoder = HuggingFaceEncoder( + name=model_name, + model_kwargs={"token": hf_token}, + tokenizer_kwargs={"token": hf_token}, + ) + else: + if not api_key: + raise ValueError("valid api key is required for selected model provider") + os.environ["OPENAI_API_KEY"] = api_key + self.encoder = OpenAIEncoder(model=model_name) + + # 4) build your routing layer + self._build_route_layer() + + def _build_route_layer(self): + # Build routes from the local controller config + routes = self.config.get("routes", []) + route_list = [Route(name=route["name"], utterances=route["utterances"]) for route in routes] + + # Reinitialize SemanticRouter to clear previous embeddings when switching models + self.route_layer = SemanticRouter(encoder=self.encoder, routes=route_list) + logging.info("[DEBUG] Successfully re-initialized SemanticRouter with fresh embeddings.") + + @opea_telemetry + def route(self, messages): + """ + Determines which inference endpoint to use based on the provided messages. + It looks up the model_map to retrieve the nested endpoint value. 
+ """ + query = messages[0]["content"] + + route_choice = self.route_layer(query) + endpoint_key = route_choice.name + + if not endpoint_key: + routes = self.config.get("routes", []) + if routes: + endpoint_key = routes[0]["name"] + else: + raise ValueError("No routes available in the configuration.") + + # Lookup the endpoint in the model_map + model_entry = self.model_map.get(endpoint_key) + if model_entry is None: + raise ValueError(f"Inference endpoint '{endpoint_key}' not found in global model_map.") + + # Return the endpoint from the model map + return model_entry["endpoint"] From de5cfee900ff853fb7e35522e82ffb34ddcdc9a7 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:14:33 +0000 Subject: [PATCH 05/20] added initial routellm controller test script in router directory Signed-off-by: Madison Evans --- tests/router/test_router_routellm_on_xeon.sh | 59 ++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/router/test_router_routellm_on_xeon.sh diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh new file mode 100644 index 0000000000..a16b4b0a03 --- /dev/null +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# End-to-end test – Router micro-service, RouteLLM controller (CPU/Xeon) +set -xeuo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WORKPATH="$(cd "$SCRIPT_DIR/../.." && pwd)" +host_ip=$(hostname -I | awk '{print $1}') +LOG_PATH="$WORKPATH/tests" +ROUTER_PORT=6000 +CONTAINER=opea_router + +# Required secrets +: "${HF_TOKEN:?Need HF_TOKEN}" +: "${OPENAI_API_KEY:?Need OPENAI_API_KEY}" + +# Set default image info (matches deploy script) +REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} +TAG=${TAG:-latest} + +export HF_TOKEN OPENAI_API_KEY REGISTRY_AND_REPO TAG + +build_image() { + cd "$WORKPATH" + docker build --no-cache -t "${REGISTRY_AND_REPO}:${TAG}" \ + -f comps/router/src/Dockerfile . 
+} + +start_router() { + cd "$WORKPATH/comps/router/deployment/docker_compose" + docker compose -f compose.yaml up router_service -d + sleep 20 +} + +validate() { + # weak route + rsp=$(curl -s http://${host_ip}:${ROUTER_PORT}/v1/route \ + -X POST -H 'Content-Type: application/json' \ + -d '{"text":"What is 2 + 2?"}') + [[ $rsp == *"weak"* ]] || { echo "weak routing failed ($rsp)"; exit 1; } + + # strong route + hard='Explain Gödel’s incompleteness theorem in formal terms.' + rsp=$(curl -s http://${host_ip}:${ROUTER_PORT}/v1/route \ + -X POST -H 'Content-Type: application/json' \ + -d "{\"text\":\"$hard\"}") + [[ $rsp == *"strong"* ]] || { echo "strong routing failed ($rsp)"; exit 1; } +} + +cleanup() { + cd "$WORKPATH/comps/router/deployment/docker_compose" + docker compose -f compose.yaml down --remove-orphans +} + +trap cleanup EXIT +cleanup +build_image +start_router +validate +echo "✅ RouteLLM controller test passed." From 56b8b2df24f9ec32d41574f2d3ea6b3362b8fa53 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:23:58 +0000 Subject: [PATCH 06/20] fixed requirements.txt issue Signed-off-by: Madison Evans --- comps/router/src/requirements.txt | 174 ++++++------------------------ 1 file changed, 32 insertions(+), 142 deletions(-) diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index 5c943fcdd9..998f5f7581 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -1,145 +1,35 @@ -aiofiles==24.1.0 -aiohappyeyeballs==2.6.1 -aiohttp==3.11.18 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -aurelio-sdk==0.0.19 -av==14.4.0 -cachetools==5.5.2 -certifi==2025.4.26 -charset-normalizer==3.4.2 -click==8.2.0 -colorama==0.4.6 -colorlog==6.9.0 -dataclasses-json==0.6.7 -Deprecated==1.2.18 -distro==1.9.0 -docarray==0.41.0 -docx2txt==0.9 -durationpy==0.10 -embreex==2.17.7.post6 -fastapi==0.115.12 -filelock==3.18.0 -frozenlist==1.6.0 -fsspec==2025.5.0 -google-auth==2.40.1 
-googleapis-common-protos==1.70.0 -greenlet==3.2.2 -grpcio==1.71.0 -h11==0.16.0 -httpcore==1.0.9 -httptools==0.6.4 -httpx==0.28.1 -httpx-sse==0.4.0 -huggingface-hub==0.31.4 -idna==3.10 -importlib_metadata==8.6.1 -jax==0.6.0 -jaxlib==0.6.0 -Jinja2==3.1.6 -jiter==0.10.0 -jsonpatch==1.33 -jsonpointer==3.0.0 -jsonschema==4.23.0 -jsonschema-specifications==2025.4.1 -kubernetes==32.0.1 -langchain==0.3.25 -langchain-community==0.3.24 -langchain-core==0.3.60 -langchain-text-splitters==0.3.8 -langsmith==0.3.42 -litellm==1.70.0 -lxml==5.4.0 -lz4==4.4.4 -manifold3d==3.1.0 -mapbox_earcut==1.0.3 -markdown-it-py==3.0.0 -MarkupSafe==3.0.2 -marshmallow==3.26.1 -mdurl==0.1.2 -ml_dtypes==0.5.1 -multidict==6.4.4 -mypy_extensions==1.1.0 -networkx==3.4.2 -numpy==2.2.6 -oauthlib==3.2.2 -openai==1.75.0 -opentelemetry-api==1.33.1 -opentelemetry-exporter-otlp==1.33.1 -opentelemetry-exporter-otlp-proto-common==1.33.1 -opentelemetry-exporter-otlp-proto-grpc==1.33.1 -opentelemetry-exporter-otlp-proto-http==1.33.1 -opentelemetry-proto==1.33.1 -opentelemetry-sdk==1.33.1 -opentelemetry-semantic-conventions==0.54b1 -opt_einsum==3.4.0 -orjson==3.10.18 -packaging==24.2 -pandas==2.2.3 -pillow==11.2.1 -prometheus-fastapi-instrumentator==7.1.0 -prometheus_client==0.22.0 -propcache==0.3.1 -protobuf==5.29.4 -pyasn1==0.6.1 -pyasn1_modules==0.4.2 -pycollada==0.9 -pydantic==2.11.4 -pydantic-settings==2.9.1 -pydantic_core==2.33.2 -pydub==0.25.1 -Pygments==2.19.1 -pypdf==5.5.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -python-multipart==0.0.20 -pytz==2025.2 -PyYAML==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -requests-oauthlib==2.0.0 -requests-toolbelt==1.0.0 -rich==14.0.0 -rpds-py==0.25.0 -rsa==4.9.1 -rtree==1.4.0 -scipy==1.15.3 -semantic-router==0.1.8 -shapely==2.1.1 -shortuuid==1.0.13 -six==1.17.0 -sniffio==1.3.1 -SQLAlchemy==2.0.41 -starlette==0.46.2 -svg.path==6.3 -tenacity==9.1.2 -tiktoken==0.9.0 -tokenizers==0.21.1 -tornado==6.5 -tqdm==4.67.1 -trimesh==4.6.10 
-types-Pillow==10.2.0.20240822 -types-requests==2.31.0.6 -types-urllib3==1.26.25.14 -typing-inspect==0.9.0 -typing-inspection==0.4.0 -typing_extensions==4.13.2 -tzdata==2025.2 -urllib3==1.26.20 -uvicorn==0.34.2 -uvloop==0.21.0 -vhacdx==0.0.8.post2 -watchfiles==1.0.5 -websocket-client==1.8.0 -websockets==15.0.1 -wrapt==1.17.2 -xxhash==3.5.0 -yarl==1.20.0 -zipp==3.21.0 -zstandard==0.23.0 +# Basic web and async +fastapi +uvicorn[standard] +aiofiles +aiohttp +httpx + +# Data handling and parsing +pydantic +requests +python-dotenv +python-multipart +pyyaml +pypdf +docx2txt + +# Larger frameworks and libs +semantic-router +langchain +langchain-community +docarray[full] + +# Observability and telemetry +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator + +# K8s, images, misc +kubernetes +pillow +shortuuid # Include the forked RouteLLM repository git+https://github.com/SAPD-Intel/RouteLLM.git From 23aeeeb46541abf62c0f08fdba2d210542e2003c Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Tue, 20 May 2025 20:26:28 +0000 Subject: [PATCH 07/20] added HUGGINGFACEHUB_API_TOKEN as an env variable Signed-off-by: Madison Evans --- .../deployment/docker_compose/compose.yaml | 3 ++ .../configs/routellm_config.yaml | 4 +- .../docker_compose/configs/router.yaml | 3 ++ .../configs/semantic_router_config.yaml | 3 ++ .../docker_compose/deploy_router.sh | 7 +++- comps/router/src/Dockerfile | 3 ++ comps/router/src/README.md | 37 ++++++++-------- .../controllers/base_controller.py | 13 +++--- .../controllers/controller_factory.py | 32 +++++++------- .../routellm_controller.py | 19 +++++---- .../semantic_router_controller.py | 24 +++++++---- comps/router/src/opea_router_microservice.py | 29 ++++++++----- comps/router/src/requirements.txt | 42 +++++++++---------- tests/router/test_router_routellm_on_xeon.sh | 3 ++ 14 files changed, 130 insertions(+), 92 deletions(-) diff --git 
a/comps/router/deployment/docker_compose/compose.yaml b/comps/router/deployment/docker_compose/compose.yaml index 6444723dd3..392bfd5007 100644 --- a/comps/router/deployment/docker_compose/compose.yaml +++ b/comps/router/deployment/docker_compose/compose.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + services: router_service: build: diff --git a/comps/router/deployment/docker_compose/configs/routellm_config.yaml b/comps/router/deployment/docker_compose/configs/routellm_config.yaml index 166faefb7e..605d3c35c1 100644 --- a/comps/router/deployment/docker_compose/configs/routellm_config.yaml +++ b/comps/router/deployment/docker_compose/configs/routellm_config.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + # which embedder backend to use ("huggingface" or "openai") embedding_provider: "huggingface" @@ -25,4 +28,3 @@ config: mf: checkpoint_path: "madison-evans/routellm-e5-base-v2" use_openai_embeddings: false - diff --git a/comps/router/deployment/docker_compose/configs/router.yaml b/comps/router/deployment/docker_compose/configs/router.yaml index 96ee065174..7118ecc9a7 100644 --- a/comps/router/deployment/docker_compose/configs/router.yaml +++ b/comps/router/deployment/docker_compose/configs/router.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + model_map: weak: endpoint: "${WEAK_ENDPOINT:-http://opea_router:8000/weak}" diff --git a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml index c98b9f7a4d..97edc416b1 100644 --- a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml +++ b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + embedding_provider: "huggingface" 
embedding_models: diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh index f3e03476a1..d7795cb6ec 100644 --- a/comps/router/deployment/docker_compose/deploy_router.sh +++ b/comps/router/deployment/docker_compose/deploy_router.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + # ======================== # OPEA Router Deploy Script # ======================== @@ -21,6 +24,8 @@ for VAR in "${REQUIRED_VARS[@]}"; do fi done +export HUGGINGFACEHUB_API_TOKEN="$HF_TOKEN" + # Default values for Docker image REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} TAG=${TAG:-latest} @@ -38,7 +43,7 @@ echo "" # Compose up echo "[INFO] Launching Docker Compose service..." -docker compose -f compose.yaml up --build +docker compose -f compose.yaml up --build # Wait a moment then check status sleep 2 diff --git a/comps/router/src/Dockerfile b/comps/router/src/Dockerfile index 3bafc90c3d..d0dffd9496 100644 --- a/comps/router/src/Dockerfile +++ b/comps/router/src/Dockerfile @@ -1,3 +1,6 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + FROM python:3.10-slim # Install git diff --git a/comps/router/src/README.md b/comps/router/src/README.md index bdcb6c3e52..20b2809c66 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -1,13 +1,13 @@ > Location: comps/router/src/README.md -A lightweight HTTP service that routes incoming text prompts to the most appropriate LLM back‑end (e.g. strong vs weak) and returns the target inference endpoint. It is built on the OPEA micro‑service SDK and can switch between two controller back‑ends: +A lightweight HTTP service that routes incoming text prompts to the most appropriate LLM back‑end (e.g. strong vs weak) and returns the target inference endpoint. 
It is built on the OPEA micro‑service SDK and can switch between two controller back‑ends: - RouteLLM (matrix‑factorisation, dataset‑driven) - Semantic‑Router (encoder‑based semantic similarity) The router is stateless; it inspects the prompt, consults the configured controller, and replies with a single URL such as http://opea_router:8000/strong. -## Build +## Build ``` # From repo root 📂 @@ -34,14 +34,13 @@ $ export CONTROLLER_TYPE=routellm # or semantic_router $ ./deploy_router.sh ``` -*The service listens on http://localhost:6000 (host‑mapped from container port 6000). Logs stream to STDOUT; use Ctrl‑C to stop or docker compose down to clean up.* +_The service listens on http://localhost:6000 (host‑mapped from container port 6000). Logs stream to STDOUT; use Ctrl‑C to stop or docker compose down to clean up._ ## API Usage -| Method | URL | Body schema | Success response | -|--------|------------|------------------------------------|----------------------------------------------| -| `POST` | `/v1/route`| `{ "text": "" }` | `200 OK` → `{ "url": "" }` | - +| Method | URL | Body schema | Success response | +| ------ | ----------- | ----------------------------- | ---------------------------------------------- | +| `POST` | `/v1/route` | `{ "text": "" }` | `200 OK` → `{ "url": "" }` | **Example** @@ -51,7 +50,7 @@ curl -X POST http://localhost:6000/v1/route \ -d '{"text": "Explain the Calvin cycle in photosynthesis."}' ``` -Expected JSON *(assuming the strong model wins the routing decision)*: +Expected JSON _(assuming the strong model wins the routing decision)_: ``` { @@ -59,17 +58,16 @@ Expected JSON *(assuming the strong model wins the routing decision)*: } ``` -## Configuration Reference - -| Variable / file | Purpose | Default | Where set | -|------------------------------------------|---------------------------------------------------|-------------------------------------------|--------------------| -| `HF_TOKEN` | Hugging Face auth token for encoder models | 
— | `.env` / shell | -| `OPENAI_API_KEY` | OpenAI key (only if `embedding_provider: openai`) | — | `.env` / shell | -| `CONTROLLER_TYPE` | `routellm` or `semantic_router` | `routellm` | env / `router.yaml`| -| `CONFIG_PATH` | Path to global router YAML | `/app/configs/router.yaml` | Compose env | -| `WEAK_ENDPOINT` / `STRONG_ENDPOINT` | Final inference URLs | container DNS | Compose env | -| `WEAK_MODEL_ID` / `STRONG_MODEL_ID` | Model IDs forwarded to controllers | `openai/gpt-3.5-turbo`, `openai/gpt-4` | Compose env | +## Configuration Reference +| Variable / file | Purpose | Default | Where set | +| ----------------------------------- | ------------------------------------------------- | -------------------------------------- | ------------------- | +| `HF_TOKEN` | Hugging Face auth token for encoder models | — | `.env` / shell | +| `OPENAI_API_KEY` | OpenAI key (only if `embedding_provider: openai`) | — | `.env` / shell | +| `CONTROLLER_TYPE` | `routellm` or `semantic_router` | `routellm` | env / `router.yaml` | +| `CONFIG_PATH` | Path to global router YAML | `/app/configs/router.yaml` | Compose env | +| `WEAK_ENDPOINT` / `STRONG_ENDPOINT` | Final inference URLs | container DNS | Compose env | +| `WEAK_MODEL_ID` / `STRONG_MODEL_ID` | Model IDs forwarded to controllers | `openai/gpt-3.5-turbo`, `openai/gpt-4` | Compose env | ## Troubleshooting @@ -81,7 +79,6 @@ Routed model `` not in `model_map` – make sure model_map in router.yaml Use docker compose logs -f router_service for real‑time debugging. 
- ## Testing Includes an end-to-end script for the RouteLLM controller: @@ -91,4 +88,4 @@ chmod +x tests/router/test_router_routellm.sh export HF_TOKEN="" export OPENAI_API_KEY="" tests/router/test_router_routellm.sh -``` \ No newline at end of file +``` diff --git a/comps/router/src/integrations/controllers/base_controller.py b/comps/router/src/integrations/controllers/base_controller.py index 2aeb4a4384..2601274970 100644 --- a/comps/router/src/integrations/controllers/base_controller.py +++ b/comps/router/src/integrations/controllers/base_controller.py @@ -1,15 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + from abc import ABC, abstractmethod class BaseController(ABC): - """ - An abstract base controller class providing a framework for routing and - endpoint retrieval functionality. - """ + """An abstract base controller class providing a framework for routing and + endpoint retrieval functionality.""" @abstractmethod def route(self, messages, **kwargs): - """ - Determines the appropriate routing based on input messages. 
- """ + """Determines the appropriate routing based on input messages.""" pass diff --git a/comps/router/src/integrations/controllers/controller_factory.py b/comps/router/src/integrations/controllers/controller_factory.py index f8ac523e4a..a9c41f927e 100644 --- a/comps/router/src/integrations/controllers/controller_factory.py +++ b/comps/router/src/integrations/controllers/controller_factory.py @@ -1,15 +1,23 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os from typing import Dict + import yaml -from comps.router.src.integrations.controllers.routellm_controller.routellm_controller import RouteLLMController -from comps.router.src.integrations.controllers.semantic_router_controller.semantic_router_controller import SemanticRouterController -import os from dotenv import load_dotenv +from comps.router.src.integrations.controllers.routellm_controller.routellm_controller import RouteLLMController +from comps.router.src.integrations.controllers.semantic_router_controller.semantic_router_controller import ( + SemanticRouterController, +) + load_dotenv() -HF_TOKEN = os.getenv('HF_TOKEN', '') -OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '') -CONTROLLER_TYPE = os.getenv('CONTROLLER_TYPE', None) +HF_TOKEN = os.getenv("HF_TOKEN", "") +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +CONTROLLER_TYPE = os.getenv("CONTROLLER_TYPE", None) + class ControllerFactory: @@ -23,21 +31,17 @@ def get_controller_config(config_filename: str) -> Dict: raise FileNotFoundError(f"Configuration file '{config_filename}' not found.") from e except yaml.YAMLError as e: raise ValueError(f"Error parsing the configuration file: {e}") from e - + @staticmethod def factory(controller_config: str, model_map: Dict): """Returns an instance of the appropriate controller based on the controller_type.""" config = ControllerFactory.get_controller_config(controller_config) - + if CONTROLLER_TYPE == "routellm": return RouteLLMController(config=config, 
api_key=OPENAI_API_KEY, hf_token=HF_TOKEN, model_map=model_map) - + elif CONTROLLER_TYPE == "semantic_router": - return SemanticRouterController( - config=config, - api_key=OPENAI_API_KEY, - model_map=model_map - ) + return SemanticRouterController(config=config, api_key=OPENAI_API_KEY, model_map=model_map) else: raise ValueError(f"Unknown controller type: {CONTROLLER_TYPE}") diff --git a/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py b/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py index 8f5ee99971..1c17d96c4f 100644 --- a/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py +++ b/comps/router/src/integrations/controllers/routellm_controller/routellm_controller.py @@ -1,13 +1,19 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import logging import os -from comps.router.src.integrations.controllers.base_controller import BaseController + from routellm.controller import Controller as RouteLLM_Controller +from comps.router.src.integrations.controllers.base_controller import BaseController + logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) + class RouteLLMController(BaseController): def __init__(self, config, hf_token=None, api_key=None, model_map=None): self.config = config @@ -37,7 +43,7 @@ def __init__(self, config, hf_token=None, api_key=None, model_map=None): # Extract strong/weak model IDs strong_model = self.model_map.get("strong", {}).get("model_id") - weak_model = self.model_map.get("weak", {}).get("model_id") + weak_model = self.model_map.get("weak", {}).get("model_id") if not strong_model or not weak_model: raise ValueError("model_map must include both 'strong' and 'weak' entries") @@ -53,8 +59,8 @@ def __init__(self, config, hf_token=None, api_key=None, model_map=None): strong_model=strong_model, weak_model=weak_model, config=nested, - hf_token=hf_token if 
provider == "huggingface" else None, - api_key= api_key if provider == "openai" else None, + hf_token=hf_token if provider == "huggingface" else None, + api_key=api_key if provider == "openai" else None, ) def route(self, messages): @@ -63,10 +69,7 @@ def route(self, messages): router=self.routing_algorithm, threshold=self.threshold, ) - endpoint_key = next( - (k for k, v in self.model_map.items() if v.get("model_id") == routed_name), - None - ) + endpoint_key = next((k for k, v in self.model_map.items() if v.get("model_id") == routed_name), None) if not endpoint_key: raise ValueError(f"Routed model '{routed_name}' not in model_map") return self.model_map[endpoint_key]["endpoint"] diff --git a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py index 80000c7012..271b16fa05 100644 --- a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py +++ b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py @@ -1,12 +1,17 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import logging import os -from comps.cores.telemetry.opea_telemetry import opea_telemetry -from comps.router.src.integrations.controllers.base_controller import BaseController -from semantic_router.routers import SemanticRouter -from semantic_router.encoders import OpenAIEncoder, HuggingFaceEncoder -from semantic_router import Route + # from decorators import log_latency from dotenv import load_dotenv +from semantic_router import Route +from semantic_router.encoders import HuggingFaceEncoder, OpenAIEncoder +from semantic_router.routers import SemanticRouter + +from comps.cores.telemetry.opea_telemetry import opea_telemetry +from comps.router.src.integrations.controllers.base_controller import BaseController load_dotenv() hf_token = 
os.getenv("HF_TOKEN", "") @@ -17,6 +22,7 @@ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) + class SemanticRouterController(BaseController): def __init__(self, config, api_key=None, model_map=None): self.config = config @@ -24,7 +30,7 @@ def __init__(self, config, api_key=None, model_map=None): # 1) grab provider + model mapping provider = config.get("embedding_provider", "").lower() - models = config.get("embedding_models", {}) + models = config.get("embedding_models", {}) if provider not in {"huggingface", "openai"}: raise ValueError(f"Unsupported embedding_provider: '{provider}'") @@ -41,7 +47,7 @@ def __init__(self, config, api_key=None, model_map=None): model_kwargs={"token": hf_token}, tokenizer_kwargs={"token": hf_token}, ) - else: + else: if not api_key: raise ValueError("valid api key is required for selected model provider") os.environ["OPENAI_API_KEY"] = api_key @@ -61,8 +67,8 @@ def _build_route_layer(self): @opea_telemetry def route(self, messages): - """ - Determines which inference endpoint to use based on the provided messages. + """Determines which inference endpoint to use based on the provided messages. + It looks up the model_map to retrieve the nested endpoint value. 
""" query = messages[0]["content"] diff --git a/comps/router/src/opea_router_microservice.py b/comps/router/src/opea_router_microservice.py index 062011d737..829f895f40 100644 --- a/comps/router/src/opea_router_microservice.py +++ b/comps/router/src/opea_router_microservice.py @@ -1,20 +1,27 @@ -import os +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import logging +import os + import yaml +from pydantic import BaseModel, Field + from comps import ( CustomLogger, - TextDoc, ServiceType, - register_microservice, + TextDoc, opea_microservices, + register_microservice, ) from comps.router.src.integrations.controllers.controller_factory import ControllerFactory -from pydantic import BaseModel, Field + # Data model for endpoint response class RouteEndpointDoc(BaseModel): url: str = Field(..., description="URL of the chosen inference endpoint") + # Set up logging logger = CustomLogger("opea_router_microservice") logflag = os.getenv("LOGFLAG", False) @@ -25,6 +32,7 @@ class RouteEndpointDoc(BaseModel): _controller_factory = None _controller = None + def _load_config(): global _config_data, _controller_factory, _controller @@ -50,17 +58,15 @@ def _load_config(): except KeyError: raise RuntimeError(f"No config path for controller_type='{controller_type}' in global config") - - _controller = _controller_factory.factory( - controller_config=controller_config_path, - model_map=model_map - ) + _controller = _controller_factory.factory(controller_config=controller_config_path, model_map=model_map) logger.info("[Router] Controller re-initialized successfully.") + # Initial config load at startup _load_config() + @register_microservice( name="opea_service@router", service_type=ServiceType.LLM, @@ -71,8 +77,8 @@ def _load_config(): output_datatype=RouteEndpointDoc, ) def route_microservice(input: TextDoc) -> RouteEndpointDoc: - """ - Microservice that decides which model endpoint is best for the given text input. 
+ """Microservice that decides which model endpoint is best for the given text input. + Returns only the route URL (does not forward). """ if not _controller: @@ -91,6 +97,7 @@ def route_microservice(input: TextDoc) -> RouteEndpointDoc: logger.error(f"[Router] Error during model routing: {e}") raise + if __name__ == "__main__": logger.info("OPEA Router Microservice is starting...") opea_microservices["opea_service@router"].start() diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index 998f5f7581..9db871e3f7 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -1,36 +1,36 @@ -# Basic web and async -fastapi -uvicorn[standard] aiofiles aiohttp -httpx - -# Data handling and parsing -pydantic -requests -python-dotenv -python-multipart -pyyaml -pypdf +docarray[full] docx2txt +# Basic web and async +fastapi -# Larger frameworks and libs -semantic-router +# Include the forked RouteLLM repository +git+https://github.com/SAPD-Intel/RouteLLM.git +httpx + +# K8s, images, misc +kubernetes langchain langchain-community -docarray[full] # Observability and telemetry opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +pillow prometheus-fastapi-instrumentator -# K8s, images, misc -kubernetes -pillow -shortuuid +# Data handling and parsing +pydantic +pypdf +python-dotenv +python-multipart +pyyaml +requests -# Include the forked RouteLLM repository -git+https://github.com/SAPD-Intel/RouteLLM.git +# Larger frameworks and libs +semantic-router +shortuuid +uvicorn[standard] diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh index a16b4b0a03..43794b664b 100644 --- a/tests/router/test_router_routellm_on_xeon.sh +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + # End-to-end test – Router micro-service, 
RouteLLM controller (CPU/Xeon) set -xeuo pipefail From 5aab6bc1b650454ffb2be7ed46cfa0706699d4c8 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 21 May 2025 15:01:47 +0000 Subject: [PATCH 08/20] removed hard OPENAI dependency and made OPENAI_API_KEY default to empty str Signed-off-by: Madison Evans --- comps/router/deployment/docker_compose/compose.yaml | 2 +- comps/router/deployment/docker_compose/deploy_router.sh | 2 +- tests/router/test_router_routellm_on_xeon.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/comps/router/deployment/docker_compose/compose.yaml b/comps/router/deployment/docker_compose/compose.yaml index 392bfd5007..209ec68534 100644 --- a/comps/router/deployment/docker_compose/compose.yaml +++ b/comps/router/deployment/docker_compose/compose.yaml @@ -22,7 +22,7 @@ services: STRONG_MODEL_ID: ${STRONG_MODEL_ID:-openai/gpt-4} HF_TOKEN: ${HF_TOKEN:?set HF_TOKEN} - OPENAI_API_KEY: ${OPENAI_API_KEY:?set OPENAI_API_KEY} + OPENAI_API_KEY: ${OPENAI_API_KEY:-""} CONTROLLER_TYPE: ${CONTROLLER_TYPE:-routellm} diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh index d7795cb6ec..7af015a752 100644 --- a/comps/router/deployment/docker_compose/deploy_router.sh +++ b/comps/router/deployment/docker_compose/deploy_router.sh @@ -14,7 +14,7 @@ if [ -f .env ]; then fi # Required variables -REQUIRED_VARS=("HF_TOKEN" "OPENAI_API_KEY") +REQUIRED_VARS=("HF_TOKEN") # Validate that all required variables are set for VAR in "${REQUIRED_VARS[@]}"; do diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh index 43794b664b..2ecae73724 100644 --- a/tests/router/test_router_routellm_on_xeon.sh +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -14,7 +14,7 @@ CONTAINER=opea_router # Required secrets : "${HF_TOKEN:?Need HF_TOKEN}" -: "${OPENAI_API_KEY:?Need OPENAI_API_KEY}" +: "${OPENAI_API_KEY:=""}" # Set default image info 
(matches deploy script) REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} From b4c86b065fdc8636a634826326ab6e03710db833 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 21 May 2025 15:23:58 +0000 Subject: [PATCH 09/20] removed empty str fallback for OPENAI_API_KEY var Signed-off-by: Madison Evans --- tests/router/test_router_routellm_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh index 2ecae73724..e236951ea7 100644 --- a/tests/router/test_router_routellm_on_xeon.sh +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -14,7 +14,7 @@ CONTAINER=opea_router # Required secrets : "${HF_TOKEN:?Need HF_TOKEN}" -: "${OPENAI_API_KEY:=""}" +: "${OPENAI_API_KEY:=}" # Set default image info (matches deploy script) REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} From 9262b057918471c32198e175673db16c5bb47dcf Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 21 May 2025 15:45:47 +0000 Subject: [PATCH 10/20] target localhost in RouteLLM E2E test to avoid Docker network issues Signed-off-by: Madison Evans --- tests/router/test_router_routellm_on_xeon.sh | 24 ++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh index e236951ea7..89a5f5b940 100644 --- a/tests/router/test_router_routellm_on_xeon.sh +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -7,7 +7,7 @@ set -xeuo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" WORKPATH="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" -host_ip=$(hostname -I | awk '{print $1}') +host=127.0.0.1 LOG_PATH="$WORKPATH/tests" ROUTER_PORT=6000 CONTAINER=opea_router @@ -16,7 +16,6 @@ CONTAINER=opea_router : "${HF_TOKEN:?Need HF_TOKEN}" : "${OPENAI_API_KEY:=}" -# Set default image info (matches deploy script) REGISTRY_AND_REPO=${REGISTRY_AND_REPO:-opea/router} TAG=${TAG:-latest} @@ -36,16 +35,22 @@ start_router() { validate() { # weak route - rsp=$(curl -s http://${host_ip}:${ROUTER_PORT}/v1/route \ - -X POST -H 'Content-Type: application/json' \ - -d '{"text":"What is 2 + 2?"}') + rsp=$( + curl -s --noproxy localhost,127.0.0.1 \ + -X POST http://${host}:${ROUTER_PORT}/v1/route \ + -H 'Content-Type: application/json' \ + -d '{"text":"What is 2 + 2?"}' + ) [[ $rsp == *"weak"* ]] || { echo "weak routing failed ($rsp)"; exit 1; } # strong route - hard='Explain Gödel’s incompleteness theorem in formal terms.' - rsp=$(curl -s http://${host_ip}:${ROUTER_PORT}/v1/route \ - -X POST -H 'Content-Type: application/json' \ - -d "{\"text\":\"$hard\"}") + hard='Explain the Gödel incompleteness theorem in formal terms.' + rsp=$( + curl -s --noproxy localhost,127.0.0.1 \ + -X POST http://${host}:${ROUTER_PORT}/v1/route \ + -H 'Content-Type: application/json' \ + -d "{\"text\":\"$hard\"}" + ) [[ $rsp == *"strong"* ]] || { echo "strong routing failed ($rsp)"; exit 1; } } @@ -59,4 +64,5 @@ cleanup build_image start_router validate + echo "✅ RouteLLM controller test passed." 
From 64c850724111d49e391897d79985ec26822af189 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 21 May 2025 17:39:19 +0000 Subject: [PATCH 11/20] fixed e2e test issue for routellm test Signed-off-by: Madison Evans --- tests/router/test_router_routellm_on_xeon.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) mode change 100644 => 100755 tests/router/test_router_routellm_on_xeon.sh diff --git a/tests/router/test_router_routellm_on_xeon.sh b/tests/router/test_router_routellm_on_xeon.sh old mode 100644 new mode 100755 index 89a5f5b940..4880bcb8c4 --- a/tests/router/test_router_routellm_on_xeon.sh +++ b/tests/router/test_router_routellm_on_xeon.sh @@ -29,6 +29,9 @@ build_image() { start_router() { cd "$WORKPATH/comps/router/deployment/docker_compose" + + export CONTROLLER_TYPE=routellm + docker compose -f compose.yaml up router_service -d sleep 20 } @@ -44,7 +47,7 @@ validate() { [[ $rsp == *"weak"* ]] || { echo "weak routing failed ($rsp)"; exit 1; } # strong route - hard='Explain the Gödel incompleteness theorem in formal terms.' + hard='Given a 100x100 grid where each cell is independently colored black or white such that for every cell the sum of black cells in its row, column, and both main diagonals is a distinct prime number, determine whether there exists a unique configuration of the grid that satisfies this condition and, if so, compute the total number of black cells in that configuration.' rsp=$( curl -s --noproxy localhost,127.0.0.1 \ -X POST http://${host}:${ROUTER_PORT}/v1/route \ From cf1622c425d3a5e142f97a71565277ff7293c6a1 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 18:58:04 +0000 Subject: [PATCH 12/20] changed the checkpoint path for the custom mf model weights. 
Now using 'routellm-e5-base-V2' under OPEA HF group Signed-off-by: Madison Evans --- .../deployment/docker_compose/configs/routellm_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/router/deployment/docker_compose/configs/routellm_config.yaml b/comps/router/deployment/docker_compose/configs/routellm_config.yaml index 605d3c35c1..5f9e8a0014 100644 --- a/comps/router/deployment/docker_compose/configs/routellm_config.yaml +++ b/comps/router/deployment/docker_compose/configs/routellm_config.yaml @@ -26,5 +26,5 @@ config: checkpoint_path: "routellm/bert_gpt4_augmented" mf: - checkpoint_path: "madison-evans/routellm-e5-base-v2" + checkpoint_path: "OPEA/routellm-e5-base-v2" use_openai_embeddings: false From efdd653d7eb3b1d3796893f6d40344915aa99e25 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 19:05:07 +0000 Subject: [PATCH 13/20] moved RouteEndpointDoc class into 'api_protocol.py' under cores/proto Signed-off-by: Madison Evans --- comps/cores/proto/api_protocol.py | 3 +++ comps/router/src/opea_router_microservice.py | 10 +--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index ff3988b8e9..012c0bb115 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -1013,3 +1013,6 @@ class FineTuningJobCheckpoint(BaseModel): step_number: Optional[int] = None """The step number that the checkpoint was created at.""" + +class RouteEndpointDoc(BaseModel): + url: str = Field(..., description="URL of the chosen inference endpoint") \ No newline at end of file diff --git a/comps/router/src/opea_router_microservice.py b/comps/router/src/opea_router_microservice.py index 829f895f40..29c16981ad 100644 --- a/comps/router/src/opea_router_microservice.py +++ b/comps/router/src/opea_router_microservice.py @@ -1,11 +1,8 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import logging 
import os - import yaml -from pydantic import BaseModel, Field from comps import ( CustomLogger, @@ -15,12 +12,7 @@ register_microservice, ) from comps.router.src.integrations.controllers.controller_factory import ControllerFactory - - -# Data model for endpoint response -class RouteEndpointDoc(BaseModel): - url: str = Field(..., description="URL of the chosen inference endpoint") - +from comps.cores.proto.api_protocol import RouteEndpointDoc # Set up logging logger = CustomLogger("opea_router_microservice") From 2d8e71e3ba38d7ce875466984249f50fa0d21aa2 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 19:14:23 +0000 Subject: [PATCH 14/20] added 'router-compose.yaml' to workflows/docker/compose Signed-off-by: Madison Evans --- .github/workflows/docker/compose/router-compose.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .github/workflows/docker/compose/router-compose.yaml diff --git a/.github/workflows/docker/compose/router-compose.yaml b/.github/workflows/docker/compose/router-compose.yaml new file mode 100644 index 0000000000..e53789d4d2 --- /dev/null +++ b/.github/workflows/docker/compose/router-compose.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + router: + build: + dockerfile: comps/router/src/Dockerfile + image: ${REGISTRY:-opea}/opea_router:${TAG:-latest} From 9eb977abc15d9b7c3f1ef23318cac4ac59b63727 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 19:16:46 +0000 Subject: [PATCH 15/20] pre commit format updates Signed-off-by: Madison Evans --- comps/cores/proto/api_protocol.py | 3 ++- comps/router/src/opea_router_microservice.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 012c0bb115..691ece0fb9 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py 
@@ -1014,5 +1014,6 @@ class FineTuningJobCheckpoint(BaseModel): step_number: Optional[int] = None """The step number that the checkpoint was created at.""" + class RouteEndpointDoc(BaseModel): - url: str = Field(..., description="URL of the chosen inference endpoint") \ No newline at end of file + url: str = Field(..., description="URL of the chosen inference endpoint") diff --git a/comps/router/src/opea_router_microservice.py b/comps/router/src/opea_router_microservice.py index 29c16981ad..83960816f3 100644 --- a/comps/router/src/opea_router_microservice.py +++ b/comps/router/src/opea_router_microservice.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import os + import yaml from comps import ( @@ -11,8 +12,8 @@ opea_microservices, register_microservice, ) -from comps.router.src.integrations.controllers.controller_factory import ControllerFactory from comps.cores.proto.api_protocol import RouteEndpointDoc +from comps.router.src.integrations.controllers.controller_factory import ControllerFactory # Set up logging logger = CustomLogger("opea_router_microservice") From 8db8aa2d843dcc4ffac2b0b042d6c3b6e221f768 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 19:58:16 +0000 Subject: [PATCH 16/20] removed the forked version of RouteLLM from requirements.txt dependency. 
Now pulls from the referenced repo and then applies the patch located at 'comps/router/src/hf_compatibility.patch' Signed-off-by: Madison Evans --- .../docker_compose/deploy_router.sh | 0 comps/router/src/Dockerfile | 7 +- comps/router/src/hf_compatibility.patch | 326 ++++++++++++++++++ comps/router/src/requirements.txt | 2 - 4 files changed, 332 insertions(+), 3 deletions(-) mode change 100644 => 100755 comps/router/deployment/docker_compose/deploy_router.sh create mode 100644 comps/router/src/hf_compatibility.patch diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh old mode 100644 new mode 100755 diff --git a/comps/router/src/Dockerfile b/comps/router/src/Dockerfile index d0dffd9496..e5c91358d6 100644 --- a/comps/router/src/Dockerfile +++ b/comps/router/src/Dockerfile @@ -18,7 +18,12 @@ COPY comps /home/user/comps # Install deps from the router’s requirements.txt RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir -r /home/user/comps/router/src/requirements.txt + && pip install --no-cache-dir -r /home/user/comps/router/src/requirements.txt \ + \ + && git clone --depth 1 https://github.com/lm-sys/RouteLLM.git /tmp/RouteLLM \ + && patch -p1 -d /tmp/RouteLLM < /home/user/comps/router/src/hf_compatibility.patch \ + && pip install --no-cache-dir /tmp/RouteLLM \ + && rm -rf /tmp/RouteLLM # Make imports work ENV PYTHONPATH=/home/user diff --git a/comps/router/src/hf_compatibility.patch b/comps/router/src/hf_compatibility.patch new file mode 100644 index 0000000000..585b78824f --- /dev/null +++ b/comps/router/src/hf_compatibility.patch @@ -0,0 +1,326 @@ +diff -ruN upstream-RouteLLM/routellm/controller.py patched-RouteLLM/routellm/controller.py +--- upstream-RouteLLM/routellm/controller.py 2025-05-28 19:32:46.029844725 +0000 ++++ patched-RouteLLM/routellm/controller.py 2025-05-28 19:32:14.595998148 +0000 +@@ -9,24 +9,6 @@ + + from 
routellm.routers.routers import ROUTER_CLS + +-# Default config for routers augmented using golden label data from GPT-4. +-# This is exactly the same as config.example.yaml. +-GPT_4_AUGMENTED_CONFIG = { +- "sw_ranking": { +- "arena_battle_datasets": [ +- "lmsys/lmsys-arena-human-preference-55k", +- "routellm/gpt4_judge_battles", +- ], +- "arena_embedding_datasets": [ +- "routellm/arena_battles_embeddings", +- "routellm/gpt4_judge_battles_embeddings", +- ], +- }, +- "causal_llm": {"checkpoint_path": "routellm/causal_llm_gpt4_augmented"}, +- "bert": {"checkpoint_path": "routellm/bert_gpt4_augmented"}, +- "mf": {"checkpoint_path": "routellm/mf_gpt4_augmented"}, +-} +- + + class RoutingError(Exception): + pass +@@ -48,7 +30,9 @@ + api_base: Optional[str] = None, + api_key: Optional[str] = None, + progress_bar: bool = False, ++ hf_token: Optional[str] = None, # Add hf_token as a parameter + ): ++ self.hf_token = hf_token # Store the hf_token + self.model_pair = ModelPair(strong=strong_model, weak=weak_model) + self.routers = {} + self.api_base = api_base +@@ -57,7 +41,7 @@ + self.progress_bar = progress_bar + + if config is None: +- config = GPT_4_AUGMENTED_CONFIG ++ raise ValueError("Config cannot be None. Please provide a valid configuration dictionary.") + + router_pbar = None + if progress_bar: +@@ -67,7 +51,8 @@ + for router in routers: + if router_pbar is not None: + router_pbar.set_description(f"Loading {router}") +- self.routers[router] = ROUTER_CLS[router](**config.get(router, {})) ++ self.routers[router] = ROUTER_CLS[router](hf_token=self.hf_token, **config.get(router, {})) ++ + + # Some Python magic to match the OpenAI Python SDK + self.chat = SimpleNamespace( +@@ -101,6 +86,14 @@ + f"Invalid model {model}. Model name must be of the format 'router-[router name]-[threshold]." 
+ ) + return router, threshold ++ ++ def get_routed_model(self, messages: list, router: str, threshold: float) -> str: ++ """ ++ Get the routed model for a given message using the specified router and threshold. ++ """ ++ self._validate_router_threshold(router, threshold) ++ routed_model = self._get_routed_model_for_completion(messages, router, threshold) ++ return routed_model + + def _get_routed_model_for_completion( + self, messages: list, router: str, threshold: float +diff -ruN upstream-RouteLLM/routellm/routers/matrix_factorization/model.py patched-RouteLLM/routellm/routers/matrix_factorization/model.py +--- upstream-RouteLLM/routellm/routers/matrix_factorization/model.py 2025-05-28 19:32:46.084844456 +0000 ++++ patched-RouteLLM/routellm/routers/matrix_factorization/model.py 2025-05-28 19:32:14.651997875 +0000 +@@ -1,7 +1,14 @@ + import torch + from huggingface_hub import PyTorchModelHubMixin +- ++from transformers import AutoTokenizer, AutoModel + from routellm.routers.similarity_weighted.utils import OPENAI_CLIENT ++import logging ++ ++logging.basicConfig( ++ level=logging.INFO, ++ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ++) ++logger = logging.getLogger(__name__) + + MODEL_IDS = { + "RWKV-4-Raven-14B": 0, +@@ -70,7 +77,6 @@ + "zephyr-7b-beta": 63, + } + +- + class MFModel(torch.nn.Module, PyTorchModelHubMixin): + def __init__( + self, +@@ -79,51 +85,80 @@ + text_dim, + num_classes, + use_proj, ++ use_openai_embeddings=False, # Default: Hugging Face embeddings ++ embedding_model_name="BAAI/bge-base-en", # Match notebook ++ hf_token=None, # Hugging Face API token + ): + super().__init__() +- self._name = "TextMF" + self.use_proj = use_proj +- self.P = torch.nn.Embedding(num_models, dim) ++ self.use_openai_embeddings = use_openai_embeddings ++ self.hf_token = hf_token ++ self.embedding_model_name = embedding_model_name + +- self.embedding_model = "text-embedding-3-small" ++ # Model embedding matrix ++ self.P = 
torch.nn.Embedding(num_models, dim) + + if self.use_proj: +- self.text_proj = torch.nn.Sequential( +- torch.nn.Linear(text_dim, dim, bias=False) +- ) ++ self.text_proj = torch.nn.Linear(text_dim, dim, bias=False) + else: +- assert ( +- text_dim == dim +- ), f"text_dim {text_dim} must be equal to dim {dim} if not using projection" +- +- self.classifier = torch.nn.Sequential( +- torch.nn.Linear(dim, num_classes, bias=False) +- ) ++ assert text_dim == dim, f"text_dim {text_dim} must be equal to dim {dim} if not using projection" ++ ++ self.classifier = torch.nn.Linear(dim, num_classes, bias=False) ++ ++ if not self.use_openai_embeddings: ++ logger.info(f"Loading Hugging Face tokenizer and model: {self.embedding_model_name}") ++ ++ # Load tokenizer & model exactly as in the notebook ++ self.tokenizer = AutoTokenizer.from_pretrained( ++ self.embedding_model_name, ++ token=hf_token ++ ) ++ self.embedding_model = AutoModel.from_pretrained( ++ self.embedding_model_name, ++ token=hf_token ++ ) ++ self.embedding_model.eval() # Set to inference mode ++ self.embedding_model.to(self.get_device()) + + def get_device(self): + return self.P.weight.device + ++ def get_prompt_embedding(self, prompt): ++ """Generate sentence embedding using mean pooling (matches notebook).""" ++ ++ inputs = self.tokenizer( ++ prompt, ++ padding=True, ++ truncation=True, ++ return_tensors="pt" ++ ).to(self.get_device()) ++ ++ with torch.no_grad(): ++ outputs = self.embedding_model(**inputs) ++ last_hidden_state = outputs.last_hidden_state ++ ++ # Mean pooling over token embeddings ++ prompt_embed = last_hidden_state.mean(dim=1).squeeze() ++ ++ return prompt_embed ++ + def forward(self, model_id, prompt): + model_id = torch.tensor(model_id, dtype=torch.long).to(self.get_device()) +- + model_embed = self.P(model_id) + model_embed = torch.nn.functional.normalize(model_embed, p=2, dim=1) ++ prompt_embed = self.get_prompt_embedding(prompt) + +- prompt_embed = ( +- 
OPENAI_CLIENT.embeddings.create(input=[prompt], model=self.embedding_model) +- .data[0] +- .embedding +- ) +- prompt_embed = torch.tensor(prompt_embed, device=self.get_device()) +- prompt_embed = self.text_proj(prompt_embed) ++ if self.use_proj: ++ prompt_embed = self.text_proj(prompt_embed) + + return self.classifier(model_embed * prompt_embed).squeeze() + + @torch.no_grad() + def pred_win_rate(self, model_a, model_b, prompt): + logits = self.forward([model_a, model_b], prompt) +- winrate = torch.sigmoid(logits[0] - logits[1]).item() ++ raw_diff = logits[0] - logits[1] ++ winrate = torch.sigmoid(raw_diff).item() + return winrate + + def load(self, path): +- self.load_state_dict(torch.load(path)) ++ self.load_state_dict(torch.load(path)) +\ No newline at end of file +diff -ruN upstream-RouteLLM/routellm/routers/routers.py patched-RouteLLM/routellm/routers/routers.py +--- upstream-RouteLLM/routellm/routers/routers.py 2025-05-28 19:32:46.084844456 +0000 ++++ patched-RouteLLM/routellm/routers/routers.py 2025-05-28 19:32:14.651997875 +0000 +@@ -1,7 +1,7 @@ + import abc + import functools + import random +- ++from transformers import AutoTokenizer, AutoModel + import numpy as np + import torch + from datasets import concatenate_datasets, load_dataset +@@ -21,6 +21,13 @@ + compute_tiers, + preprocess_battles, + ) ++import logging ++ ++logging.basicConfig( ++ level=logging.INFO, ++ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ++) ++logger = logging.getLogger(__name__) + + + def no_parallel(cls): +@@ -211,18 +218,47 @@ + def __init__( + self, + checkpoint_path, +- # This is the model pair for scoring at inference time, +- # and can be different from the model pair used for routing. 
+ strong_model="gpt-4-1106-preview", + weak_model="mixtral-8x7b-instruct-v0.1", + hidden_size=128, +- num_models=64, +- text_dim=1536, ++ num_models=None, ++ text_dim=None, + num_classes=1, + use_proj=True, ++ use_openai_embeddings=True, ++ embedding_model_name=None, ++ hf_token=None, + ): ++ """ ++ A simplified constructor that flattens the logic for: ++ 1) Setting num_models from MODEL_IDS, ++ 2) Determining embedding_model_name defaults, ++ 3) Setting text_dim for OpenAI vs. HF embeddings, ++ 4) Initializing the MFModel, ++ 5) Setting strong/weak model IDs. ++ """ + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + ++ # Default num_models to the length of MODEL_IDS if not provided ++ num_models = num_models or len(MODEL_IDS) ++ ++ # Decide which embedding model_name to use if none provided ++ if not embedding_model_name: ++ if use_openai_embeddings: ++ # e.g. "text-embedding-ada-002" or your default ++ embedding_model_name = "text-embedding-3-small" ++ else: ++ raise ValueError("Missing model id in config file. Please add a valid model id") ++ ++ # Decide text_dim if not provided ++ if text_dim is None: ++ if use_openai_embeddings: ++ # e.g., 1536 for text-embedding-ada-002 ++ text_dim = 1536 ++ else: ++ text_dim = self._infer_hf_text_dim(embedding_model_name) ++ ++ # Initialize the MFModel + self.model = MFModel.from_pretrained( + checkpoint_path, + dim=hidden_size, +@@ -230,14 +266,40 @@ + text_dim=text_dim, + num_classes=num_classes, + use_proj=use_proj, +- ) +- self.model = self.model.eval().to(device) ++ use_openai_embeddings=use_openai_embeddings, ++ embedding_model_name=embedding_model_name, ++ hf_token=hf_token, ++ ).eval().to(device) ++ ++ # Store strong/weak model IDs + self.strong_model_id = MODEL_IDS[strong_model] + self.weak_model_id = MODEL_IDS[weak_model] + ++ @staticmethod ++ def _infer_hf_text_dim(embedding_model_name: str) -> int: ++ """ ++ Helper to load a huggingface model and extract its hidden size. 
++ Immediately frees model from memory. ++ """ ++ tokenizer = AutoTokenizer.from_pretrained(embedding_model_name) ++ hf_model = AutoModel.from_pretrained(embedding_model_name) ++ dim = hf_model.config.hidden_size ++ ++ del tokenizer ++ del hf_model ++ ++ return dim ++ + def calculate_strong_win_rate(self, prompt): ++ """ ++ Scores the prompt using the MF model to see how ++ often the 'strong' model is predicted to win ++ over the 'weak' model. ++ """ + winrate = self.model.pred_win_rate( +- self.strong_model_id, self.weak_model_id, prompt ++ self.strong_model_id, ++ self.weak_model_id, ++ prompt + ) + return winrate + diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index 9db871e3f7..378a31051e 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -5,8 +5,6 @@ docx2txt # Basic web and async fastapi -# Include the forked RouteLLM repository -git+https://github.com/SAPD-Intel/RouteLLM.git httpx # K8s, images, misc From beadac5c8f666114940fdef2045c2eaef80ef86a Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Wed, 28 May 2025 20:04:00 +0000 Subject: [PATCH 17/20] updated README to reflect the patch usage for modified RouteLLM repo Signed-off-by: Madison Evans --- comps/router/src/README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/comps/router/src/README.md b/comps/router/src/README.md index 20b2809c66..a9d59d73b9 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -36,6 +36,33 @@ $ ./deploy_router.sh _The service listens on http://localhost:6000 (host‑mapped from container port 6000). Logs stream to STDOUT; use Ctrl‑C to stop or docker compose down to clean up._ +## RouteLLM compatibility patch + +The upstream **RouteLLM** project is geared toward OpenAI embeddings and GPT-4–augmented +checkpoints. 
+We include a small patch – `hf_compatibility.patch` – that: + +- adds a `hf_token` plumb-through, +- switches the Matrix-Factorisation router to Hugging Face sentence embeddings, +- removes hard-coded GPT-4 “golden-label” defaults. + +**Container users:** +The Dockerfile applies the patch automatically during `docker build`, so you don’t have to do anything. + +**Local development:** + +```bash +# 1. Clone upstream RouteLLM +git clone https://github.com/lm-sys/RouteLLM.git +cd RouteLLM + +# 2. Apply the patch shipped with this repo +patch -p1 < ../comps/router/src/hf_compatibility.patch + +# 3. Install the patched library +pip install -e . +``` + ## API Usage | Method | URL | Body schema | Success response | From 5316753d8e3bc40de95dc63ae0cef762cde3b0e9 Mon Sep 17 00:00:00 2001 From: Madison Evans Date: Fri, 30 May 2025 02:12:49 +0000 Subject: [PATCH 18/20] added H1 title to README Signed-off-by: Madison Evans --- comps/router/src/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comps/router/src/README.md b/comps/router/src/README.md index a9d59d73b9..ee775a9ad1 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -1,3 +1,5 @@ +# Router Microservice + > Location: comps/router/src/README.md A lightweight HTTP service that routes incoming text prompts to the most appropriate LLM back‑end (e.g. strong vs weak) and returns the target inference endpoint. It is built on the OPEA micro‑service SDK and can switch between two controller back‑ends: From 96fcda367ae65c53fc762000960f94614e753476 Mon Sep 17 00:00:00 2001 From: Haim Barad Date: Mon, 9 Jun 2025 08:28:57 +0300 Subject: [PATCH 19/20] comply with formatting requests. 
Signed-off-by: Haim Barad --- .../workflows/docker/compose/router-compose.yaml | 2 +- .../router/deployment/docker_compose/compose.yaml | 2 +- .../docker_compose/configs/routellm_config.yaml | 1 - .../deployment/docker_compose/configs/router.yaml | 2 +- .../configs/semantic_router_config.yaml | 2 +- .../deployment/docker_compose/deploy_router.sh | 2 +- comps/router/src/Dockerfile | 15 +++------------ comps/router/src/README.md | 4 ++-- .../semantic_router_controller.py | 4 ++-- comps/router/src/requirements.txt | 13 +------------ 10 files changed, 13 insertions(+), 34 deletions(-) diff --git a/.github/workflows/docker/compose/router-compose.yaml b/.github/workflows/docker/compose/router-compose.yaml index e53789d4d2..a3aa7e8f7a 100644 --- a/.github/workflows/docker/compose/router-compose.yaml +++ b/.github/workflows/docker/compose/router-compose.yaml @@ -6,4 +6,4 @@ services: router: build: dockerfile: comps/router/src/Dockerfile - image: ${REGISTRY:-opea}/opea_router:${TAG:-latest} + image: ${REGISTRY:-opea}/opea-router:${TAG:-latest} diff --git a/comps/router/deployment/docker_compose/compose.yaml b/comps/router/deployment/docker_compose/compose.yaml index 209ec68534..430e5e2087 100644 --- a/comps/router/deployment/docker_compose/compose.yaml +++ b/comps/router/deployment/docker_compose/compose.yaml @@ -27,7 +27,7 @@ services: CONTROLLER_TYPE: ${CONTROLLER_TYPE:-routellm} ports: - - "6000:6000" + - "${ROUTER_PORT:-6000}:6000" restart: unless-stopped networks: diff --git a/comps/router/deployment/docker_compose/configs/routellm_config.yaml b/comps/router/deployment/docker_compose/configs/routellm_config.yaml index 5f9e8a0014..b387712b86 100644 --- a/comps/router/deployment/docker_compose/configs/routellm_config.yaml +++ b/comps/router/deployment/docker_compose/configs/routellm_config.yaml @@ -4,7 +4,6 @@ # which embedder backend to use ("huggingface" or "openai") embedding_provider: "huggingface" -# export ROUTELLM_EMBEDDING_MODEL_NAME="your-org/other-embed" 
embedding_model_name: "intfloat/e5-base-v2" routing_algorithm: "mf" diff --git a/comps/router/deployment/docker_compose/configs/router.yaml b/comps/router/deployment/docker_compose/configs/router.yaml index 7118ecc9a7..b9dd1eac56 100644 --- a/comps/router/deployment/docker_compose/configs/router.yaml +++ b/comps/router/deployment/docker_compose/configs/router.yaml @@ -10,5 +10,5 @@ model_map: model_id: "${STRONG_MODEL_ID}" controller_config_paths: - routellm: "/app/configs/routellm_config.yaml" + routellm: "/app/configs/routellm_config.yaml" semantic_router: "/app/configs/semantic_router_config.yaml" diff --git a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml index 97edc416b1..fcfcec2689 100644 --- a/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml +++ b/comps/router/deployment/docker_compose/configs/semantic_router_config.yaml @@ -5,7 +5,7 @@ embedding_provider: "huggingface" embedding_models: huggingface: "BAAI/bge-base-en-v1.5" - openai: "text-embedding-ada-002" + openai: "text-embedding-ada-002" routes: - name: "strong" diff --git a/comps/router/deployment/docker_compose/deploy_router.sh b/comps/router/deployment/docker_compose/deploy_router.sh index 7af015a752..fe29c94943 100755 --- a/comps/router/deployment/docker_compose/deploy_router.sh +++ b/comps/router/deployment/docker_compose/deploy_router.sh @@ -47,6 +47,6 @@ docker compose -f compose.yaml up --build # Wait a moment then check status sleep 2 -docker ps --filter "name=opea_router" +docker ps --filter "name=opea-router" echo "[SUCCESS] Router service deployed and running on http://localhost:6000" diff --git a/comps/router/src/Dockerfile b/comps/router/src/Dockerfile index e5c91358d6..ded4d44ea0 100644 --- a/comps/router/src/Dockerfile +++ b/comps/router/src/Dockerfile @@ -4,26 +4,17 @@ FROM python:3.10-slim # Install git -RUN apt-get update \ - && apt-get install -y git \ - 
&& rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* # Add a non-root user -RUN useradd -m -s /bin/bash user \ - && chown -R user /home/user +RUN useradd -m -s /bin/bash user && chown -R user /home/user # Copy the *entire* comps/ package WORKDIR /home/user COPY comps /home/user/comps # Install deps from the router’s requirements.txt -RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir -r /home/user/comps/router/src/requirements.txt \ - \ - && git clone --depth 1 https://github.com/lm-sys/RouteLLM.git /tmp/RouteLLM \ - && patch -p1 -d /tmp/RouteLLM < /home/user/comps/router/src/hf_compatibility.patch \ - && pip install --no-cache-dir /tmp/RouteLLM \ - && rm -rf /tmp/RouteLLM +RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r /home/user/comps/router/src/requirements.txt && git clone --depth 1 https://github.com/lm-sys/RouteLLM.git /tmp/RouteLLM && patch -p1 -d /tmp/RouteLLM < /home/user/comps/router/src/hf_compatibility.patch && pip install --no-cache-dir /tmp/RouteLLM && rm -rf /tmp/RouteLLM # Make imports work ENV PYTHONPATH=/home/user diff --git a/comps/router/src/README.md b/comps/router/src/README.md index ee775a9ad1..d0a43f3c8c 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -11,7 +11,7 @@ The router is stateless; it inspects the prompt, consults the configured control ## Build -``` +```bash # From repo root 📂 # Build the container image directly $ docker build -t opea/router:latest -f comps/router/src/Dockerfile . @@ -19,7 +19,7 @@ $ docker build -t opea/router:latest -f comps/router/src/Dockerfile . Alternatively, the Docker Compose workflow below will build the image for you. 
-``` +```bash # Navigate to the compose bundle $ cd comps/router/deployment/docker_compose diff --git a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py index 271b16fa05..961f7a86ab 100644 --- a/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py +++ b/comps/router/src/integrations/controllers/semantic_router_controller/semantic_router_controller.py @@ -28,7 +28,7 @@ def __init__(self, config, api_key=None, model_map=None): self.config = config self.model_map = model_map or {} - # 1) grab provider + model mapping + # grab provider + model mapping provider = config.get("embedding_provider", "").lower() models = config.get("embedding_models", {}) @@ -53,7 +53,7 @@ def __init__(self, config, api_key=None, model_map=None): os.environ["OPENAI_API_KEY"] = api_key self.encoder = OpenAIEncoder(model=model_name) - # 4) build your routing layer + # build your routing layer self._build_route_layer() def _build_route_layer(self): diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index 378a31051e..00fb7fcc8f 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -2,33 +2,22 @@ aiofiles aiohttp docarray[full] docx2txt -# Basic web and async fastapi - httpx - -# K8s, images, misc kubernetes langchain langchain-community - -# Observability and telemetry opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk pillow prometheus-fastapi-instrumentator - -# Data handling and parsing pydantic pypdf python-dotenv python-multipart pyyaml requests - -# Larger frameworks and libs semantic-router shortuuid -uvicorn[standard] - +uvicorn[standard] \ No newline at end of file From 357ff339b26fd9ee34962c791cac6a8cc8e0a89b Mon Sep 17 00:00:00 2001 From: Haim Barad Date: Mon, 9 Jun 2025 13:59:49 +0300 Subject: [PATCH 20/20] fix 
pre-commit issues: remove trailing whitespace and add newline Signed-off-by: Haim Barad --- comps/router/src/README.md | 4 ++-- comps/router/src/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/comps/router/src/README.md b/comps/router/src/README.md index d0a43f3c8c..97cf56362c 100644 --- a/comps/router/src/README.md +++ b/comps/router/src/README.md @@ -41,14 +41,14 @@ _The service listens on http://localhost:6000 (host‑mapped from container port ## RouteLLM compatibility patch The upstream **RouteLLM** project is geared toward OpenAI embeddings and GPT-4–augmented -checkpoints. +checkpoints. We include a small patch – `hf_compatibility.patch` – that: - adds a `hf_token` plumb-through, - switches the Matrix-Factorisation router to Hugging Face sentence embeddings, - removes hard-coded GPT-4 “golden-label” defaults. -**Container users:** +**Container users:** The Dockerfile applies the patch automatically during `docker build`, so you don’t have to do anything. **Local development:** diff --git a/comps/router/src/requirements.txt b/comps/router/src/requirements.txt index 00fb7fcc8f..5b2bb91185 100644 --- a/comps/router/src/requirements.txt +++ b/comps/router/src/requirements.txt @@ -20,4 +20,4 @@ pyyaml requests semantic-router shortuuid -uvicorn[standard] \ No newline at end of file +uvicorn[standard]