Merged
Commits
40 commits
369d5dc
add support for remote server
alexsin368 May 1, 2025
0f6191d
add steps to enable remote server
alexsin368 May 2, 2025
71f1608
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 2, 2025
bbcda06
remove use_remote_service
alexsin368 May 2, 2025
03f81c6
Merge branch 'agent-remote-service' of https://github.com/alexsin368/…
alexsin368 May 3, 2025
45cf931
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 3, 2025
35c67df
Merge branch 'main' into agent-remote-service
yinghu5 May 12, 2025
836c0dd
Merge branch 'main' into agent-remote-service
alexsin368 May 13, 2025
4899f79
add OpenAI models instructions, fix format of commands
alexsin368 May 14, 2025
101d133
Merge branch 'main' into agent-remote-service
alexsin368 May 15, 2025
b7c4acf
simplify ChatOpenAI instantiation
alexsin368 May 15, 2025
6586657
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2025
d288734
Revert "simplify ChatOpenAI instantiation"
alexsin368 May 15, 2025
848368f
add back check and logic for llm_engine, set openai_key argument
alexsin368 May 15, 2025
7d01d77
Merge branch 'agent-remote-service' of https://github.com/alexsin368/…
alexsin368 May 15, 2025
53aaaa5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2025
a70201f
Provide ARCH option for lvm-video-llama image build (#1630)
ZePan110 Apr 29, 2025
212e612
Add sglang microservice for supporting llama4 model (#1640)
lvliang-intel Apr 30, 2025
5fc478e
Remove invalid codeowner. (#1642)
ZePan110 Apr 30, 2025
1fe684c
add support for remote server
alexsin368 May 1, 2025
bd68f54
add steps to enable remote server
alexsin368 May 2, 2025
23f1f56
remove use_remote_service
alexsin368 May 2, 2025
d1d2ac1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 2, 2025
a9d9ad7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 3, 2025
1a1ff02
bug fix for chunk_size and overlap cause error in dataprep ingestion …
MSCetin37 May 2, 2025
11a79ff
MariaDB Vector integrations for retriever & dataprep services (#1645)
RazvanLiviuVarzaru May 6, 2025
5e1656f
update PR reviewers (#1651)
chensuyue May 7, 2025
69fea0d
Expand test matrix, find all tests use 3rd party Dockerfiles (#1676)
chensuyue May 7, 2025
388c264
fix the typo of README.md Comp (#1679)
yinghu5 May 10, 2025
3b42858
Fix request handle timeout issue (#1687)
lvliang-intel May 12, 2025
928e0f7
FEAT: Enable OPEA microservices to start as MCP servers (#1635)
Spycsh May 13, 2025
9be8f9f
Fix huggingface_hub API upgrade issue (#1691)
lvliang-intel May 13, 2025
0ffa6a6
add OpenAI models instructions, fix format of commands
alexsin368 May 14, 2025
f83070c
Fix dataprep opensearch ingest issue (#1697)
lvliang-intel May 14, 2025
72bc23b
Fix embedding issue with ArangoDB due to deprecated HuggingFace API (…
lvliang-intel May 14, 2025
b2d93ff
simplify ChatOpenAI instantiation
alexsin368 May 15, 2025
78001b0
Revert "simplify ChatOpenAI instantiation"
alexsin368 May 15, 2025
1f4b746
add back check and logic for llm_engine, set openai_key argument
alexsin368 May 15, 2025
79e0407
Merge branch 'agent-remote-service' of https://github.com/alexsin368/…
alexsin368 May 15, 2025
45376b9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2025
28 changes: 27 additions & 1 deletion comps/agent/src/README.md
Original file line number Diff line number Diff line change
@@ -82,7 +82,7 @@ for line in resp.iter_lines(decode_unicode=True):

**Note**:

1. Currently only `reract_llama` agent is enabled for assistants APIs.
1. Currently only `react_llama` agent is enabled for assistants APIs.
2. Not all keywords of OpenAI APIs are supported yet.

### 1.5 Agent memory
@@ -110,6 +110,32 @@ Examples of python code for multi-turn conversations using agent memory:

To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml).

### 1.6 Run LLMs from OpenAI

To run a model from OpenAI, set the environment variable `OPENAI_API_KEY`:

```bash
export OPENAI_API_KEY=<openai-api-key>
```

This variable also needs to be passed to the `docker run` command, or included in a YAML file when running `docker compose`.
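A minimal sketch (not part of this PR; the helper name is illustrative) of the check this implies: the key must be visible to the agent process inside the container, however it was injected.

```python
import os

# Fail fast if OPENAI_API_KEY is not visible to the process, mirroring what
# the agent expects when it instantiates an OpenAI-backed chat model.
def require_openai_key() -> str:
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise RuntimeError("OPENAI_API_KEY is not set")
    return key

os.environ["OPENAI_API_KEY"] = "sk-placeholder"  # illustrative value only
print(require_openai_key())
```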

### 1.7 Run LLMs with OpenAI-compatible APIs on Remote Servers

To run the text generation portion with LLMs deployed on a remote server, set the following environment variables:

```bash
export api_key=<openai-api-key>
export model=<model-card>
export LLM_ENDPOINT_URL=<inference-endpoint>
```

These variables also need to be passed to the `docker run` command, or included in a YAML file when running `docker compose`.

#### Notes

- Do not include the `/v1` suffix in `LLM_ENDPOINT_URL`; the agent appends it when constructing the OpenAI-compatible endpoint.
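The note above can be sketched as follows (an illustrative helper, not code from this PR): the agent appends `/v1` itself, so the exported `LLM_ENDPOINT_URL` must not already contain it.

```python
# Illustrative: given a bare inference endpoint, produce the OpenAI-compatible
# base URL the agent actually calls.
def openai_base_url(llm_endpoint_url: str) -> str:
    return llm_endpoint_url.rstrip("/") + "/v1"

print(openai_base_url("http://remote-host:8080"))  # http://remote-host:8080/v1
```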

## 🚀2. Start Agent Microservice

### 2.1 Build docker image for agent microservice
3 changes: 3 additions & 0 deletions comps/agent/src/integrations/config.py
@@ -17,6 +17,9 @@
if os.environ.get("llm_endpoint_url") is not None:
env_config += ["--llm_endpoint_url", os.environ["llm_endpoint_url"]]

if os.environ.get("api_key") is not None:
env_config += ["--api_key", os.environ["api_key"]]

if os.environ.get("llm_engine") is not None:
env_config += ["--llm_engine", os.environ["llm_engine"]]

25 changes: 15 additions & 10 deletions comps/agent/src/integrations/utils.py
@@ -7,6 +7,8 @@

from .config import env_config

LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080"


def format_date(date):
# input m/dd/yyyy hr:min
@@ -57,18 +59,20 @@ def setup_chat_model(args):
"streaming": args.stream,
}
if args.llm_engine == "vllm" or args.llm_engine == "tgi":
openai_endpoint = f"{args.llm_endpoint_url}/v1"
llm = ChatOpenAI(
openai_api_key="EMPTY",
openai_api_base=openai_endpoint,
model_name=args.model,
request_timeout=args.timeout,
**params,
)
openai_key = "EMPTY"
elif args.llm_engine == "openai":
llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params)
openai_key = args.api_key
else:
raise ValueError("llm_engine must be vllm, tgi or openai")
raise ValueError("llm_engine must be vllm, tgi, or openai")

    openai_endpoint = None if args.llm_endpoint_url == LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1"
llm = ChatOpenAI(
openai_api_key=openai_key,
openai_api_base=openai_endpoint,
model_name=args.model,
request_timeout=args.timeout,
**params,
)
return llm
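The merged `setup_chat_model` logic above can be exercised in isolation (a sketch with no langchain dependency; the helper name is illustrative, and string equality `==` is used to compare against the default endpoint): `vllm`/`tgi` use a dummy key with the remote endpoint, while `openai` uses the real key and leaves the base URL at the library default (`None`).

```python
LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080"

# Resolve (api key, base URL) the same way the merged code does before it
# passes them to ChatOpenAI.
def resolve_openai_params(llm_engine, api_key, llm_endpoint_url):
    if llm_engine in ("vllm", "tgi"):
        openai_key = "EMPTY"
    elif llm_engine == "openai":
        openai_key = api_key
    else:
        raise ValueError("llm_engine must be vllm, tgi, or openai")
    endpoint = None if llm_endpoint_url == LLM_ENDPOINT_URL_DEFAULT else llm_endpoint_url + "/v1"
    return openai_key, endpoint

print(resolve_openai_params("vllm", None, "http://remote-host:8080"))
```

With the default endpoint and `llm_engine=openai`, the endpoint resolves to `None`, so the client falls back to OpenAI's hosted API.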


@@ -162,6 +166,7 @@ def get_args():
parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct")
parser.add_argument("--llm_engine", type=str, default="tgi")
parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080")
parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server")
parser.add_argument("--max_new_tokens", type=int, default=1024)
parser.add_argument("--top_k", type=int, default=10)
parser.add_argument("--top_p", type=float, default=0.95)