5 changes: 4 additions & 1 deletion config/catalog_allow.exs
@@ -137,6 +137,8 @@ zai_coder_models = :all
# Cerebras - All models
cerebras_models = :all

vllm_models = []

config :req_llm, :catalog,
allow: %{
anthropic: anthropic_models,
@@ -149,7 +151,8 @@ config :req_llm, :catalog,
google_vertex_anthropic: google_vertex_anthropic_models,
zai: zai_models,
zai_coder: zai_coder_models,
cerebras: cerebras_models
cerebras: cerebras_models,
vllm: vllm_models
},
overrides: [],
custom: []
1 change: 1 addition & 0 deletions config/test.exs
@@ -15,6 +15,7 @@ config :req_llm, :catalog,
openrouter: :all,
amazon_bedrock: :all,
google_vertex_anthropic: :all,
vllm: :all,
zai: :all,
zai_coder: :all,
cerebras: :all
2 changes: 1 addition & 1 deletion lib/req_llm/catalog.ex
@@ -346,7 +346,7 @@ defmodule ReqLLM.Catalog do

allowed_spec?(:anthropic, "claude-3-5-sonnet")
# => true (if anthropic: :all in catalog)

allowed_spec?(:openai, "gpt-4o-mini")
# => true (if matches pattern)
"""
3 changes: 3 additions & 0 deletions lib/req_llm/model.ex
@@ -51,6 +51,7 @@ defmodule ReqLLM.Model do
field(:modalities, %{input: [modality()], output: [modality()]} | nil)
field(:capabilities, capabilities() | nil)
field(:cost, cost() | nil)
field(:base_url, String.t(), enforce: false)
field(:_metadata, map() | nil)
end

@@ -72,6 +73,7 @@ defmodule ReqLLM.Model do
- `:capabilities` - Model capabilities like `:reasoning`, `:tool_call`, `:temperature`, `:attachment`
- `:cost` - Pricing information with `:input` and `:output` cost per 1K tokens
Optional `:cached_input` cost per 1K tokens (defaults to `:input` rate if not specified)
- `:base_url` - Model-specific base URL value. Overrides the provider-level base_url (for example, the vLLM provider default); see the sketch below
- `:_metadata` - Additional provider-specific metadata

## Examples
@@ -97,6 +99,7 @@
modalities: Keyword.get(opts, :modalities),
capabilities: Keyword.get(opts, :capabilities),
cost: Keyword.get(opts, :cost),
base_url: Keyword.get(opts, :base_url),
_metadata: Keyword.get(opts, :_metadata)
}
end
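
For illustration, a minimal sketch of the new option in use. The model name and port are assumptions mirroring the vLLM test fixtures added below:

    model = ReqLLM.Model.new(:vllm, "test-only-chat", base_url: "http://localhost:8006/v1")
    model.base_url
    #=> "http://localhost:8006/v1"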
1 change: 1 addition & 0 deletions lib/req_llm/provider/generated/valid_providers.ex
@@ -55,6 +55,7 @@ defmodule ReqLLM.Provider.Generated.ValidProviders do
:v0,
:venice,
:vercel,
:vllm,
:vultr,
:wandb,
:xai,
6 changes: 4 additions & 2 deletions lib/req_llm/provider/options.ex
@@ -422,7 +422,8 @@ defmodule ReqLLM.Provider.Options do
"""
@spec effective_base_url(module(), ReqLLM.Model.t(), keyword()) :: String.t()
def effective_base_url(provider_mod, %ReqLLM.Model{} = model, opts) do
opts[:base_url] ||
model.base_url ||
opts[:base_url] ||
base_url_from_application_config(model.provider) ||
base_url_from_provider_metadata(model.provider) ||
provider_mod.default_base_url()
@@ -766,7 +767,8 @@

defp inject_base_url_from_registry(opts, model, provider_mod) do
Keyword.put_new_lazy(opts, :base_url, fn ->
base_url_from_application_config(model.provider) ||
model.base_url ||
base_url_from_application_config(model.provider) ||
base_url_from_provider_metadata(model.provider) ||
provider_mod.default_base_url()
end)
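
Taken together, the two hunks above give the model-level base_url top priority at both call sites. A sketch of the resulting resolution order, using names from this PR:

    model = ReqLLM.Model.new(:vllm, "test-only-chat", base_url: "http://localhost:8006/v1")

    # The model-level value now wins over a caller-supplied :base_url option:
    ReqLLM.Provider.Options.effective_base_url(ReqLLM.Providers.VLLM, model, base_url: "http://localhost:9999/v1")
    #=> "http://localhost:8006/v1"

    # With model.base_url unset, the previous chain still applies:
    # opts[:base_url] -> application config -> provider metadata -> provider default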
7 changes: 5 additions & 2 deletions lib/req_llm/provider/registry.ex
@@ -220,12 +220,15 @@ defmodule ReqLLM.Provider.Registry do
cost =
get_in(model_metadata, ["cost"]) |> ReqLLM.Metadata.map_string_keys_to_atoms()

base_url = get_in(model_metadata, ["base_url"])

enhanced_model =
ReqLLM.Model.new(provider_id, model_name,
limit: limit,
modalities: modalities,
capabilities: capabilities,
cost: cost
cost: cost,
base_url: base_url
)

# Add raw metadata for backward compatibility and additional fields
@@ -519,7 +522,7 @@ defmodule ReqLLM.Provider.Registry do
"models" => %{"claude-3-sonnet" => %{"id" => "claude-3-sonnet", ...}}
}
}

ReqLLM.Provider.Registry.initialize(catalog)
#=> :ok

22 changes: 22 additions & 0 deletions lib/req_llm/providers/vllm.ex
@@ -0,0 +1,22 @@
defmodule ReqLLM.Providers.VLLM do
@moduledoc """
vLLM: a fully OpenAI-compatible Chat Completions API.

An `OPENAI_API_KEY` is required, but its value is ignored when the vLLM server is started without authentication.

## Configuration

# Add to .env file (automatically loaded)
OPENAI_API_KEY=some_value...
"""

@behaviour ReqLLM.Provider

use ReqLLM.Provider.DSL,
id: :vllm,
# A default value is required here, but it is typically overridden per model or per request.
base_url: "http://localhost:8005/v1",
metadata: "priv/models_dev/vllm.json",
default_env_key: "OPENAI_API_KEY",
provider_schema: []
end
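
A hedged usage sketch for the new provider (the served model id is an assumption; substitute whatever your local vLLM instance serves, and override `base_url` per request or per model as needed):

    # Start the server first, e.g.: vllm serve <model> --port 8005
    # Any non-empty OPENAI_API_KEY satisfies the client when the server
    # runs without authentication.
    {:ok, response} = ReqLLM.generate_text("vllm:test-only-chat", "Hello!")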
1 change: 1 addition & 0 deletions priv/models_dev/.catalog_manifest.json
@@ -56,6 +56,7 @@
"priv/models_dev/v0.json",
"priv/models_dev/venice.json",
"priv/models_dev/vercel.json",
"priv/models_dev/vllm.json",
"priv/models_dev/vultr.json",
"priv/models_dev/wandb.json",
"priv/models_dev/xai.json",
186 changes: 186 additions & 0 deletions priv/models_dev/vllm.json
@@ -0,0 +1,186 @@
{
"models": [
{
"attachment": false,
"cost": {
"input": 2.0e-5,
"output": 0.0
},
"dimensions": {
"default": 1536,
"max": 1536,
"min": 1
},
"id": "test-only-text-embedding",
"knowledge": "2024-01",
"last_updated": "2024-01-25",
"limit": {
"context": 8191,
"output": 0
},
"modalities": {
"input": [
"text"
],
"output": [
"embedding"
]
},
"name": "Test Only to test text embedding metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-text-embedding",
"reasoning": false,
"release_date": "2024-01-25",
"temperature": false,
"tool_call": false,
"type": "embedding",
"base_url": "http://localhost:8004/v1"
},
{
"api": "chat",
"attachment": true,
"cost": {
"cache_read": 1.25,
"input": 2.5,
"output": 10
},
"id": "test-only-chat",
"knowledge": "2023-09",
"last_updated": "2024-08-06",
"limit": {
"context": 128000,
"output": 16384
},
"modalities": {
"input": [
"text",
"image"
],
"output": [
"text"
]
},
"name": "Test Only Model to check chat metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-chat",
"reasoning": false,
"release_date": "2024-05-13",
"supports_strict_tools": true,
"temperature": true,
"tool_call": true,
"base_url": "http://localhost:8006/v1"
},
{
"api": "responses",
"attachment": true,
"cost": {
"cache_read": 0.01,
"input": 0.05,
"output": 0.4
},
"id": "test-only-responses",
"knowledge": "2024-05-30",
"last_updated": "2025-08-07",
"limit": {
"context": 400000,
"output": 128000
},
"modalities": {
"input": [
"text",
"image"
],
"output": [
"text"
]
},
"name": "Test Only Model to check responses metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-responses",
"reasoning": true,
"release_date": "2025-08-07",
"supports_json_schema_response_format": true,
"temperature": false,
"tool_call": true,
"base_url": "http://localhost:8001/v1"
},
{
"api": "chat",
"attachment": false,
"cost": {
"cache_read": 1.25,
"input": 0.5,
"output": 1.5
},
"id": "test-only-max-completions",
"knowledge": "2021-09-01",
"last_updated": "2023-11-06",
"limit": {
"context": 16385,
"output": 4096
},
"modalities": {
"input": [
"text"
],
"output": [
"text"
]
},
"name": "Test Only Model to check max completions metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-max-completions",
"reasoning": false,
"release_date": "2023-03-01",
"temperature": true,
"tool_call": false,
"base_url": "http://localhost:8002/v1"
},
{
"api": "chat",
"attachment": true,
"cost": {
"cache_read": 1.25,
"input": 2.5,
"output": 10
},
"id": "test-only-chat-no-model-base-url",
"knowledge": "2023-09",
"last_updated": "2024-08-06",
"limit": {
"context": 128000,
"output": 16384
},
"modalities": {
"input": [
"text"
],
"output": [
"text"
]
},
"name": "Test Only Model to check that provider base_url is used when not model base url",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-chat-no-model-base-url",
"reasoning": false,
"release_date": "2024-05-13",
"supports_strict_tools": true,
"temperature": true,
"tool_call": true
}
],
"provider": {
"base_url": "http://localhost:8005/v1",
"doc": "Local AI model provider",
"env": [
"OPENAI_API_KEY"
],
"id": "vllm",
"name": "vLLM"
}
}
12 changes: 12 additions & 0 deletions priv/models_local/vllm_exclude.json
@@ -0,0 +1,12 @@
{
"provider": {
"id": "vllm"
},
"exclude": [
"test-only-text-embedding",
"test-only-chat",
"test-only-text-embedding",
"test-only-responses",
"test-only-max-completions",
"test-only-chat-no-model-base-url"
]
}