5 changes: 4 additions & 1 deletion config/catalog_allow.exs
@@ -137,6 +137,8 @@ zai_coder_models = :all
# Cerebras - All models
cerebras_models = :all

vllm_models = []

config :req_llm, :catalog,
allow: %{
anthropic: anthropic_models,
@@ -149,7 +151,8 @@ config :req_llm, :catalog,
google_vertex_anthropic: google_vertex_anthropic_models,
zai: zai_models,
zai_coder: zai_coder_models,
cerebras: cerebras_models
cerebras: cerebras_models,
vllm: vllm_models
},
overrides: [],
custom: []
1 change: 1 addition & 0 deletions config/test.exs
@@ -15,6 +15,7 @@ config :req_llm, :catalog,
openrouter: :all,
amazon_bedrock: :all,
google_vertex_anthropic: :all,
vllm: :all,
zai: :all,
zai_coder: :all,
cerebras: :all
2 changes: 1 addition & 1 deletion lib/req_llm/catalog.ex
@@ -346,7 +346,7 @@ defmodule ReqLLM.Catalog do

allowed_spec?(:anthropic, "claude-3-5-sonnet")
# => true (if anthropic: :all in catalog)

allowed_spec?(:openai, "gpt-4o-mini")
# => true (if matches pattern)
"""
3 changes: 3 additions & 0 deletions lib/req_llm/model.ex
@@ -51,6 +51,7 @@ defmodule ReqLLM.Model do
field(:modalities, %{input: [modality()], output: [modality()]} | nil)
field(:capabilities, capabilities() | nil)
field(:cost, cost() | nil)
field(:base_url, String.t(), enforce: false)
field(:_metadata, map() | nil)
end

@@ -72,6 +73,7 @@ defmodule ReqLLM.Model do
- `:capabilities` - Model capabilities like `:reasoning`, `:tool_call`, `:temperature`, `:attachment`
- `:cost` - Pricing information with `:input` and `:output` cost per 1K tokens
Optional `:cached_input` cost per 1K tokens (defaults to `:input` rate if not specified)
- `:base_url` - Model-specific base URL value. Overrides the provider-level base_url (for example, the vLLM provider default); see the sketch below
- `:_metadata` - Additional provider-specific metadata

## Examples
@@ -97,6 +99,7 @@
modalities: Keyword.get(opts, :modalities),
capabilities: Keyword.get(opts, :capabilities),
cost: Keyword.get(opts, :cost),
base_url: Keyword.get(opts, :base_url),
_metadata: Keyword.get(opts, :_metadata)
}
end
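
For illustration, a minimal sketch of the new option in use. The model name and port are assumptions mirroring the vLLM test fixtures added below:

    model = ReqLLM.Model.new(:vllm, "test-only-chat", base_url: "http://localhost:8006/v1")
    model.base_url
    #=> "http://localhost:8006/v1"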
1 change: 1 addition & 0 deletions lib/req_llm/provider/generated/valid_providers.ex
@@ -55,6 +55,7 @@ defmodule ReqLLM.Provider.Generated.ValidProviders do
:v0,
:venice,
:vercel,
:vllm,
:vultr,
:wandb,
:xai,
6 changes: 4 additions & 2 deletions lib/req_llm/provider/options.ex
@@ -422,7 +422,8 @@ defmodule ReqLLM.Provider.Options do
"""
@spec effective_base_url(module(), ReqLLM.Model.t(), keyword()) :: String.t()
def effective_base_url(provider_mod, %ReqLLM.Model{} = model, opts) do
opts[:base_url] ||
model.base_url ||
opts[:base_url] ||
base_url_from_application_config(model.provider) ||
base_url_from_provider_metadata(model.provider) ||
provider_mod.default_base_url()
@@ -766,7 +767,8 @@

defp inject_base_url_from_registry(opts, model, provider_mod) do
Keyword.put_new_lazy(opts, :base_url, fn ->
base_url_from_application_config(model.provider) ||
model.base_url ||
base_url_from_application_config(model.provider) ||
base_url_from_provider_metadata(model.provider) ||
provider_mod.default_base_url()
end)
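
Taken together, the two hunks above give the model-level base_url top priority at both call sites. A sketch of the resulting resolution order, using names from this PR:

    model = ReqLLM.Model.new(:vllm, "test-only-chat", base_url: "http://localhost:8006/v1")

    # The model-level value now wins over a caller-supplied :base_url option:
    ReqLLM.Provider.Options.effective_base_url(ReqLLM.Providers.VLLM, model, base_url: "http://localhost:9999/v1")
    #=> "http://localhost:8006/v1"

    # With model.base_url unset, the previous chain still applies:
    # opts[:base_url] -> application config -> provider metadata -> provider default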
7 changes: 5 additions & 2 deletions lib/req_llm/provider/registry.ex
@@ -220,12 +220,15 @@ defmodule ReqLLM.Provider.Registry do
cost =
get_in(model_metadata, ["cost"]) |> ReqLLM.Metadata.map_string_keys_to_atoms()

base_url = get_in(model_metadata, ["base_url"])

enhanced_model =
ReqLLM.Model.new(provider_id, model_name,
limit: limit,
modalities: modalities,
capabilities: capabilities,
cost: cost
cost: cost,
base_url: base_url
)

# Add raw metadata for backward compatibility and additional fields
@@ -519,7 +522,7 @@ defmodule ReqLLM.Provider.Registry do
"models" => %{"claude-3-sonnet" => %{"id" => "claude-3-sonnet", ...}}
}
}

ReqLLM.Provider.Registry.initialize(catalog)
#=> :ok

22 changes: 22 additions & 0 deletions lib/req_llm/providers/vllm.ex
@@ -0,0 +1,22 @@
defmodule ReqLLM.Providers.VLLM do
@moduledoc """
vLLM: a fully OpenAI-compatible Chat Completions API.

An `OPENAI_API_KEY` is required, but its value is ignored when the vLLM server is started without authentication.

## Configuration

# Add to .env file (automatically loaded)
OPENAI_API_KEY=some_value...
"""

@behaviour ReqLLM.Provider

use ReqLLM.Provider.DSL,
id: :vllm,
# A default value is required here, but it is typically overridden per model or per request.
base_url: "http://localhost:8005/v1",
metadata: "priv/models_dev/vllm.json",
default_env_key: "OPENAI_API_KEY",
provider_schema: []
end
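
A hedged usage sketch for the new provider (the served model id is an assumption; substitute whatever your local vLLM instance serves, and override `base_url` per request or per model as needed):

    # Start the server first, e.g.: vllm serve <model> --port 8005
    # Any non-empty OPENAI_API_KEY satisfies the client when the server
    # runs without authentication.
    {:ok, response} = ReqLLM.generate_text("vllm:test-only-chat", "Hello!")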
1 change: 1 addition & 0 deletions priv/models_dev/.catalog_manifest.json
@@ -56,6 +56,7 @@
"priv/models_dev/v0.json",
"priv/models_dev/venice.json",
"priv/models_dev/vercel.json",
"priv/models_dev/vllm.json",
"priv/models_dev/vultr.json",
"priv/models_dev/wandb.json",
"priv/models_dev/xai.json",
186 changes: 186 additions & 0 deletions priv/models_dev/vllm.json
@@ -0,0 +1,186 @@
{
"models": [
{
"attachment": false,
"cost": {
"input": 2.0e-5,
"output": 0.0
},
"dimensions": {
"default": 1536,
"max": 1536,
"min": 1
},
"id": "test-only-text-embedding",
"knowledge": "2024-01",
"last_updated": "2024-01-25",
"limit": {
"context": 8191,
"output": 0
},
"modalities": {
"input": [
"text"
],
"output": [
"embedding"
]
},
"name": "Test Only to test text embedding metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-text-embedding",
"reasoning": false,
"release_date": "2024-01-25",
"temperature": false,
"tool_call": false,
"type": "embedding",
"base_url": "http://localhost:8004/v1"
},
{
"api": "chat",
"attachment": true,
"cost": {
"cache_read": 1.25,
"input": 2.5,
"output": 10
},
"id": "test-only-chat",
"knowledge": "2023-09",
"last_updated": "2024-08-06",
"limit": {
"context": 128000,
"output": 16384
},
"modalities": {
"input": [
"text",
"image"
],
"output": [
"text"
]
},
"name": "Test Only Model to check chat metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-chat",
"reasoning": false,
"release_date": "2024-05-13",
"supports_strict_tools": true,
"temperature": true,
"tool_call": true,
"base_url": "http://localhost:8006/v1"
},
{
"api": "responses",
"attachment": true,
"cost": {
"cache_read": 0.01,
"input": 0.05,
"output": 0.4
},
"id": "test-only-responses",
"knowledge": "2024-05-30",
"last_updated": "2025-08-07",
"limit": {
"context": 400000,
"output": 128000
},
"modalities": {
"input": [
"text",
"image"
],
"output": [
"text"
]
},
"name": "Test Only Model to check responses metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-responses",
"reasoning": true,
"release_date": "2025-08-07",
"supports_json_schema_response_format": true,
"temperature": false,
"tool_call": true,
"base_url": "http://localhost:8001/v1"
},
{
"api": "chat",
"attachment": false,
"cost": {
"cache_read": 1.25,
"input": 0.5,
"output": 1.5
},
"id": "test-only-max-completions",
"knowledge": "2021-09-01",
"last_updated": "2023-11-06",
"limit": {
"context": 16385,
"output": 4096
},
"modalities": {
"input": [
"text"
],
"output": [
"text"
]
},
"name": "Test Only Model to check max completions metadata",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-max-completions",
"reasoning": false,
"release_date": "2023-03-01",
"temperature": true,
"tool_call": false,
"base_url": "http://localhost:8002/v1"
},
{
"api": "chat",
"attachment": true,
"cost": {
"cache_read": 1.25,
"input": 2.5,
"output": 10
},
"id": "test-only-chat-no-model-base-url",
"knowledge": "2023-09",
"last_updated": "2024-08-06",
"limit": {
"context": 128000,
"output": 16384
},
"modalities": {
"input": [
"text"
],
"output": [
"text"
]
},
"name": "Test Only Model to check that provider base_url is used when not model base url",
"open_weights": false,
"provider": "vllm",
"provider_model_id": "test-only-chat-no-model-base-url",
"reasoning": false,
"release_date": "2024-05-13",
"supports_strict_tools": true,
"temperature": true,
"tool_call": true
}
],
"provider": {
"base_url": "http://localhost:8005/v1",
"doc": "Local AI model provider",
"env": [
"OPENAI_API_KEY"
],
"id": "vllm",
"name": "vLLM"
}
}
12 changes: 12 additions & 0 deletions priv/models_local/vllm_exclude.json
@@ -0,0 +1,12 @@
{
"provider": {
"id": "vllm"
},
"exclude": [
"test-only-text-embedding",
"test-only-chat",
"test-only-text-embedding",
"test-only-responses",
"test-only-max-completions",
"test-only-chat-no-model-base-url"
]
}