diff --git a/config/catalog_allow.exs b/config/catalog_allow.exs
index a26698e8..66dc4a4e 100644
--- a/config/catalog_allow.exs
+++ b/config/catalog_allow.exs
@@ -137,6 +137,8 @@ zai_coder_models = :all
 # Cerebras - All models
 cerebras_models = :all
 
+vllm_models = []
+
 config :req_llm, :catalog,
   allow: %{
     anthropic: anthropic_models,
@@ -149,7 +151,8 @@ config :req_llm, :catalog,
     google_vertex_anthropic: google_vertex_anthropic_models,
     zai: zai_models,
     zai_coder: zai_coder_models,
-    cerebras: cerebras_models
+    cerebras: cerebras_models,
+    vllm: vllm_models
   },
   overrides: [],
   custom: []
diff --git a/config/test.exs b/config/test.exs
index 4f3b1077..ca965ba9 100644
--- a/config/test.exs
+++ b/config/test.exs
@@ -15,6 +15,7 @@ config :req_llm, :catalog,
   openrouter: :all,
   amazon_bedrock: :all,
   google_vertex_anthropic: :all,
+  vllm: :all,
   zai: :all,
   zai_coder: :all,
   cerebras: :all
diff --git a/lib/req_llm/catalog.ex b/lib/req_llm/catalog.ex
index 0ecdefce..a576c01d 100644
--- a/lib/req_llm/catalog.ex
+++ b/lib/req_llm/catalog.ex
@@ -346,7 +346,7 @@ defmodule ReqLLM.Catalog do
 
       allowed_spec?(:anthropic, "claude-3-5-sonnet")
       # => true (if anthropic: :all in catalog)
-
+
      allowed_spec?(:openai, "gpt-4o-mini")
       # => true (if matches pattern)
   """
diff --git a/lib/req_llm/model.ex b/lib/req_llm/model.ex
index 7e90d067..1d618bc9 100644
--- a/lib/req_llm/model.ex
+++ b/lib/req_llm/model.ex
@@ -51,6 +51,7 @@ defmodule ReqLLM.Model do
     field(:modalities, %{input: [modality()], output: [modality()]} | nil)
     field(:capabilities, capabilities() | nil)
     field(:cost, cost() | nil)
+    field(:base_url, String.t(), enforce: false)
     field(:_metadata, map() | nil)
   end
 
@@ -72,6 +73,7 @@ defmodule ReqLLM.Model do
   - `:capabilities` - Model capabilities like `:reasoning`, `:tool_call`, `:temperature`, `:attachment`
   - `:cost` - Pricing information with `:input` and `:output` cost per 1K tokens
     Optional `:cached_input` cost per 1K tokens (defaults to `:input` rate if not specified)
+  - `:base_url` - Model-specific base URL. Overrides the provider's base_url value when set.
   - `:_metadata` - Additional provider-specific metadata
 
   ## Examples
@@ -97,6 +99,7 @@ defmodule ReqLLM.Model do
       modalities: Keyword.get(opts, :modalities),
       capabilities: Keyword.get(opts, :capabilities),
       cost: Keyword.get(opts, :cost),
+      base_url: Keyword.get(opts, :base_url),
       _metadata: Keyword.get(opts, :_metadata)
     }
   end
diff --git a/lib/req_llm/provider/generated/valid_providers.ex b/lib/req_llm/provider/generated/valid_providers.ex
index a9755c68..e2302452 100644
--- a/lib/req_llm/provider/generated/valid_providers.ex
+++ b/lib/req_llm/provider/generated/valid_providers.ex
@@ -55,6 +55,7 @@ defmodule ReqLLM.Provider.Generated.ValidProviders do
     :v0,
     :venice,
     :vercel,
+    :vllm,
     :vultr,
     :wandb,
     :xai,
diff --git a/lib/req_llm/provider/options.ex b/lib/req_llm/provider/options.ex
index 8ea9faf3..8086f13b 100644
--- a/lib/req_llm/provider/options.ex
+++ b/lib/req_llm/provider/options.ex
@@ -422,7 +422,8 @@ defmodule ReqLLM.Provider.Options do
   """
   @spec effective_base_url(module(), ReqLLM.Model.t(), keyword()) :: String.t()
   def effective_base_url(provider_mod, %ReqLLM.Model{} = model, opts) do
-    opts[:base_url] ||
+    model.base_url ||
+      opts[:base_url] ||
       base_url_from_application_config(model.provider) ||
       base_url_from_provider_metadata(model.provider) ||
       provider_mod.default_base_url()
@@ -766,7 +767,8 @@ defmodule ReqLLM.Provider.Options do
 
   defp inject_base_url_from_registry(opts, model, provider_mod) do
     Keyword.put_new_lazy(opts, :base_url, fn ->
-      base_url_from_application_config(model.provider) ||
+      model.base_url ||
+        base_url_from_application_config(model.provider) ||
         base_url_from_provider_metadata(model.provider) ||
         provider_mod.default_base_url()
     end)
diff --git a/lib/req_llm/provider/registry.ex b/lib/req_llm/provider/registry.ex
index f6103f96..088af6f0 100644
--- a/lib/req_llm/provider/registry.ex
+++ b/lib/req_llm/provider/registry.ex
@@ -220,12 +220,15 @@ defmodule ReqLLM.Provider.Registry do
       cost =
         get_in(model_metadata, ["cost"]) |> ReqLLM.Metadata.map_string_keys_to_atoms()
 
+      base_url = get_in(model_metadata, ["base_url"])
+
       enhanced_model =
         ReqLLM.Model.new(provider_id, model_name,
           limit: limit,
           modalities: modalities,
           capabilities: capabilities,
-          cost: cost
+          cost: cost,
+          base_url: base_url
         )
 
       # Add raw metadata for backward compatibility and additional fields
@@ -519,7 +522,7 @@ defmodule ReqLLM.Provider.Registry do
           "models" => %{"claude-3-sonnet" => %{"id" => "claude-3-sonnet", ...}}
         }
       }
-
+
       ReqLLM.Provider.Registry.initialize(catalog)
       #=> :ok
 
diff --git a/lib/req_llm/providers/vllm.ex b/lib/req_llm/providers/vllm.ex
new file mode 100644
index 00000000..30171a9b
--- /dev/null
+++ b/lib/req_llm/providers/vllm.ex
@@ -0,0 +1,22 @@
+defmodule ReqLLM.Providers.VLLM do
+  @moduledoc """
+  vLLM – a fully OpenAI-compatible Chat Completions API.
+
+  An OPENAI_API_KEY is required, but its value can be arbitrary when starting the vLLM service.
+
+  ## Configuration
+
+      # Add to .env file (automatically loaded)
+      OPENAI_API_KEY=some_value...
+  """
+
+  @behaviour ReqLLM.Provider
+
+  use ReqLLM.Provider.DSL,
+    id: :vllm,
+    # Required to have a value, but generally overridden per model or via config.
+ base_url: "http://localhost:8005/v1", + metadata: "priv/models_dev/vllm.json", + default_env_key: "OPENAI_API_KEY", + provider_schema: [] +end diff --git a/priv/models_dev/.catalog_manifest.json b/priv/models_dev/.catalog_manifest.json index 6760469c..d751cf88 100644 --- a/priv/models_dev/.catalog_manifest.json +++ b/priv/models_dev/.catalog_manifest.json @@ -56,6 +56,7 @@ "priv/models_dev/v0.json", "priv/models_dev/venice.json", "priv/models_dev/vercel.json", + "priv/models_dev/vllm.json", "priv/models_dev/vultr.json", "priv/models_dev/wandb.json", "priv/models_dev/xai.json", diff --git a/priv/models_dev/vllm.json b/priv/models_dev/vllm.json new file mode 100644 index 00000000..ca788509 --- /dev/null +++ b/priv/models_dev/vllm.json @@ -0,0 +1,186 @@ +{ + "models": [ + { + "attachment": false, + "cost": { + "input": 2.0e-5, + "output": 0.0 + }, + "dimensions": { + "default": 1536, + "max": 1536, + "min": 1 + }, + "id": "test-only-text-embedding", + "knowledge": "2024-01", + "last_updated": "2024-01-25", + "limit": { + "context": 8191, + "output": 0 + }, + "modalities": { + "input": [ + "text" + ], + "output": [ + "embedding" + ] + }, + "name": "Test Only to test text embedding metadata", + "open_weights": false, + "provider": "vllm", + "provider_model_id": "test-only-text-embedding", + "reasoning": false, + "release_date": "2024-01-25", + "temperature": false, + "tool_call": false, + "type": "embedding", + "base_url": "http://localhost:8004/v1" + }, + { + "api": "chat", + "attachment": true, + "cost": { + "cache_read": 1.25, + "input": 2.5, + "output": 10 + }, + "id": "test-only-chat", + "knowledge": "2023-09", + "last_updated": "2024-08-06", + "limit": { + "context": 128000, + "output": 16384 + }, + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "name": "Test Only Model to check chat metadata", + "open_weights": false, + "provider": "vllm", + "provider_model_id": "test-only-chat", + "reasoning": false, + "release_date": "2024-05-13", + "supports_strict_tools": true, + "temperature": true, + "tool_call": true, + "base_url": "http://localhost:8006/v1" + }, + { + "api": "responses", + "attachment": true, + "cost": { + "cache_read": 0.01, + "input": 0.05, + "output": 0.4 + }, + "id": "test-only-responses", + "knowledge": "2024-05-30", + "last_updated": "2025-08-07", + "limit": { + "context": 400000, + "output": 128000 + }, + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "name": "Test Only Model to check responses metadata", + "open_weights": false, + "provider": "vllm", + "provider_model_id": "test-only-responses", + "reasoning": true, + "release_date": "2025-08-07", + "supports_json_schema_response_format": true, + "temperature": false, + "tool_call": true, + "base_url": "http://localhost:8001/v1" + }, + { + "api": "chat", + "attachment": false, + "cost": { + "cache_read": 1.25, + "input": 0.5, + "output": 1.5 + }, + "id": "test-only-max-completions", + "knowledge": "2021-09-01", + "last_updated": "2023-11-06", + "limit": { + "context": 16385, + "output": 4096 + }, + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "name": "Test Only Model to check max completions metadata", + "open_weights": false, + "provider": "vllm", + "provider_model_id": "test-only-max-completions", + "reasoning": false, + "release_date": "2023-03-01", + "temperature": true, + "tool_call": false, + "base_url": "http://localhost:8002/v1" + }, + { + "api": "chat", + "attachment": true, + "cost": { + 
"cache_read": 1.25, + "input": 2.5, + "output": 10 + }, + "id": "test-only-chat-no-model-base-url", + "knowledge": "2023-09", + "last_updated": "2024-08-06", + "limit": { + "context": 128000, + "output": 16384 + }, + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "name": "Test Only Model to check that provider base_url is used when not model base url", + "open_weights": false, + "provider": "vllm", + "provider_model_id": "test-only-chat-no-model-base-url", + "reasoning": false, + "release_date": "2024-05-13", + "supports_strict_tools": true, + "temperature": true, + "tool_call": true + } + ], + "provider": { + "base_url": "http://localhost:8005/v1", + "doc": "Local AI model provider", + "env": [ + "OPENAI_API_KEY" + ], + "id": "vllm", + "name": "vLLM" + } +} \ No newline at end of file diff --git a/priv/models_local/vllm_exclude.json b/priv/models_local/vllm_exclude.json new file mode 100644 index 00000000..39d683b9 --- /dev/null +++ b/priv/models_local/vllm_exclude.json @@ -0,0 +1,13 @@ +{ + "provider": { + "id": "vllm" + }, + "exclude": [ + "test-only-text-embedding", + "test-only-chat", + "test-only-text-embedding", + "test-only-responses", + "test-only-max-completions", + "test-only-chat-no-model-base-url" + ] +} diff --git a/test/providers/vllm_test.exs b/test/providers/vllm_test.exs new file mode 100644 index 00000000..08123709 --- /dev/null +++ b/test/providers/vllm_test.exs @@ -0,0 +1,778 @@ +defmodule ReqLLM.Providers.VLLMTest do + @moduledoc """ + Provider-level tests for VLLM implementation. + + Tests the provider contract directly without going through Generation layer. + Focus: prepare_request -> attach -> request -> decode pipeline. + """ + + use ReqLLM.ProviderCase, provider: ReqLLM.Providers.VLLM + + alias ReqLLM.Context + alias ReqLLM.Providers.VLLM + + describe "provider contract" do + test "provider identity and configuration" do + assert is_atom(VLLM.provider_id()) + assert is_binary(VLLM.default_base_url()) + assert String.starts_with?(VLLM.default_base_url(), "http") + end + + test "provider schema separation from core options" do + schema_keys = VLLM.provider_schema().schema |> Keyword.keys() + core_keys = ReqLLM.Provider.Options.generation_schema().schema |> Keyword.keys() + + # Provider-specific keys should not overlap with core generation keys + overlap = MapSet.intersection(MapSet.new(schema_keys), MapSet.new(core_keys)) + + assert MapSet.size(overlap) == 0, + "Schema overlap detected: #{inspect(MapSet.to_list(overlap))}" + end + + test "supported options include core generation keys" do + supported = VLLM.supported_provider_options() + core_keys = ReqLLM.Provider.Options.all_generation_keys() + + # All core keys should be supported (except meta-keys like :provider_options) + core_without_meta = Enum.reject(core_keys, &(&1 == :provider_options)) + missing = core_without_meta -- supported + assert missing == [], "Missing core generation keys: #{inspect(missing)}" + end + + test "provider_extended_generation_schema includes both base and provider options" do + extended_schema = VLLM.provider_extended_generation_schema() + extended_keys = extended_schema.schema |> Keyword.keys() + + # Should include all core generation keys + core_keys = ReqLLM.Provider.Options.all_generation_keys() + core_without_meta = Enum.reject(core_keys, &(&1 == :provider_options)) + + for core_key <- core_without_meta do + assert core_key in extended_keys, + "Extended schema missing core key: #{core_key}" + end + + # Should include provider-specific keys + 
provider_keys = VLLM.provider_schema().schema |> Keyword.keys() + + for provider_key <- provider_keys do + assert provider_key in extended_keys, + "Extended schema missing provider key: #{provider_key}" + end + end + end + + describe "request preparation & pipeline wiring" do + test "prepare_request creates configured chat request" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + opts = [temperature: 0.7, max_tokens: 100] + + {:ok, request} = VLLM.prepare_request(:chat, model, context, opts) + + assert %Req.Request{} = request + assert request.url.path == "/chat/completions" + assert request.method == :post + end + + test "prepare_request creates configured embedding request" do + model = ReqLLM.Model.from!("vllm:test-only-text-embedding") + text = "Hello, world!" + opts = [provider_options: []] + + {:ok, request} = VLLM.prepare_request(:embedding, model, text, opts) + + assert %Req.Request{} = request + assert request.url.path == "/embeddings" + assert request.method == :post + end + + test "prepare_request configures authentication and pipeline for chat" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + prompt = "Hello, world!" + opts = [temperature: 0.5, max_tokens: 50] + + {:ok, request} = VLLM.prepare_request(:chat, model, prompt, opts) + + # Verify core options + assert request.options[:model] == model.model + assert request.options[:temperature] == 0.5 + assert request.options[:max_tokens] == 50 + assert String.starts_with?(List.first(request.headers["authorization"]), "Bearer test-key-") + + # Verify pipeline steps + request_steps = Keyword.keys(request.request_steps) + response_steps = Keyword.keys(request.response_steps) + + assert :llm_encode_body in request_steps + assert :llm_decode_response in response_steps + end + + test "prepare_request configures authentication and pipeline for embedding" do + model = ReqLLM.Model.from!("vllm:test-only-text-embedding") + text = "Hello, world!" + opts = [] + + {:ok, request} = VLLM.prepare_request(:embedding, model, text, opts) + + # Verify embedding-specific options + assert request.options[:model] == model.model + assert request.options[:operation] == :embedding + assert request.options[:text] == "Hello, world!" 
+ + # Verify authentication + assert String.starts_with?(List.first(request.headers["authorization"]), "Bearer test-key-") + end + + test "error handling for invalid configurations" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + # Unsupported operation + {:error, error} = VLLM.prepare_request(:unsupported, model, context, []) + assert %ReqLLM.Error.Invalid.Parameter{} = error + + # Provider mismatch + wrong_model = ReqLLM.Model.from!("groq:llama-3.1-8b-instant") + + assert_raise ReqLLM.Error.Invalid.Provider, fn -> + Req.new() |> VLLM.attach(wrong_model, []) + end + end + end + + describe "body encoding & context translation" do + test "encode_body for chat without tools" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + # Create a mock request with the expected structure + mock_request = %Req.Request{ + options: [ + context: context, + model: model.model, + stream: false + ] + } + + # Test the encode_body function directly + updated_request = VLLM.encode_body(mock_request) + + assert is_binary(updated_request.body) + decoded = Jason.decode!(updated_request.body) + + assert decoded["model"] == "test-only-chat" + assert is_list(decoded["messages"]) + assert length(decoded["messages"]) == 2 + assert decoded["stream"] == false + refute Map.has_key?(decoded, "tools") + + [system_msg, user_msg] = decoded["messages"] + assert system_msg["role"] == "system" + assert user_msg["role"] == "user" + end + + test "encode_body for chat with tools but no tool_choice" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + tool = + ReqLLM.Tool.new!( + name: "test_tool", + description: "A test tool", + parameter_schema: [ + name: [type: :string, required: true, doc: "A name parameter"] + ], + callback: fn _ -> {:ok, "result"} end + ) + + mock_request = %Req.Request{ + options: [ + context: context, + model: model.model, + stream: false, + tools: [tool] + ] + } + + updated_request = VLLM.encode_body(mock_request) + decoded = Jason.decode!(updated_request.body) + + assert is_list(decoded["tools"]) + assert length(decoded["tools"]) == 1 + refute Map.has_key?(decoded, "tool_choice") + + [encoded_tool] = decoded["tools"] + assert encoded_tool["function"]["name"] == "test_tool" + end + + test "encode_body for chat with tools and tool_choice" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + tool = + ReqLLM.Tool.new!( + name: "specific_tool", + description: "A specific tool", + parameter_schema: [ + value: [type: :string, required: true, doc: "A value parameter"] + ], + callback: fn _ -> {:ok, "result"} end + ) + + tool_choice = %{type: "function", function: %{name: "specific_tool"}} + + mock_request = %Req.Request{ + options: [ + context: context, + model: model.model, + stream: false, + tools: [tool], + tool_choice: tool_choice + ] + } + + updated_request = VLLM.encode_body(mock_request) + decoded = Jason.decode!(updated_request.body) + + assert is_list(decoded["tools"]) + + assert decoded["tool_choice"] == %{ + "type" => "function", + "function" => %{"name" => "specific_tool"} + } + end + + test "encode_body for regular models uses max_tokens" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + mock_request = %Req.Request{ + options: [ + context: context, + model: model.model, + stream: false, + max_tokens: 1500, + temperature: 0.7 + ] + } + + updated_request = VLLM.encode_body(mock_request) + decoded = 
Jason.decode!(updated_request.body) + + assert decoded["model"] == "test-only-chat" + assert decoded["max_tokens"] == 1500 + assert decoded["temperature"] == 0.7 + refute Map.has_key?(decoded, "max_completion_tokens") + end + + test "encode_body for embedding operation" do + model = ReqLLM.Model.from!("vllm:test-only-text-embedding") + text = "Hello, world!" + + mock_request = %Req.Request{ + options: [ + operation: :embedding, + model: model.model, + text: text + ] + } + + updated_request = VLLM.encode_body(mock_request) + decoded = Jason.decode!(updated_request.body) + + assert decoded["model"] == "test-only-text-embedding" + assert decoded["input"] == "Hello, world!" + end + end + + describe "response decoding" do + test "decode_response for chat handles non-streaming responses" do + # Create a mock non-streaming response body + mock_response_body = %{ + "id" => "test-only-chat", + "object" => "chat.completion", + "created" => 1_677_652_288, + "model" => "test-only-chat", + "choices" => [ + %{ + "index" => 0, + "message" => %{ + "role" => "assistant", + "content" => "Hello! How can I help you today?" + }, + "logprobs" => nil, + "finish_reason" => "stop" + } + ], + "usage" => %{ + "prompt_tokens" => 13, + "completion_tokens" => 7, + "total_tokens" => 20 + } + } + + mock_resp = %Req.Response{ + status: 200, + body: mock_response_body + } + + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + mock_req = %Req.Request{ + options: [context: context, stream: false, model: model.model] + } + + # Test decode_response directly + {req, resp} = VLLM.decode_response({mock_req, mock_resp}) + + assert req == mock_req + assert %ReqLLM.Response{} = resp.body + + response = resp.body + assert is_binary(response.id) + assert response.model == model.model + assert response.stream? == false + + # Verify message normalization + assert response.message.role == :assistant + text = ReqLLM.Response.text(response) + assert is_binary(text) + assert String.length(text) > 0 + assert response.finish_reason in [:stop, :length] + + # Verify usage normalization + assert is_integer(response.usage.input_tokens) + assert is_integer(response.usage.output_tokens) + assert is_integer(response.usage.total_tokens) + + # Verify context advancement (original + assistant) + assert length(response.context.messages) == 3 + assert List.last(response.context.messages).role == :assistant + end + + test "decode_response for chat handles streaming responses" do + # Create mock streaming chunks + stream_chunks = [ + %{"choices" => [%{"delta" => %{"content" => "Hello"}}]}, + %{"choices" => [%{"delta" => %{"content" => " world"}}]}, + %{"choices" => [%{"finish_reason" => "stop"}]} + ] + + # Create a mock stream for real-time streaming + mock_real_time_stream = Stream.map(stream_chunks, & &1) + + # Create a mock Req response + mock_resp = %Req.Response{ + status: 200, + body: nil + } + + # Create a mock request with context, model, and real-time stream + context = context_fixture() + model = "test-only-chat" + + mock_req = %Req.Request{ + options: [context: context, stream: true, model: model], + private: %{real_time_stream: mock_real_time_stream} + } + + # Test decode_response directly + {req, resp} = VLLM.decode_response({mock_req, mock_resp}) + + assert req == mock_req + assert %ReqLLM.Response{} = resp.body + + response = resp.body + assert response.stream? 
== true + assert is_struct(response.stream, Stream) + assert response.model == model + + # Verify context is preserved (original messages only in streaming) + assert length(response.context.messages) == 2 + + # Verify stream structure and processing + assert response.usage == %{ + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + cached_tokens: 0, + reasoning_tokens: 0 + } + + assert response.finish_reason == nil + # http_task removed after fix for issue #42 (no duplicate request execution) + assert response.provider_meta == %{} + end + + test "decode_response for embedding returns raw body" do + # Create a mock embedding response body + mock_response_body = %{ + "object" => "list", + "data" => [ + %{ + "object" => "embedding", + "embedding" => [0.1, 0.2, 0.3], + "index" => 0 + } + ], + "model" => "test-only-text-embedding", + "usage" => %{ + "prompt_tokens" => 5, + "total_tokens" => 5 + } + } + + mock_resp = %Req.Response{ + status: 200, + body: mock_response_body + } + + mock_req = %Req.Request{ + options: [operation: :embedding, model: "test-only-text-embedding"] + } + + # Test decode_response for embeddings + {req, resp} = VLLM.decode_response({mock_req, mock_resp}) + + assert req == mock_req + # For embeddings, body should be the raw parsed JSON + assert resp.body == mock_response_body + end + + test "decode_response handles API errors with non-200 status" do + # Create error response + error_body = %{ + "error" => %{ + "message" => "Invalid API key provided", + "type" => "invalid_request_error", + "code" => "invalid_api_key" + } + } + + mock_resp = %Req.Response{ + status: 401, + body: error_body + } + + context = context_fixture() + + mock_req = %Req.Request{ + options: [context: context, model: "test-only-chat"] + } + + # Test decode_response error handling (now delegated to ChatAPI) + {req, error} = VLLM.decode_response({mock_req, mock_resp}) + + assert req == mock_req + assert %ReqLLM.Error.API.Response{} = error + assert error.status == 401 + assert error.reason == "Test-only-chat API error" + assert error.response_body == error_body + end + end + + describe "usage extraction" do + test "extract_usage with valid usage data" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + + body_with_usage = %{ + "usage" => %{ + "prompt_tokens" => 15, + "completion_tokens" => 25, + "total_tokens" => 40 + } + } + + {:ok, usage} = VLLM.extract_usage(body_with_usage, model) + assert usage["prompt_tokens"] == 15 + assert usage["completion_tokens"] == 25 + assert usage["total_tokens"] == 40 + end + + test "extract_usage with missing usage data" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + body_without_usage = %{"choices" => []} + + {:error, :no_usage_found} = VLLM.extract_usage(body_without_usage, model) + end + + test "extract_usage with invalid body type" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + + {:error, :invalid_body} = VLLM.extract_usage("invalid", model) + {:error, :invalid_body} = VLLM.extract_usage(nil, model) + {:error, :invalid_body} = VLLM.extract_usage(123, model) + end + end + + describe "embedding support" do + test "prepare_request for embedding with all options" do + model = ReqLLM.Model.from!("vllm:test-only-text-embedding") + text = "Sample text for embedding" + opts = [user: "test-user"] + + {:ok, request} = VLLM.prepare_request(:embedding, model, text, opts) + + assert request.options[:operation] == :embedding + assert request.options[:text] == text + assert request.options[:user] == "test-user" + end + + test "encode_body for embedding 
with optional parameters" do + model = ReqLLM.Model.from!("vllm:test-only-text-embedding") + + mock_request = %Req.Request{ + options: [ + operation: :embedding, + model: model.model, + text: "Test embedding text", + provider_options: [dimensions: 512, encoding_format: "base64"], + user: "test-user-123" + ] + } + + updated_request = VLLM.encode_body(mock_request) + decoded = Jason.decode!(updated_request.body) + + assert decoded["model"] == "test-only-text-embedding" + assert decoded["input"] == "Test embedding text" + assert decoded["dimensions"] == 512 + assert decoded["encoding_format"] == "base64" + assert decoded["user"] == "test-user-123" + end + end + + describe "error handling & robustness" do + test "context validation" do + # Multiple system messages should fail + invalid_context = + Context.new([ + Context.system("System 1"), + Context.system("System 2"), + Context.user("Hello") + ]) + + assert_raise ReqLLM.Error.Validation.Error, + ~r/should have at most one system message/, + fn -> + Context.validate!(invalid_context) + end + end + + test "prepare_request rejects unsupported operations" do + model = ReqLLM.Model.from!("vllm:test-only-chat") + context = context_fixture() + + {:error, error} = VLLM.prepare_request(:unsupported, model, context, []) + assert %ReqLLM.Error.Invalid.Parameter{} = error + + assert error.parameter =~ + "operation: :unsupported not supported by ReqLLM.Providers.VLLM. Supported operations: [:chat, :object, :embedding]" + end + + test "attach rejects invalid model provider" do + wrong_model = ReqLLM.Model.from!("groq:llama-3.1-8b-instant") + + assert_raise ReqLLM.Error.Invalid.Provider, fn -> + Req.new() |> VLLM.attach(wrong_model, []) + end + end + end + + describe "ResponsesAPI json_schema support" do + test "ResponsesAPI encode_text_format transforms response_format to flattened text.format" do + schema = [ + name: [type: :string, required: true], + title: [type: :string, required: true] + ] + + json_schema = ReqLLM.Schema.to_json(schema) + + # Enforce strict schema requirements + json_schema = + json_schema + |> Map.put("required", Map.keys(json_schema["properties"])) + |> Map.put("additionalProperties", false) + + response_format = %{ + type: "json_schema", + json_schema: %{ + name: "output_schema", + strict: true, + schema: json_schema + } + } + + # Test the encode_text_format function + text_format = ReqLLM.Providers.OpenAI.ResponsesAPI.encode_text_format(response_format) + + # ResponsesAPI expects flattened structure: text.format.{name, strict, schema} + # not text.format.json_schema.{name, strict, schema} + assert text_format["format"]["type"] == "json_schema" + assert text_format["format"]["name"] == "output_schema" + assert text_format["format"]["strict"] == true + assert text_format["format"]["schema"] != nil + refute Map.has_key?(text_format["format"], "json_schema") + end + + test "ResponsesAPI includes text parameter in request body with name at format level" do + model = ReqLLM.Model.from!("vllm:test-only-responses") + + schema = [ + name: [type: :string, required: true], + title: [type: :string, required: true] + ] + + json_schema = ReqLLM.Schema.to_json(schema) + + # Enforce strict schema requirements + json_schema = + json_schema + |> Map.put("required", Map.keys(json_schema["properties"])) + |> Map.put("additionalProperties", false) + + response_format = %{ + type: "json_schema", + json_schema: %{ + name: "output_schema", + strict: true, + schema: json_schema + } + } + + context = %ReqLLM.Context{ + messages: [ + %ReqLLM.Message{ + 
role: :user, + content: [%ReqLLM.Message.ContentPart{type: :text, text: "Generate a person"}] + } + ] + } + + opts = [ + provider_options: [response_format: response_format], + context: context, + model: model.model + ] + + # Create a mock request + request = %Req.Request{ + url: URI.parse("https://api.openai.com/v1/responses"), + method: :post, + options: opts + } + + # Test encode_body + encoded_request = ReqLLM.Providers.OpenAI.ResponsesAPI.encode_body(request) + body = Jason.decode!(encoded_request.body) + + # Verify text parameter exists with correct structure + # VLLM ResponsesAPI expects name at text.format.name level, not text.format.json_schema.name + assert Map.has_key?(body, "text") + assert body["text"]["format"]["type"] == "json_schema" + assert body["text"]["format"]["name"] == "output_schema" + assert body["text"]["format"]["strict"] == true + assert body["text"]["format"]["schema"] != nil + assert body["text"]["format"]["schema"]["type"] == "object" + assert Map.has_key?(body["text"]["format"]["schema"], "properties") + end + + test "ResponsesAPI decode_response extracts and validates object from json_schema response" do + model = ReqLLM.Model.from!("vllm:test-only-responses") + + schema = [ + name: [type: :string, required: true] + ] + + {:ok, compiled_schema} = ReqLLM.Schema.compile(schema) + + # Mock a ResponsesAPI response with JSON in output_text + mock_response_body = %{ + "id" => "resp_test123", + "model" => "gpt-5-nano-2025-08-07", + "object" => "response", + "status" => "completed", + "output" => [], + "output_text" => ~s({"name":"Mara Ellington"}), + "usage" => %{ + "input_tokens" => 31, + "output_tokens" => 594, + "reasoning_tokens" => 576 + } + } + + mock_resp = %Req.Response{ + status: 200, + body: mock_response_body + } + + context = %ReqLLM.Context{ + messages: [ + %ReqLLM.Message{ + role: :user, + content: [%ReqLLM.Message.ContentPart{type: :text, text: "Generate a person"}] + } + ] + } + + mock_req = %Req.Request{ + options: [ + context: context, + model: model.model, + operation: :object, + compiled_schema: compiled_schema + ] + } + + # Test decode_response + {req, resp} = ReqLLM.Providers.OpenAI.ResponsesAPI.decode_response({mock_req, mock_resp}) + + assert req == mock_req + assert %ReqLLM.Response{} = resp.body + + response = resp.body + + # The object field should be populated with the parsed and validated JSON + assert response.object != nil + assert response.object["name"] == "Mara Ellington" + + # The message should still contain the original JSON text + text = ReqLLM.Response.text(response) + assert text == ~s({"name":"Mara Ellington"}) + end + end + + describe "model base_url overrides the provider base_url" do + test "model with a base_url is used by the request base_url" do + {:ok, provider} = ReqLLM.Provider.Registry.get_provider(:vllm) + {:ok, model} = ReqLLM.Provider.Registry.get_model(:vllm, "test-only-chat") + {:ok, provider_metadata} = ReqLLM.Provider.Registry.get_provider_metadata(:vllm) + {:ok, request} = provider.prepare_request(:chat, model, "Hello!", temperature: 0.7) + assert request.options.base_url == "http://localhost:8006/v1" + assert request.options.base_url != provider_metadata["base_url"] + assert request.options.base_url == model.base_url + end + + test "another model with a base_url is used by the request base_url" do + {:ok, provider} = ReqLLM.Provider.Registry.get_provider(:vllm) + {:ok, model} = ReqLLM.Provider.Registry.get_model(:vllm, "test-only-responses") + {:ok, provider_metadata} = 
ReqLLM.Provider.Registry.get_provider_metadata(:vllm) + {:ok, request} = provider.prepare_request(:chat, model, "Hello!", temperature: 0.7) + assert request.options.base_url == "http://localhost:8001/v1" + assert request.options.base_url != provider_metadata["base_url"] + assert request.options.base_url == model.base_url + end + + test "request base_url for model with no base_url uses the provider base_url" do + {:ok, provider} = ReqLLM.Provider.Registry.get_provider(:vllm) + {:ok, model} = ReqLLM.Provider.Registry.get_model(:vllm, "test-only-chat-no-model-base-url") + {:ok, provider_metadata} = ReqLLM.Provider.Registry.get_provider_metadata(:vllm) + {:ok, request} = provider.prepare_request(:chat, model, "Hello!", temperature: 0.7) + assert request.options.base_url == "http://localhost:8005/v1" + assert request.options.base_url == provider_metadata["base_url"] + assert request.options.base_url != model.base_url + end + end +end
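
Reviewer note: a minimal sketch of the base_url resolution this change introduces, reusing the test-only model ids from priv/models_dev/vllm.json. It assumes the catalog has been loaded and mirrors the precedence in ReqLLM.Provider.Options.effective_base_url/3: model.base_url, then opts[:base_url], then application config, then provider metadata, then the provider default.

    # Model metadata may carry its own base_url, which wins over the provider value.
    {:ok, model} = ReqLLM.Provider.Registry.get_model(:vllm, "test-only-chat")
    model.base_url
    #=> "http://localhost:8006/v1"

    # A model without a base_url entry falls back to the provider-level value.
    {:ok, fallback} = ReqLLM.Provider.Registry.get_model(:vllm, "test-only-chat-no-model-base-url")
    fallback.base_url
    #=> nil (effective_base_url/3 then resolves to "http://localhost:8005/v1")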