From ac0a054aaee2d37adc005f942ffa06a4d619be67 Mon Sep 17 00:00:00 2001 From: the-praxs Date: Sun, 11 May 2025 00:02:47 +0000 Subject: [PATCH] chore: Update token prices (11-05-2025) --- README.md | 52 ++++--- pricing_table.md | 44 ++++-- tokencost/model_prices.json | 302 ++++++++++++++++++++++++++++++++---- 3 files changed, 337 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 090b56c..e0813e3 100644 --- a/README.md +++ b/README.md @@ -922,8 +922,8 @@ Units denominated in USD. All prices can be located in `model_prices.json`. | azure/gpt-3.5-turbo | $0.5 | $1.5 | 4,097 | 4096 | | gemini-2.0-pro-exp-02-05 | $1.25 | $10 | 2,097,152 | 8192 | | us.meta.llama3-3-70b-instruct-v1:0 | $0.72 | $0.72 | 128,000 | 4096 | -| perplexity/sonar | $1 | $1 | 127,072 | 127072 | -| perplexity/sonar-pro | $3 | $15 | 200,000 | 8096 | +| perplexity/sonar | $1 | $1 | 128,000 | nan | +| perplexity/sonar-pro | $3 | $15 | 200,000 | 8000 | | openrouter/google/gemini-2.0-flash-001 | $0.1 | $0.4 | 1,048,576 | 8192 | | gpt-4.5-preview | $75 | $150 | 128,000 | 16384 | | gpt-4.5-preview-2025-02-27 | $75 | $150 | 128,000 | 16384 | @@ -1078,17 +1078,17 @@ Units denominated in USD. All prices can be located in `model_prices.json`. | meta_llama/Llama-3.3-70B-Instruct | -- | -- | 128,000 | 4028 | | meta_llama/Llama-3.3-8B-Instruct | -- | -- | 128,000 | 4028 | | gemini-2.5-pro-exp-03-25 | $1.25 | $10 | 1,048,576 | 65535 | -| gemini/gemini-2.5-pro-exp-03-25 | $0 | $0 | 1,048,576 | 65536 | -| gemini/gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65536 | -| gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65536 | +| gemini/gemini-2.5-pro-exp-03-25 | $0 | $0 | 1,048,576 | 65535 | +| gemini/gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65535 | +| gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65535 | | gemini-2.0-flash | $0.1 | $0.4 | 1,048,576 | 8192 | | gemini-2.0-flash-lite | $0.08 | $0.3 | 1,048,576 | 8192 | | gemini-2.0-flash-lite-001 | $0.08 | $0.3 | 1,048,576 | 8192 | -| gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65536 | -| gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65536 | +| gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65535 | +| gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65535 | | gemini/gemini-2.0-flash-lite | $0.08 | $0.3 | 1,048,576 | 8192 | -| gemini/gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65536 | -| gemini/gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65536 | +| gemini/gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65535 | +| gemini/gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65535 | | vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas | $0.25 | $0.7 | 10,000,000 | 1e+07 | | vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas | $0.25 | $0.7 | 10,000,000 | 1e+07 | | vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas | $0.35 | $1.15 | 1,000,000 | 1e+06 | @@ -1113,7 +1113,7 @@ Units denominated in USD. All prices can be located in `model_prices.json`. | together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo | -- | -- | nan | nan | | together_ai/deepseek-ai/DeepSeek-V3 | -- | -- | nan | nan | | together_ai/mistralai/Mistral-Small-24B-Instruct-2501 | -- | -- | nan | nan | -| perplexity/sonar-deep-research | $2 | $8 | 12,000 | 12000 | +| perplexity/sonar-deep-research | $2 | $8 | 128,000 | nan | | fireworks_ai/accounts/fireworks/models/deepseek-r1 | $3 | $8 | 128,000 | 20480 | | fireworks_ai/accounts/fireworks/models/deepseek-r1-basic | $0.55 | $2.19 | 128,000 | 20480 | | fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct | $3 | $3 | 128,000 | 16384 | @@ -1123,15 +1123,29 @@ Units denominated in USD. All prices can be located in `model_prices.json`. | fireworks-ai-4.1b-to-16b | $0.2 | $0.2 | nan | nan | | fireworks-ai-above-16b | $0.9 | $0.9 | nan | nan | | databricks/databricks-claude-3-7-sonnet | $2.5 | $178.57 | 200,000 | 128000 | -| databricks/databricks-meta-llama-3-3-70b-instruct | $1 | $3 | 128,000 | 128000 |#### Installation via [GitHub](https://github.com/AgentOps-AI/tokencost): - -```bash -git clone git@github.com:AgentOps-AI/tokencost.git -cd tokencost -pip install -e . -``` - -## Running tests +| databricks/databricks-meta-llama-3-3-70b-instruct | $1 | $3 | 128,000 | 128000 | +| azure_ai/deepseek-v3-0324 | $1.14 | $4.56 | 128,000 | 8192 | +| azure_ai/Llama-4-Scout-17B-16E-Instruct | $0.2 | $0.78 | 10,000,000 | 16384 | +| azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8 | $1.41 | $0.35 | 1,000,000 | 16384 | +| cerebras/llama-3.3-70b | $0.85 | $1.2 | 128,000 | 128000 | +| perplexity/sonar-reasoning | $1 | $5 | 128,000 | nan | +| perplexity/sonar-reasoning-pro | $2 | $8 | 128,000 | nan | +| nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct | $0.09 | $0.29 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-3B-Instruct | $0.01 | $0.03 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-7B-Instruct | $0.01 | $0.03 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-32B-Instruct | $0.06 | $0.2 | nan | nan | +| nscale/Qwen/QwQ-32B | $0.18 | $0.2 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B | $0.38 | $0.38 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B | $0.02 | $0.02 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | $0.09 | $0.09 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | $0.2 | $0.2 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B | $0.07 | $0.07 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | $0.15 | $0.15 | nan | nan | +| nscale/mistralai/mixtral-8x22b-instruct-v0.1 | $0.6 | $0.6 | nan | nan | +| nscale/meta-llama/Llama-3.1-8B-Instruct | $0.03 | $0.03 | nan | nan | +| nscale/meta-llama/Llama-3.3-70B-Instruct | $0.2 | $0.2 | nan | nan | +| nscale/black-forest-labs/FLUX.1-schnell | -- | -- | nan | nan | +| nscale/stabilityai/stable-diffusion-xl-base-1.0 | -- | -- | nan | nan |## Running tests 1. Install `pytest` if you don't have it already diff --git a/pricing_table.md b/pricing_table.md index 91ec051..5f01bdc 100644 --- a/pricing_table.md +++ b/pricing_table.md @@ -796,8 +796,8 @@ | azure/gpt-3.5-turbo | $0.5 | $1.5 | 4,097 | 4096 | | gemini-2.0-pro-exp-02-05 | $1.25 | $10 | 2,097,152 | 8192 | | us.meta.llama3-3-70b-instruct-v1:0 | $0.72 | $0.72 | 128,000 | 4096 | -| perplexity/sonar | $1 | $1 | 127,072 | 127072 | -| perplexity/sonar-pro | $3 | $15 | 200,000 | 8096 | +| perplexity/sonar | $1 | $1 | 128,000 | nan | +| perplexity/sonar-pro | $3 | $15 | 200,000 | 8000 | | openrouter/google/gemini-2.0-flash-001 | $0.1 | $0.4 | 1,048,576 | 8192 | | gpt-4.5-preview | $75 | $150 | 128,000 | 16384 | | gpt-4.5-preview-2025-02-27 | $75 | $150 | 128,000 | 16384 | @@ -952,17 +952,17 @@ | meta_llama/Llama-3.3-70B-Instruct | -- | -- | 128,000 | 4028 | | meta_llama/Llama-3.3-8B-Instruct | -- | -- | 128,000 | 4028 | | gemini-2.5-pro-exp-03-25 | $1.25 | $10 | 1,048,576 | 65535 | -| gemini/gemini-2.5-pro-exp-03-25 | $0 | $0 | 1,048,576 | 65536 | -| gemini/gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65536 | -| gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65536 | +| gemini/gemini-2.5-pro-exp-03-25 | $0 | $0 | 1,048,576 | 65535 | +| gemini/gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65535 | +| gemini-2.5-flash-preview-04-17 | $0.15 | $0.6 | 1,048,576 | 65535 | | gemini-2.0-flash | $0.1 | $0.4 | 1,048,576 | 8192 | | gemini-2.0-flash-lite | $0.08 | $0.3 | 1,048,576 | 8192 | | gemini-2.0-flash-lite-001 | $0.08 | $0.3 | 1,048,576 | 8192 | -| gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65536 | -| gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65536 | +| gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65535 | +| gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65535 | | gemini/gemini-2.0-flash-lite | $0.08 | $0.3 | 1,048,576 | 8192 | -| gemini/gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65536 | -| gemini/gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65536 | +| gemini/gemini-2.5-pro-preview-05-06 | $1.25 | $10 | 1,048,576 | 65535 | +| gemini/gemini-2.5-pro-preview-03-25 | $1.25 | $10 | 1,048,576 | 65535 | | vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas | $0.25 | $0.7 | 10,000,000 | 1e+07 | | vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas | $0.25 | $0.7 | 10,000,000 | 1e+07 | | vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas | $0.35 | $1.15 | 1,000,000 | 1e+06 | @@ -987,7 +987,7 @@ | together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo | -- | -- | nan | nan | | together_ai/deepseek-ai/DeepSeek-V3 | -- | -- | nan | nan | | together_ai/mistralai/Mistral-Small-24B-Instruct-2501 | -- | -- | nan | nan | -| perplexity/sonar-deep-research | $2 | $8 | 12,000 | 12000 | +| perplexity/sonar-deep-research | $2 | $8 | 128,000 | nan | | fireworks_ai/accounts/fireworks/models/deepseek-r1 | $3 | $8 | 128,000 | 20480 | | fireworks_ai/accounts/fireworks/models/deepseek-r1-basic | $0.55 | $2.19 | 128,000 | 20480 | | fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct | $3 | $3 | 128,000 | 16384 | @@ -997,4 +997,26 @@ | fireworks-ai-4.1b-to-16b | $0.2 | $0.2 | nan | nan | | fireworks-ai-above-16b | $0.9 | $0.9 | nan | nan | | databricks/databricks-claude-3-7-sonnet | $2.5 | $178.57 | 200,000 | 128000 | -| databricks/databricks-meta-llama-3-3-70b-instruct | $1 | $3 | 128,000 | 128000 | \ No newline at end of file +| databricks/databricks-meta-llama-3-3-70b-instruct | $1 | $3 | 128,000 | 128000 | +| azure_ai/deepseek-v3-0324 | $1.14 | $4.56 | 128,000 | 8192 | +| azure_ai/Llama-4-Scout-17B-16E-Instruct | $0.2 | $0.78 | 10,000,000 | 16384 | +| azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8 | $1.41 | $0.35 | 1,000,000 | 16384 | +| cerebras/llama-3.3-70b | $0.85 | $1.2 | 128,000 | 128000 | +| perplexity/sonar-reasoning | $1 | $5 | 128,000 | nan | +| perplexity/sonar-reasoning-pro | $2 | $8 | 128,000 | nan | +| nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct | $0.09 | $0.29 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-3B-Instruct | $0.01 | $0.03 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-7B-Instruct | $0.01 | $0.03 | nan | nan | +| nscale/Qwen/Qwen2.5-Coder-32B-Instruct | $0.06 | $0.2 | nan | nan | +| nscale/Qwen/QwQ-32B | $0.18 | $0.2 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B | $0.38 | $0.38 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B | $0.02 | $0.02 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | $0.09 | $0.09 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | $0.2 | $0.2 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B | $0.07 | $0.07 | nan | nan | +| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | $0.15 | $0.15 | nan | nan | +| nscale/mistralai/mixtral-8x22b-instruct-v0.1 | $0.6 | $0.6 | nan | nan | +| nscale/meta-llama/Llama-3.1-8B-Instruct | $0.03 | $0.03 | nan | nan | +| nscale/meta-llama/Llama-3.3-70B-Instruct | $0.2 | $0.2 | nan | nan | +| nscale/black-forest-labs/FLUX.1-schnell | -- | -- | nan | nan | +| nscale/stabilityai/stable-diffusion-xl-base-1.0 | -- | -- | nan | nan | \ No newline at end of file diff --git a/tokencost/model_prices.json b/tokencost/model_prices.json index 890987d..7c7c0d4 100644 --- a/tokencost/model_prices.json +++ b/tokencost/model_prices.json @@ -6257,7 +6257,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "ollama/mistral": { "max_tokens": 8192, @@ -6266,7 +6267,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "completion" + "mode": "completion", + "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.1": { "max_tokens": 8192, @@ -6275,7 +6277,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.2": { "max_tokens": 32768, @@ -6284,7 +6287,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "ollama/mixtral-8x7B-Instruct-v0.1": { "max_tokens": 32768, @@ -6293,7 +6297,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "ollama/mixtral-8x22B-Instruct-v0.1": { "max_tokens": 65536, @@ -6302,7 +6307,8 @@ "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "ollama", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "ollama/codellama": { "max_tokens": 4096, @@ -9458,22 +9464,33 @@ "supports_tool_choice": false }, "perplexity/sonar": { - "max_tokens": 127072, - "max_input_tokens": 127072, - "max_output_tokens": 127072, + "max_tokens": 128000, + "max_input_tokens": 128000, "input_cost_per_token": 1e-06, "output_cost_per_token": 1e-06, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_low": 0.005, + "search_context_size_medium": 0.008, + "search_context_size_high": 0.012 + }, + "supports_web_search": true }, "perplexity/sonar-pro": { - "max_tokens": 200000, + "max_tokens": 8000, "max_input_tokens": 200000, - "max_output_tokens": 8096, + "max_output_tokens": 8000, "input_cost_per_token": 3e-06, "output_cost_per_token": 1.5e-05, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01, + "search_context_size_high": 0.014 + }, + "supports_web_search": true }, "openrouter/google/gemini-2.0-flash-001": { "max_tokens": 8192, @@ -12024,9 +12041,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini/gemini-2.5-pro-exp-03-25": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12065,9 +12082,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini/gemini-2.5-flash-preview-04-17": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12105,9 +12122,9 @@ "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview" }, "gemini-2.5-flash-preview-04-17": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12241,9 +12258,9 @@ "deprecation_date": "2026-02-25" }, "gemini-2.5-pro-preview-05-06": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12281,9 +12298,9 @@ "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview" }, "gemini-2.5-pro-preview-03-25": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12354,9 +12371,9 @@ "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite" }, "gemini/gemini-2.5-pro-preview-05-06": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12390,9 +12407,9 @@ "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" }, "gemini/gemini-2.5-pro-preview-03-25": { - "max_tokens": 65536, + "max_tokens": 65535, "max_input_tokens": 1048576, - "max_output_tokens": 65536, + "max_output_tokens": 65535, "max_images_per_prompt": 3000, "max_videos_per_prompt": 10, "max_video_length": 1, @@ -12769,12 +12786,11 @@ "supports_tool_choice": true }, "perplexity/sonar-deep-research": { - "max_tokens": 12000, - "max_input_tokens": 12000, - "max_output_tokens": 12000, + "max_tokens": 128000, + "max_input_tokens": 128000, "input_cost_per_token": 2e-06, "output_cost_per_token": 8e-06, - "output_cost_per_reasoning_token": 3e-05, + "output_cost_per_reasoning_token": 3e-06, "litellm_provider": "perplexity", "mode": "chat", "search_context_cost_per_query": { @@ -12894,5 +12910,229 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "supports_tool_choice": true + }, + "azure_ai/deepseek-v3-0324": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 1.14e-06, + "output_cost_per_token": 4.56e-06, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438" + }, + "azure_ai/Llama-4-Scout-17B-16E-Instruct": { + "max_tokens": 16384, + "max_input_tokens": 10000000, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 7.8e-07, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_tool_choice": true + }, + "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "max_tokens": 16384, + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "input_cost_per_token": 1.41e-06, + "output_cost_per_token": 3.5e-07, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_tool_choice": true + }, + "cerebras/llama-3.3-70b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 8.5e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "cerebras", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "perplexity/sonar-reasoning": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 5e-06, + "litellm_provider": "perplexity", + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_low": 0.005, + "search_context_size_medium": 0.008, + "search_context_size_high": 0.014 + }, + "supports_web_search": true, + "supports_reasoning": true + }, + "perplexity/sonar-reasoning-pro": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "input_cost_per_token": 2e-06, + "output_cost_per_token": 8e-06, + "litellm_provider": "perplexity", + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01, + "search_context_size_high": 0.014 + }, + "supports_web_search": true, + "supports_reasoning": true + }, + "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 9e-08, + "output_cost_per_token": 2.9e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-3B-Instruct": { + "input_cost_per_token": 1e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-7B-Instruct": { + "input_cost_per_token": 1e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 6e-08, + "output_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/QwQ-32B": { + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 3.75e-07, + "output_cost_per_token": 3.75e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.75/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "input_cost_per_token": 2.5e-08, + "output_cost_per_token": 2.5e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.05/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { + "input_cost_per_token": 9e-08, + "output_cost_per_token": 9e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.18/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { + "input_cost_per_token": 7e-08, + "output_cost_per_token": 7e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.14/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.30/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $1.20/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/meta-llama/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.06/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + } + }, + "nscale/black-forest-labs/FLUX.1-schnell": { + "mode": "image_generation", + "input_cost_per_pixel": 1.3e-09, + "output_cost_per_pixel": 0.0, + "litellm_provider": "nscale", + "supported_endpoints": [ + "/v1/images/generations" + ], + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models" + }, + "nscale/stabilityai/stable-diffusion-xl-base-1.0": { + "mode": "image_generation", + "input_cost_per_pixel": 3e-09, + "output_cost_per_pixel": 0.0, + "litellm_provider": "nscale", + "supported_endpoints": [ + "/v1/images/generations" + ], + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models" } } \ No newline at end of file