diff --git a/docs/src/pages/docs/desktop/remote-models/_meta.json b/docs/src/pages/docs/desktop/remote-models/_meta.json
index 60268b73c9..ce7a19e0c8 100644
--- a/docs/src/pages/docs/desktop/remote-models/_meta.json
+++ b/docs/src/pages/docs/desktop/remote-models/_meta.json
@@ -2,6 +2,9 @@
"anthropic": {
"title": "Anthropic"
},
+ "cerebras": {
+ "title": "Cerebras"
+ },
"cohere": {
"title": "Cohere"
},
diff --git a/docs/src/pages/docs/desktop/remote-models/cerebras.mdx b/docs/src/pages/docs/desktop/remote-models/cerebras.mdx
new file mode 100644
index 0000000000..ca43e6cf30
--- /dev/null
+++ b/docs/src/pages/docs/desktop/remote-models/cerebras.mdx
@@ -0,0 +1,134 @@
+---
+title: Cerebras
+description: Learn how to integrate Cerebras with Jan for ultra-fast AI inference.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cerebras,
+ ultra-fast inference,
+ OpenAI compatible,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Settings } from 'lucide-react'
+
+# Cerebras
+
+## Overview
+
+Jan supports Cerebras Inference, enabling you to leverage ultra-fast AI inference with speeds up to 3,000 tokens per second. Cerebras offers OpenAI-compatible endpoints, making integration seamless.
+
+## Supported Models
+
+### Production Models
+- **Llama 4 Scout** (`llama-4-scout-17b-16e-instruct`) - 17B parameters, ~2600 tokens/s
+- **Llama 3.1 8B** (`llama3.1-8b`) - 8B parameters, ~2200 tokens/s
+- **Llama 3.3 70B** (`llama-3.3-70b`) - 70B parameters, ~2100 tokens/s, supports tool calling
+- **OpenAI GPT OSS** (`gpt-oss-120b`) - 120B parameters, ~3000 tokens/s, supports tool calling
+- **Qwen 3 32B** (`qwen-3-32b`) - 32B parameters, ~2600 tokens/s
+
+### Preview Models (Evaluation Only)
+- **Qwen 3 235B Instruct** - 235B parameters, ~1400 tokens/s
+- **Qwen 3 235B Thinking** - 235B parameters, ~1700 tokens/s
+- **Qwen 3 Coder 480B** - 480B parameters, ~2000 tokens/s, supports tool calling
+
+<Callout type="warning">
+ Preview models are for evaluation only and may be discontinued with short notice.
+
+ The following models are scheduled for deprecation:
+ - Llama 4 Scout - November 3, 2025
+ - Qwen 3 Coder 480B - November 5, 2025
+ - Qwen 3 235B Thinking - November 14, 2025
+</Callout>
+
+## How to Integrate Cerebras with Jan
+
+
+<Steps>
+
+### Step 1: Configure Cerebras
+
+1. Obtain a Cerebras API key from [Cerebras Cloud](https://cloud.cerebras.ai/api-keys)
+2. In Jan, navigate to **Settings** (<Settings width={16} height={16} />) → **Providers**
+3. Find **Cerebras** in the provider list and toggle it **On**
+4. Click the settings icon next to Cerebras
+
+### Step 2: Enter API Credentials
+
+1. In the **API Key** field, paste your Cerebras API key
+2. The **Base URL** should be pre-filled with `https://api.cerebras.ai/v1`
+3. Click **Save**, or the changes will auto-save
+
+### Step 3: Select Models
+
+1. Click **Fetch Models** to retrieve available Cerebras models
+2. Navigate to **Settings** → **Models** or start a new conversation
+3. Select a Cerebras model from the model dropdown
+4. Start chatting with ultra-fast inference!
+
+</Steps>
+
+## Features
+
+### Tool Calling
+The following Cerebras models support tool calling (function calling):
+- `gpt-oss-120b`
+- `llama-3.3-70b`
+- `qwen-3-coder-480b`
+
+### Streaming
+All Cerebras models support streaming responses for real-time output.
+
+<Callout type="info">
+ Streaming is not supported for reasoning models when using JSON mode or tool calling together.
+</Callout>
+
+### Developer-Level Instructions (gpt-oss-120b)
+The `gpt-oss-120b` model treats system messages as developer-level instructions, giving them stronger influence than standard OpenAI behavior.
+
+## Limitations
+
+The following OpenAI parameters are **not supported** by Cerebras and will return a 400 error:
+- `frequency_penalty`
+- `logit_bias`
+- `presence_penalty`
+- `parallel_tool_calls`
+- `service_tier`
+
+## Troubleshooting
+
+### Authentication Failed
+- Verify your API key is correct
+- Ensure you have credits in your Cerebras account
+- Check that the API key hasn't expired
+
+### Models Not Loading
+- Click the **Fetch Models** button in provider settings
+- Check your internet connection
+- Verify the base URL is `https://api.cerebras.ai/v1`
+
+### Slow Performance
+- Cerebras offers extremely fast inference (~2000-3000 tokens/s)
+- If experiencing slowness, check your network connection
+- Verify you're using production models, not preview models
+
+## Additional Resources
+
+- [Cerebras Inference Documentation](https://inference-docs.cerebras.ai/)
+- [Cerebras API Reference](https://inference-docs.cerebras.ai/api-reference/chat-completions)
+- [Supported Models](https://inference-docs.cerebras.ai/supported-models)
+- [Cerebras Pricing](https://www.cerebras.ai/pricing)
+
+## Support
+
+For Cerebras-specific issues, contact [Cerebras Support](https://www.cerebras.ai/support).
+
+For Jan integration issues, visit [Jan GitHub](https://github.com/janhq/jan/issues).
diff --git a/web-app/public/images/model-provider/cerebras.png b/web-app/public/images/model-provider/cerebras.png
new file mode 100644
index 0000000000..2cba48e532
Binary files /dev/null and b/web-app/public/images/model-provider/cerebras.png differ
diff --git a/web-app/src/consts/providers.ts b/web-app/src/consts/providers.ts
index d3d8068881..e8b72303da 100644
--- a/web-app/src/consts/providers.ts
+++ b/web-app/src/consts/providers.ts
@@ -290,6 +290,97 @@ export const predefinedProviders = [
],
models: [],
},
+ {
+ active: true,
+ api_key: '',
+ base_url: 'https://api.cerebras.ai/v1',
+ explore_models_url: 'https://inference-docs.cerebras.ai/supported-models',
+ provider: 'cerebras',
+ settings: [
+ {
+ key: 'api-key',
+ title: 'API Key',
+ description:
+ "The Cerebras API uses API keys for authentication. Visit your [API Keys](https://cloud.cerebras.ai/api-keys) page to retrieve the API key you'll use in your requests.",
+ controller_type: 'input',
+ controller_props: {
+ placeholder: 'Insert API Key',
+ value: '',
+ type: 'password',
+ input_actions: ['unobscure', 'copy'],
+ },
+ },
+ {
+ key: 'base-url',
+ title: 'Base URL',
+ description:
+ 'The base OpenAI-compatible endpoint to use. See the [Cerebras API documentation](https://inference-docs.cerebras.ai/api-reference/chat-completions) for more information.',
+ controller_type: 'input',
+ controller_props: {
+ placeholder: 'https://api.cerebras.ai/v1',
+ value: 'https://api.cerebras.ai/v1',
+ },
+ },
+ ],
+ models: [
+ {
+ id: 'llama-4-scout-17b-16e-instruct',
+ name: 'Llama 4 Scout (17B params)',
+ version: '1.0',
+ description: 'Fast inference with ~2600 tokens/s. Scheduled for deprecation Nov 3, 2025.',
+ capabilities: ['completion'],
+ },
+ {
+ id: 'llama3.1-8b',
+ name: 'Llama 3.1 8B',
+ version: '1.0',
+ description: 'Compact model with ~2200 tokens/s.',
+ capabilities: ['completion'],
+ },
+ {
+ id: 'llama-3.3-70b',
+ name: 'Llama 3.3 70B',
+ version: '1.0',
+ description: 'Powerful model with ~2100 tokens/s.',
+ capabilities: ['completion', 'tools'],
+ },
+ {
+ id: 'gpt-oss-120b',
+ name: 'OpenAI GPT OSS (120B params)',
+ version: '1.0',
+ description: 'Ultra-fast with ~3000 tokens/s. Supports developer-level system instructions.',
+ capabilities: ['completion', 'tools'],
+ },
+ {
+ id: 'qwen-3-32b',
+ name: 'Qwen 3 32B',
+ version: '1.0',
+ description: 'Fast Qwen model with ~2600 tokens/s.',
+ capabilities: ['completion'],
+ },
+ {
+ id: 'qwen-3-235b-a22b-instruct-2507',
+ name: 'Qwen 3 235B Instruct (Preview)',
+ version: '1.0',
+ description: 'Preview model - evaluation only. ~1400 tokens/s. Deprecates Nov 14, 2025.',
+ capabilities: ['completion'],
+ },
+ {
+ id: 'qwen-3-235b-a22b-thinking-2507',
+ name: 'Qwen 3 235B Thinking (Preview)',
+ version: '1.0',
+ description: 'Preview reasoning model - evaluation only. ~1700 tokens/s. Deprecates Nov 14, 2025.',
+ capabilities: ['completion'],
+ },
+ {
+ id: 'qwen-3-coder-480b',
+ name: 'Qwen 3 Coder 480B (Preview)',
+ version: '1.0',
+ description: 'Preview coding model - evaluation only. ~2000 tokens/s. Deprecates Nov 5, 2025.',
+ capabilities: ['completion', 'tools'],
+ },
+ ],
+ },
{
active: true,
api_key: '',
diff --git a/web-app/src/lib/utils.ts b/web-app/src/lib/utils.ts
index 6a4fc92acf..09c9858a6d 100644
--- a/web-app/src/lib/utils.ts
+++ b/web-app/src/lib/utils.ts
@@ -46,6 +46,8 @@ export function getProviderLogo(provider: string) {
return '/images/model-provider/open-router.svg'
case 'groq':
return '/images/model-provider/groq.svg'
+ case 'cerebras':
+ return '/images/model-provider/cerebras.png'
case 'cohere':
return '/images/model-provider/cohere.svg'
case 'gemini':