diff --git a/docs/src/pages/docs/desktop/remote-models/_meta.json b/docs/src/pages/docs/desktop/remote-models/_meta.json
index 60268b73c9..ce7a19e0c8 100644
--- a/docs/src/pages/docs/desktop/remote-models/_meta.json
+++ b/docs/src/pages/docs/desktop/remote-models/_meta.json
@@ -2,6 +2,9 @@
   "anthropic": {
     "title": "Anthropic"
   },
+  "cerebras": {
+    "title": "Cerebras"
+  },
   "cohere": {
     "title": "Cohere"
   },
diff --git a/docs/src/pages/docs/desktop/remote-models/cerebras.mdx b/docs/src/pages/docs/desktop/remote-models/cerebras.mdx
new file mode 100644
index 0000000000..ca43e6cf30
--- /dev/null
+++ b/docs/src/pages/docs/desktop/remote-models/cerebras.mdx
@@ -0,0 +1,134 @@
+---
+title: Cerebras
+description: Learn how to integrate Cerebras with Jan for ultra-fast AI inference.
+keywords:
+  [
+    Jan,
+    Customizable Intelligence, LLM,
+    local AI,
+    privacy focus,
+    free and open source,
+    private and offline,
+    conversational AI,
+    no-subscription fee,
+    large language models,
+    Cerebras,
+    ultra-fast inference,
+    OpenAI compatible,
+  ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Settings, Plus } from 'lucide-react'
+
+# Cerebras
+
+## Overview
+
+Jan supports Cerebras Inference, enabling you to leverage ultra-fast AI inference with speeds up to 3,000 tokens per second. Cerebras offers OpenAI-compatible endpoints, making integration seamless.
+
+## Supported Models
+
+### Production Models
+- **Llama 4 Scout** (`llama-4-scout-17b-16e-instruct`) - 17B parameters, ~2600 tokens/s
+- **Llama 3.1 8B** (`llama3.1-8b`) - 8B parameters, ~2200 tokens/s
+- **Llama 3.3 70B** (`llama-3.3-70b`) - 70B parameters, ~2100 tokens/s, supports tool calling
+- **OpenAI GPT OSS** (`gpt-oss-120b`) - 120B parameters, ~3000 tokens/s, supports tool calling
+- **Qwen 3 32B** (`qwen-3-32b`) - 32B parameters, ~2600 tokens/s
+
+### Preview Models (Evaluation Only)
+- **Qwen 3 235B Instruct** - 235B parameters, ~1400 tokens/s
+- **Qwen 3 235B Thinking** - 235B parameters, ~1700 tokens/s
+- **Qwen 3 Coder 480B** - 480B parameters, ~2000 tokens/s, supports tool calling
+
+<Callout type="warning">
+  Preview models are for evaluation only and may be discontinued with short notice.
+
+  The following models are scheduled for deprecation:
+  - Llama 4 Scout - November 3, 2025
+  - Qwen 3 Coder 480B - November 5, 2025
+  - Qwen 3 235B Instruct - November 14, 2025
+  - Qwen 3 235B Thinking - November 14, 2025
+</Callout>
+
+## How to Integrate Cerebras with Jan
+
+<Steps>
+
+### Step 1: Configure Cerebras
+
+1. Obtain a Cerebras API key from [Cerebras Cloud](https://cloud.cerebras.ai/api-keys)
+2. In Jan, navigate to **Settings** (<Settings width={16} height={16} style={{display:'inline'}} />) → **Providers**
+3. Find **Cerebras** in the provider list and toggle it **On**
+4. Click the settings icon next to Cerebras
+
+### Step 2: Enter API Credentials
+
+1. In the **API Key** field, paste your Cerebras API key
+2. The **Base URL** should be pre-filled with `https://api.cerebras.ai/v1`
+3. Click **Save** or the changes will auto-save
+
+### Step 3: Select Models
+
+1. Click **Fetch Models** to retrieve available Cerebras models
+2. Navigate to **Settings** → **Models** or start a new conversation
+3. Select a Cerebras model from the model dropdown
+4. Start chatting with ultra-fast inference!
+
+</Steps>
+
+## Features
+
+### Tool Calling
+The following Cerebras models support tool calling (function calling):
+- `gpt-oss-120b`
+- `llama-3.3-70b`
+- `qwen-3-coder-480b`
+
+### Streaming
+All Cerebras models support streaming responses for real-time output.
+
+<Callout type="info">
+  Streaming is not supported for reasoning models when using JSON mode or tool calling together.
+</Callout>
+
+### Developer-Level Instructions (gpt-oss-120b)
+The `gpt-oss-120b` model treats system messages as developer-level instructions, giving them stronger influence than standard OpenAI behavior.
+
+## Limitations
+
+The following OpenAI parameters are **not supported** by Cerebras and will return a 400 error:
+- `frequency_penalty`
+- `logit_bias`
+- `presence_penalty`
+- `parallel_tool_calls`
+- `service_tier`
+
+## Troubleshooting
+
+### Authentication Failed
+- Verify your API key is correct
+- Ensure you have credits in your Cerebras account
+- Check that the API key hasn't expired
+
+### Models Not Loading
+- Click the **Fetch Models** button in provider settings
+- Check your internet connection
+- Verify the base URL is `https://api.cerebras.ai/v1`
+
+### Slow Performance
+- Cerebras offers extremely fast inference (~2000-3000 tokens/s)
+- If experiencing slowness, check your network connection
+- Verify you're using production models, not preview models
+
+## Additional Resources
+
+- [Cerebras Inference Documentation](https://inference-docs.cerebras.ai/)
+- [Cerebras API Reference](https://inference-docs.cerebras.ai/api-reference/chat-completions)
+- [Supported Models](https://inference-docs.cerebras.ai/supported-models)
+- [Cerebras Pricing](https://www.cerebras.ai/pricing)
+
+## Support
+
+For Cerebras-specific issues, contact [Cerebras Support](https://www.cerebras.ai/support).
+
+For Jan integration issues, visit [Jan GitHub](https://github.com/janhq/jan/issues).
diff --git a/web-app/public/images/model-provider/cerebras.png b/web-app/public/images/model-provider/cerebras.png
new file mode 100644
index 0000000000..2cba48e532
Binary files /dev/null and b/web-app/public/images/model-provider/cerebras.png differ
diff --git a/web-app/src/consts/providers.ts b/web-app/src/consts/providers.ts
index d3d8068881..e8b72303da 100644
--- a/web-app/src/consts/providers.ts
+++ b/web-app/src/consts/providers.ts
@@ -290,6 +290,97 @@ export const predefinedProviders = [
     ],
     models: [],
   },
+  {
+    active: true,
+    api_key: '',
+    base_url: 'https://api.cerebras.ai/v1',
+    explore_models_url: 'https://inference-docs.cerebras.ai/supported-models',
+    provider: 'cerebras',
+    settings: [
+      {
+        key: 'api-key',
+        title: 'API Key',
+        description:
+          "The Cerebras API uses API keys for authentication. Visit your [API Keys](https://cloud.cerebras.ai/api-keys) page to retrieve the API key you'll use in your requests.",
+        controller_type: 'input',
+        controller_props: {
+          placeholder: 'Insert API Key',
+          value: '',
+          type: 'password',
+          input_actions: ['unobscure', 'copy'],
+        },
+      },
+      {
+        key: 'base-url',
+        title: 'Base URL',
+        description:
+          'The base OpenAI-compatible endpoint to use. See the [Cerebras API documentation](https://inference-docs.cerebras.ai/api-reference/chat-completions) for more information.',
+        controller_type: 'input',
+        controller_props: {
+          placeholder: 'https://api.cerebras.ai/v1',
+          value: 'https://api.cerebras.ai/v1',
+        },
+      },
+    ],
+    models: [
+      {
+        id: 'llama-4-scout-17b-16e-instruct',
+        name: 'Llama 4 Scout (17B params)',
+        version: '1.0',
+        description: 'Fast inference with ~2600 tokens/s. Scheduled for deprecation Nov 3, 2025.',
+        capabilities: ['completion'],
+      },
+      {
+        id: 'llama3.1-8b',
+        name: 'Llama 3.1 8B',
+        version: '1.0',
+        description: 'Compact model with ~2200 tokens/s.',
+        capabilities: ['completion'],
+      },
+      {
+        id: 'llama-3.3-70b',
+        name: 'Llama 3.3 70B',
+        version: '1.0',
+        description: 'Powerful model with ~2100 tokens/s.',
+        capabilities: ['completion', 'tools'],
+      },
+      {
+        id: 'gpt-oss-120b',
+        name: 'OpenAI GPT OSS (120B params)',
+        version: '1.0',
+        description: 'Ultra-fast with ~3000 tokens/s. Supports developer-level system instructions.',
+        capabilities: ['completion', 'tools'],
+      },
+      {
+        id: 'qwen-3-32b',
+        name: 'Qwen 3 32B',
+        version: '1.0',
+        description: 'Fast Qwen model with ~2600 tokens/s.',
+        capabilities: ['completion'],
+      },
+      {
+        id: 'qwen-3-235b-a22b-instruct-2507',
+        name: 'Qwen 3 235B Instruct (Preview)',
+        version: '1.0',
+        description: 'Preview model - evaluation only. ~1400 tokens/s. Deprecates Nov 14, 2025.',
+        capabilities: ['completion'],
+      },
+      {
+        id: 'qwen-3-235b-a22b-thinking-2507',
+        name: 'Qwen 3 235B Thinking (Preview)',
+        version: '1.0',
+        description: 'Preview reasoning model - evaluation only. ~1700 tokens/s. Deprecates Nov 14, 2025.',
+        capabilities: ['completion'],
+      },
+      {
+        id: 'qwen-3-coder-480b',
+        name: 'Qwen 3 Coder 480B (Preview)',
+        version: '1.0',
+        description: 'Preview coding model - evaluation only. ~2000 tokens/s. Deprecates Nov 5, 2025.',
+        capabilities: ['completion', 'tools'],
+      },
+    ],
+  },
   {
     active: true,
     api_key: '',
diff --git a/web-app/src/lib/utils.ts b/web-app/src/lib/utils.ts
index 6a4fc92acf..09c9858a6d 100644
--- a/web-app/src/lib/utils.ts
+++ b/web-app/src/lib/utils.ts
@@ -46,6 +46,8 @@ export function getProviderLogo(provider: string) {
       return '/images/model-provider/open-router.svg'
     case 'groq':
       return '/images/model-provider/groq.svg'
+    case 'cerebras':
+      return '/images/model-provider/cerebras.png'
     case 'cohere':
       return '/images/model-provider/cohere.svg'
     case 'gemini':