Skip to content

Commit d471249

Browse files
committed
feat(provider): add 1M context window support for Anthropic models
Add the context-1m-2025-08-07 beta header for the Anthropic provider, enabling the 1M-token context window for Claude Opus 4.6, Sonnet 4.6, Sonnet 4.5, and Sonnet 4.0. Override the context limit from 200K to 1M for supported models so compaction triggers at the correct threshold instead of prematurely at 200K. Also fix compaction token counting: when model.limit.input is set, count only input tokens (not output/thinking tokens) against the input limit, since output tokens do not consume the input context window.
1 parent 51e6000 commit d471249

File tree

3 files changed

+38
-9
lines changed

3 files changed

+38
-9
lines changed

packages/opencode/src/provider/provider.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ export namespace Provider {
123123
options: {
124124
headers: {
125125
"anthropic-beta":
126-
"claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14",
126+
"claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
127127
},
128128
},
129129
}
@@ -995,6 +995,29 @@ export namespace Provider {
995995

996996
for (const [modelID, model] of Object.entries(provider.models)) {
997997
model.api.id = model.api.id ?? model.id ?? modelID
998+
999+
// Override context window for Anthropic models that support 1M via the
1000+
// context-1m-2025-08-07 beta header. models.dev currently reports 200K;
1001+
// this ensures compaction uses the real limit for API key users.
1002+
// See: https://docs.anthropic.com/en/docs/about-claude/models
1003+
if (
1004+
providerID === "anthropic" &&
1005+
model.limit.context < 1_000_000 &&
1006+
[
1007+
"opus-4-6",
1008+
"opus-4.6",
1009+
"sonnet-4-6",
1010+
"sonnet-4.6",
1011+
"sonnet-4-5",
1012+
"sonnet-4.5",
1013+
"sonnet-4-0",
1014+
"sonnet-4.0",
1015+
"sonnet-4-20250514",
1016+
].some((p) => model.api.id.includes(p))
1017+
) {
1018+
model.limit.context = 1_000_000
1019+
}
1020+
9981021
if (modelID === "gpt-5-chat-latest" || (providerID === "openrouter" && modelID === "openai/gpt-5-chat"))
9991022
delete provider.models[modelID]
10001023
if (model.status === "alpha" && !Flag.OPENCODE_ENABLE_EXPERIMENTAL_MODELS) delete provider.models[modelID]

packages/opencode/src/session/compaction.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,12 @@ export namespace SessionCompaction {
3535
const context = input.model.limit.context
3636
if (context === 0) return false
3737

38-
const count =
39-
input.tokens.total ||
40-
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
38+
// When limit.input is set, only count input tokens against that limit
39+
// (output/thinking tokens don't consume the input window).
40+
const count = input.model.limit.input
41+
? input.tokens.input + input.tokens.cache.read + input.tokens.cache.write
42+
: input.tokens.total ||
43+
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
4144

4245
const reserved =
4346
config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))

packages/opencode/test/session/compaction.test.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ describe("session.compaction.isOverflow", () => {
171171
})
172172
})
173173

174-
test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
174+
test("limit.input counts only input tokens — output tokens do not consume input window", async () => {
175175
await using tmp = await tmpdir()
176176
await Instance.provide({
177177
directory: tmp.path,
@@ -180,15 +180,18 @@ describe("session.compaction.isOverflow", () => {
180180
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
181181
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
182182

183-
// 170K total tokens — well above context-output (168K) but below input limit (200K)
183+
// 171K input tokens (166K + 5K cache), 10K output → 181K total
184184
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
185185

186186
const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
187187
const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })
188188

189-
// Both models have identical real capacity — they should agree:
190-
expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output)
191-
expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K)
189+
// With limit.input: only input tokens (171K) are counted against limit (200K - 20K = 180K)
190+
// 171K < 180K → no compaction needed yet
191+
expect(withLimit).toBe(false)
192+
// Without limit.input: total tokens (181K) are counted against context - output (200K - 32K = 168K)
193+
// 181K > 168K → compaction triggered
194+
expect(withoutLimit).toBe(true)
192195
},
193196
})
194197
})

0 commit comments

Comments (0)