Skip to content

Commit d471249

Browse files
committed
feat(provider): add 1M context window support for Anthropic models
Add the context-1m-2025-08-07 beta header for the Anthropic provider, enabling the 1M-token context window for Claude Opus 4.6, Sonnet 4.6, Sonnet 4.5, and Sonnet 4.0. Override the context limit from 200K to 1M for supported models so compaction triggers at the correct threshold instead of prematurely at 200K. Also fix compaction token counting: when model.limit.input is set, count only input tokens (not output/thinking tokens) against the input limit, since output tokens do not consume the input context window.
1 parent 51e6000 commit d471249

File tree

3 files changed

+38
-9
lines changed

3 files changed

+38
-9
lines changed

packages/opencode/src/provider/provider.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ export namespace Provider {
123123
options: {
124124
headers: {
125125
"anthropic-beta":
126-
"claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14",
126+
"claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
127127
},
128128
},
129129
}
@@ -995,6 +995,29 @@ export namespace Provider {
995995

996996
for (const [modelID, model] of Object.entries(provider.models)) {
997997
model.api.id = model.api.id ?? model.id ?? modelID
998+
999+
// Override context window for Anthropic models that support 1M via the
1000+
// context-1m-2025-08-07 beta header. models.dev currently reports 200K;
1001+
// this ensures compaction uses the real limit for API key users.
1002+
// See: https://docs.anthropic.com/en/docs/about-claude/models
1003+
if (
1004+
providerID === "anthropic" &&
1005+
model.limit.context < 1_000_000 &&
1006+
[
1007+
"opus-4-6",
1008+
"opus-4.6",
1009+
"sonnet-4-6",
1010+
"sonnet-4.6",
1011+
"sonnet-4-5",
1012+
"sonnet-4.5",
1013+
"sonnet-4-0",
1014+
"sonnet-4.0",
1015+
"sonnet-4-20250514",
1016+
].some((p) => model.api.id.includes(p))
1017+
) {
1018+
model.limit.context = 1_000_000
1019+
}
1020+
9981021
if (modelID === "gpt-5-chat-latest" || (providerID === "openrouter" && modelID === "openai/gpt-5-chat"))
9991022
delete provider.models[modelID]
10001023
if (model.status === "alpha" && !Flag.OPENCODE_ENABLE_EXPERIMENTAL_MODELS) delete provider.models[modelID]

packages/opencode/src/session/compaction.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,12 @@ export namespace SessionCompaction {
3535
const context = input.model.limit.context
3636
if (context === 0) return false
3737

38-
const count =
39-
input.tokens.total ||
40-
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
38+
// When limit.input is set, only count input tokens against that limit
39+
// (output/thinking tokens don't consume the input window).
40+
const count = input.model.limit.input
41+
? input.tokens.input + input.tokens.cache.read + input.tokens.cache.write
42+
: input.tokens.total ||
43+
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
4144

4245
const reserved =
4346
config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))

packages/opencode/test/session/compaction.test.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ describe("session.compaction.isOverflow", () => {
171171
})
172172
})
173173

174-
test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
174+
test("limit.input counts only input tokens — output tokens do not consume input window", async () => {
175175
await using tmp = await tmpdir()
176176
await Instance.provide({
177177
directory: tmp.path,
@@ -180,15 +180,18 @@ describe("session.compaction.isOverflow", () => {
180180
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
181181
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
182182

183-
// 170K total tokens — well above context-output (168K) but below input limit (200K)
183+
// 171K input tokens (166K + 5K cache), 10K output → 181K total
184184
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
185185

186186
const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
187187
const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })
188188

189-
// Both models have identical real capacity — they should agree:
190-
expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output)
191-
expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K)
189+
// With limit.input: only input tokens (171K) are counted against limit (200K - 20K = 180K)
190+
// 171K < 180K → no compaction needed yet
191+
expect(withLimit).toBe(false)
192+
// Without limit.input: total tokens (181K) are counted against context - output (200K - 32K = 168K)
193+
// 181K > 168K → compaction triggered
194+
expect(withoutLimit).toBe(true)
192195
},
193196
})
194197
})

0 commit comments

Comments (0)