Skip to content

Commit 5ac21c2

Browse files
ualtinokrekram1-node
authored and committed
fix(session): fix token usage double-counting w/ anthropic & bedrock due to AI SDK v6 upgrade (anomalyco#19758)
Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
Co-authored-by: Aiden Cline <aidenpcline@gmail.com>
1 parent ae171c4 commit 5ac21c2

File tree

2 files changed

+17
-29
lines changed

2 files changed

+17
-29
lines changed

packages/opencode/src/session/index.ts

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
3232
import { Permission } from "@/permission"
3333
import { Global } from "@/global"
3434
import type { LanguageModelV2Usage } from "@ai-sdk/provider"
35-
import { iife } from "@/util/iife"
3635
import { Effect, Layer, Scope, ServiceMap } from "effect"
3736
import { makeRuntime } from "@/effect/run-service"
3837

@@ -265,27 +264,12 @@ export namespace Session {
265264
0) as number,
266265
)
267266

268-
// OpenRouter provides inputTokens as the total count of input tokens (including cached).
269-
// AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
270-
// Anthropic does it differently though - inputTokens doesn't include cached tokens.
271-
// It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others.
272-
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
273-
const adjustedInputTokens = safe(
274-
excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
275-
)
267+
// AI SDK v6 normalized inputTokens to include cached tokens across all providers
268+
// (including Anthropic/Bedrock which previously excluded them). Always subtract cache
269+
// tokens to get the non-cached input count for separate cost calculation.
270+
const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)
276271

277-
const total = iife(() => {
278-
// Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
279-
// don't compute from components
280-
if (
281-
input.model.api.npm === "@ai-sdk/anthropic" ||
282-
input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
283-
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
284-
) {
285-
return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
286-
}
287-
return input.usage.totalTokens
288-
})
272+
const total = input.usage.totalTokens
289273

290274
const tokens = {
291275
total,

packages/opencode/test/session/compaction.test.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -964,8 +964,9 @@ describe("session.getUsage", () => {
964964
expect(result.tokens.cache.write).toBe(300)
965965
})
966966

967-
test("does not subtract cached tokens for anthropic provider", () => {
967+
test("subtracts cached tokens for anthropic provider", () => {
968968
const model = createModel({ context: 100_000, output: 32_000 })
969+
// AI SDK v6 normalizes inputTokens to include cached tokens for all providers
969970
const result = Session.getUsage({
970971
model,
971972
usage: {
@@ -979,7 +980,7 @@ describe("session.getUsage", () => {
979980
},
980981
})
981982

982-
expect(result.tokens.input).toBe(1000)
983+
expect(result.tokens.input).toBe(800)
983984
expect(result.tokens.cache.read).toBe(200)
984985
})
985986

@@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
10431044
"computes total from components for %s models",
10441045
(npm) => {
10451046
const model = createModel({ context: 100_000, output: 32_000, npm })
1047+
// AI SDK v6: inputTokens includes cached tokens for all providers
10461048
const usage = {
10471049
inputTokens: 1000,
10481050
outputTokens: 500,
1049-
// These providers typically report total as input + output only,
1050-
// excluding cache read/write.
10511051
totalTokens: 1500,
10521052
cachedInputTokens: 200,
10531053
}
@@ -1064,10 +1064,12 @@ describe("session.getUsage", () => {
10641064
},
10651065
})
10661066

1067-
expect(result.tokens.input).toBe(1000)
1067+
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
1068+
expect(result.tokens.input).toBe(500)
10681069
expect(result.tokens.cache.read).toBe(200)
10691070
expect(result.tokens.cache.write).toBe(300)
1070-
expect(result.tokens.total).toBe(2000)
1071+
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
1072+
expect(result.tokens.total).toBe(1500)
10711073
return
10721074
}
10731075

@@ -1081,10 +1083,12 @@ describe("session.getUsage", () => {
10811083
},
10821084
})
10831085

1084-
expect(result.tokens.input).toBe(1000)
1086+
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
1087+
expect(result.tokens.input).toBe(500)
10851088
expect(result.tokens.cache.read).toBe(200)
10861089
expect(result.tokens.cache.write).toBe(300)
1087-
expect(result.tokens.total).toBe(2000)
1090+
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
1091+
expect(result.tokens.total).toBe(1500)
10881092
},
10891093
)
10901094
})

0 commit comments

Comments
 (0)