Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 5 additions & 21 deletions packages/opencode/src/session/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
import { Permission } from "@/permission"
import { Global } from "@/global"
import type { LanguageModelV2Usage } from "@ai-sdk/provider"
import { iife } from "@/util/iife"
import { Effect, Layer, Scope, ServiceMap } from "effect"
import { makeRuntime } from "@/effect/run-service"

Expand Down Expand Up @@ -265,27 +264,12 @@ export namespace Session {
0) as number,
)

// OpenRouter provides inputTokens as the total count of input tokens (including cached).
// AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
// Anthropic does it differently though - inputTokens doesn't include cached tokens.
// It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others.
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
const adjustedInputTokens = safe(
excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
)
// AI SDK v6 normalized inputTokens to include cached tokens across all providers
// (including Anthropic/Bedrock which previously excluded them). Always subtract cache
// tokens to get the non-cached input count for separate cost calculation.
const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)

const total = iife(() => {
// Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
// don't compute from components
if (
input.model.api.npm === "@ai-sdk/anthropic" ||
input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
) {
return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
}
return input.usage.totalTokens
})
const total = input.usage.totalTokens

const tokens = {
total,
Expand Down
20 changes: 12 additions & 8 deletions packages/opencode/test/session/compaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -964,8 +964,9 @@ describe("session.getUsage", () => {
expect(result.tokens.cache.write).toBe(300)
})

test("does not subtract cached tokens for anthropic provider", () => {
test("subtracts cached tokens for anthropic provider", () => {
const model = createModel({ context: 100_000, output: 32_000 })
// AI SDK v6 normalizes inputTokens to include cached tokens for all providers
const result = Session.getUsage({
model,
usage: {
Expand All @@ -979,7 +980,7 @@ describe("session.getUsage", () => {
},
})

expect(result.tokens.input).toBe(1000)
expect(result.tokens.input).toBe(800)
expect(result.tokens.cache.read).toBe(200)
})

Expand Down Expand Up @@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
"computes total from components for %s models",
(npm) => {
const model = createModel({ context: 100_000, output: 32_000, npm })
// AI SDK v6: inputTokens includes cached tokens for all providers
const usage = {
inputTokens: 1000,
outputTokens: 500,
// These providers typically report total as input + output only,
// excluding cache read/write.
totalTokens: 1500,
cachedInputTokens: 200,
}
Expand All @@ -1064,10 +1064,12 @@ describe("session.getUsage", () => {
},
})

expect(result.tokens.input).toBe(1000)
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
expect(result.tokens.input).toBe(500)
expect(result.tokens.cache.read).toBe(200)
expect(result.tokens.cache.write).toBe(300)
expect(result.tokens.total).toBe(2000)
// total is taken directly from usage.totalTokens (1500); it equals adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
expect(result.tokens.total).toBe(1500)
return
}

Expand All @@ -1081,10 +1083,12 @@ describe("session.getUsage", () => {
},
})

expect(result.tokens.input).toBe(1000)
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
expect(result.tokens.input).toBe(500)
expect(result.tokens.cache.read).toBe(200)
expect(result.tokens.cache.write).toBe(300)
expect(result.tokens.total).toBe(2000)
// total is taken directly from usage.totalTokens (1500); it equals adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
expect(result.tokens.total).toBe(1500)
},
)
})
Loading