Skip to content

Commit 5ac21c2

Browse files
ualtinokrekram1-node
authored and committed
fix(session): fix token usage double-counting w/ anthropic & bedrock due to AI SDK v6 upgrade (anomalyco#19758)
Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
Co-authored-by: Aiden Cline <aidenpcline@gmail.com>
1 parent ae171c4 commit 5ac21c2

File tree

2 files changed

+17
-29
lines changed

2 files changed

+17
-29
lines changed

packages/opencode/src/session/index.ts

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
3232
import { Permission } from "@/permission"
3333
import { Global } from "@/global"
3434
import type { LanguageModelV2Usage } from "@ai-sdk/provider"
35-
import { iife } from "@/util/iife"
3635
import { Effect, Layer, Scope, ServiceMap } from "effect"
3736
import { makeRuntime } from "@/effect/run-service"
3837

@@ -265,27 +264,12 @@ export namespace Session {
265264
0) as number,
266265
)
267266

268-
// OpenRouter provides inputTokens as the total count of input tokens (including cached).
269-
// AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
270-
// Anthropic does it differently though - inputTokens doesn't include cached tokens.
271-
// It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others.
272-
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
273-
const adjustedInputTokens = safe(
274-
excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
275-
)
267+
// AI SDK v6 normalized inputTokens to include cached tokens across all providers
268+
// (including Anthropic/Bedrock which previously excluded them). Always subtract cache
269+
// tokens to get the non-cached input count for separate cost calculation.
270+
const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)
276271

277-
const total = iife(() => {
278-
// Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
279-
// don't compute from components
280-
if (
281-
input.model.api.npm === "@ai-sdk/anthropic" ||
282-
input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
283-
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
284-
) {
285-
return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
286-
}
287-
return input.usage.totalTokens
288-
})
272+
const total = input.usage.totalTokens
289273

290274
const tokens = {
291275
total,

packages/opencode/test/session/compaction.test.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -964,8 +964,9 @@ describe("session.getUsage", () => {
964964
expect(result.tokens.cache.write).toBe(300)
965965
})
966966

967-
test("does not subtract cached tokens for anthropic provider", () => {
967+
test("subtracts cached tokens for anthropic provider", () => {
968968
const model = createModel({ context: 100_000, output: 32_000 })
969+
// AI SDK v6 normalizes inputTokens to include cached tokens for all providers
969970
const result = Session.getUsage({
970971
model,
971972
usage: {
@@ -979,7 +980,7 @@ describe("session.getUsage", () => {
979980
},
980981
})
981982

982-
expect(result.tokens.input).toBe(1000)
983+
expect(result.tokens.input).toBe(800)
983984
expect(result.tokens.cache.read).toBe(200)
984985
})
985986

@@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
10431044
"computes total from components for %s models",
10441045
(npm) => {
10451046
const model = createModel({ context: 100_000, output: 32_000, npm })
1047+
// AI SDK v6: inputTokens includes cached tokens for all providers
10461048
const usage = {
10471049
inputTokens: 1000,
10481050
outputTokens: 500,
1049-
// These providers typically report total as input + output only,
1050-
// excluding cache read/write.
10511051
totalTokens: 1500,
10521052
cachedInputTokens: 200,
10531053
}
@@ -1064,10 +1064,12 @@ describe("session.getUsage", () => {
10641064
},
10651065
})
10661066

1067-
expect(result.tokens.input).toBe(1000)
1067+
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
1068+
expect(result.tokens.input).toBe(500)
10681069
expect(result.tokens.cache.read).toBe(200)
10691070
expect(result.tokens.cache.write).toBe(300)
1070-
expect(result.tokens.total).toBe(2000)
1071+
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
1072+
expect(result.tokens.total).toBe(1500)
10711073
return
10721074
}
10731075

@@ -1081,10 +1083,12 @@ describe("session.getUsage", () => {
10811083
},
10821084
})
10831085

1084-
expect(result.tokens.input).toBe(1000)
1086+
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
1087+
expect(result.tokens.input).toBe(500)
10851088
expect(result.tokens.cache.read).toBe(200)
10861089
expect(result.tokens.cache.write).toBe(300)
1087-
expect(result.tokens.total).toBe(2000)
1090+
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
1091+
expect(result.tokens.total).toBe(1500)
10881092
},
10891093
)
10901094
})

0 commit comments

Comments
 (0)