Commit b1cfe23

Michelle/api ref correction (#505)

* corrected llm gateway usage response
* corrected llm gateway usage response in API ref
* fixed cerebras param

1 parent d9b809a

File tree

5 files changed: +24 -24 lines

fern/pages/07-llm-gateway/agentic-workflows.mdx

Lines changed: 8 additions & 8 deletions

```diff
@@ -189,8 +189,8 @@ The API returns a JSON response. In agentic workflows, the model may make multip
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 150,
-    "completion_tokens": 30,
+    "input_tokens": 150,
+    "output_tokens": 30,
     "total_tokens": 180
   }
 }
@@ -227,8 +227,8 @@ After adding the function result to conversation history:
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 220,
-    "completion_tokens": 35,
+    "input_tokens": 220,
+    "output_tokens": 35,
     "total_tokens": 255
   }
 }
@@ -255,8 +255,8 @@ After all tool calls are complete:
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 280,
-    "completion_tokens": 20,
+    "input_tokens": 280,
+    "output_tokens": 20,
     "total_tokens": 300
   }
 }
@@ -275,8 +275,8 @@ After all tool calls are complete:
 | `choices[i].tool_calls` | array | Present when the model wants to call tools. Contains function call objects. |
 | `request` | object | Echo of the request parameters (excluding `messages`). |
 | `usage` | object | Token usage statistics for the request. |
-| `usage.prompt_tokens` | number | Number of tokens in the prompt. |
-| `usage.completion_tokens` | number | Number of tokens in the completion. |
+| `usage.input_tokens` | number | Number of tokens in the prompt. |
+| `usage.output_tokens` | number | Number of tokens in the completion. |
 | `usage.total_tokens` | number | Total tokens used (prompt + completion). |
 
 #### Tool call object
```
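The per-turn usage objects in the agentic-workflow examples above can be summed to get the cost of the whole loop. A minimal sketch, with the three usage dicts hard-coded from the documented examples (a real client would collect them from each response's `usage` field):

```python
# Usage objects from the three agentic turns documented above.
# In a real client these would come from each API response's "usage" field.
turns = [
    {"input_tokens": 150, "output_tokens": 30, "total_tokens": 180},
    {"input_tokens": 220, "output_tokens": 35, "total_tokens": 255},
    {"input_tokens": 280, "output_tokens": 20, "total_tokens": 300},
]

totals = {
    key: sum(turn[key] for turn in turns)
    for key in ("input_tokens", "output_tokens", "total_tokens")
}
print(totals)  # {'input_tokens': 650, 'output_tokens': 85, 'total_tokens': 735}
```

Note that `input_tokens` grows each turn because the conversation history (including tool results) is resent with every request.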

fern/pages/07-llm-gateway/chat-completions.mdx

Lines changed: 4 additions & 4 deletions

```diff
@@ -136,8 +136,8 @@ The API returns a JSON response with the model's completion:
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 15,
-    "completion_tokens": 8,
+    "input_tokens": 15,
+    "output_tokens": 8,
     "total_tokens": 23
   }
 }
@@ -155,8 +155,8 @@ The API returns a JSON response with the model's completion:
 | `choices[i].finish_reason` | string | The reason the model stopped generating. Common values: `"stop"`, `"length"`. |
 | `request` | object | Echo of the request parameters (excluding `prompt` and `messages`). |
 | `usage` | object | Token usage statistics for the request. |
-| `usage.prompt_tokens` | number | Number of tokens in the prompt. |
-| `usage.completion_tokens` | number | Number of tokens in the completion. |
+| `usage.input_tokens` | number | Number of tokens in the prompt. |
+| `usage.output_tokens` | number | Number of tokens in the completion. |
 | `usage.total_tokens` | number | Total tokens used (prompt + completion). |
 
 ### Error response
```
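Client code that previously read `usage.prompt_tokens` and `usage.completion_tokens` needs updating to the renamed fields. A sketch of parsing the documented response shape; the JSON literal mirrors the chat-completions example above rather than calling a real endpoint:

```python
import json

# Response body shaped like the chat-completions example above
# (trimmed to the usage object).
raw = """
{
  "usage": {
    "input_tokens": 15,
    "output_tokens": 8,
    "total_tokens": 23
  }
}
"""

usage = json.loads(raw)["usage"]
# Renamed fields: input_tokens replaces prompt_tokens,
# output_tokens replaces completion_tokens.
assert usage["input_tokens"] + usage["output_tokens"] == usage["total_tokens"]
print(usage["total_tokens"])  # 23
```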

fern/pages/07-llm-gateway/conversations.mdx

Lines changed: 4 additions & 4 deletions

```diff
@@ -201,8 +201,8 @@ The API returns a JSON response with the model's completion:
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 45,
-    "completion_tokens": 35,
+    "input_tokens": 45,
+    "output_tokens": 35,
     "total_tokens": 80
   }
 }
@@ -220,8 +220,8 @@ The API returns a JSON response with the model's completion:
 | `choices[i].finish_reason` | string | The reason the model stopped generating. Common values: `"stop"`, `"length"`. |
 | `request` | object | Echo of the request parameters (excluding `messages`). |
 | `usage` | object | Token usage statistics for the request. |
-| `usage.prompt_tokens` | number | Number of tokens in the prompt. |
-| `usage.completion_tokens` | number | Number of tokens in the completion. |
+| `usage.input_tokens` | number | Number of tokens in the prompt. |
+| `usage.output_tokens` | number | Number of tokens in the completion. |
 | `usage.total_tokens` | number | Total tokens used (prompt + completion). |
 
 ### Error response
```

fern/pages/07-llm-gateway/tool-calling.mdx

Lines changed: 4 additions & 4 deletions

```diff
@@ -241,8 +241,8 @@ The API returns a JSON response. When the model wants to call a tool:
     "max_tokens": 1000
   },
   "usage": {
-    "prompt_tokens": 120,
-    "completion_tokens": 25,
+    "input_tokens": 120,
+    "output_tokens": 25,
     "total_tokens": 145
   }
 }
@@ -261,8 +261,8 @@ The API returns a JSON response. When the model wants to call a tool:
 | `choices[i].tool_calls` | array | Present when the model wants to call tools. Contains function call objects. |
 | `request` | object | Echo of the request parameters (excluding `messages`). |
 | `usage` | object | Token usage statistics for the request. |
-| `usage.prompt_tokens` | number | Number of tokens in the prompt. |
-| `usage.completion_tokens` | number | Number of tokens in the completion. |
+| `usage.input_tokens` | number | Number of tokens in the prompt. |
+| `usage.output_tokens` | number | Number of tokens in the completion. |
 | `usage.total_tokens` | number | Total tokens used (prompt + completion). |
 
 #### Tool call object
```

llm-gateway.yml

Lines changed: 4 additions & 4 deletions

```diff
@@ -367,15 +367,15 @@ components:
     Usage:
       type: object
      properties:
-        prompt_tokens:
+        input_tokens:
           type: integer
-        completion_tokens:
+        output_tokens:
           type: integer
         total_tokens:
           type: integer
       required:
-        - prompt_tokens
-        - completion_tokens
+        - input_tokens
+        - output_tokens
         - total_tokens
 
 # Understanding Request/Response Schemas
```
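The updated `Usage` schema makes all three renamed fields required integers. A toy conformance check, not a real JSON Schema validator, to illustrate what the schema now demands (the function name is a hypothetical helper, not part of the gateway):

```python
# Required keys per the updated Usage schema in llm-gateway.yml.
REQUIRED = {"input_tokens", "output_tokens", "total_tokens"}

def validate_usage(usage: dict) -> list[str]:
    """Return a list of problems; an empty list means the object conforms."""
    problems = [f"missing: {k}" for k in sorted(REQUIRED - usage.keys())]
    problems += [
        f"not an integer: {k}"
        for k in sorted(REQUIRED & usage.keys())
        if not isinstance(usage[k], int)
    ]
    return problems

print(validate_usage({"input_tokens": 15, "output_tokens": 8, "total_tokens": 23}))  # []
# The old field names no longer satisfy the schema:
print(validate_usage({"prompt_tokens": 15, "completion_tokens": 8, "total_tokens": 23}))
```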
