Skip to content

Commit 9bc596a

Browse files
authored
fix(google): add usage data (#573)
## Description <!-- What does this PR do? --> Found that gemini/vertexai didn't have usage data being added to their completion chunks. ## PR Type <!-- Delete the types that don't apply --> 🐛 Bug Fix ## Relevant issues <!-- e.g. "Fixes #123" --> ## Checklist - [x] I have added unit tests that prove my fix/feature works - [x] New and existing tests pass locally - [x] Documentation was updated where necessary - [x] I have read and followed the [contribution guidelines](https://github.com/mozilla-ai/any-llm/blob/main/CONTRIBUTING.md)
1 parent fefbc19 commit 9bc596a

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

src/any_llm/providers/gemini/utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
ChatCompletionChunk,
1010
ChoiceDelta,
1111
ChunkChoice,
12+
CompletionUsage,
1213
CreateEmbeddingResponse,
1314
Embedding,
1415
Reasoning,
@@ -260,12 +261,21 @@ def _create_openai_chunk_from_google_chunk(
260261
finish_reason="stop" if getattr(candidate.finish_reason, "value", None) == "STOP" else None,
261262
)
262263

264+
usage = None
265+
if response.usage_metadata:
266+
usage = CompletionUsage(
267+
prompt_tokens=response.usage_metadata.prompt_token_count or 0,
268+
completion_tokens=response.usage_metadata.candidates_token_count or 0,
269+
total_tokens=response.usage_metadata.total_token_count or 0,
270+
)
271+
263272
return ChatCompletionChunk(
264273
id=f"chatcmpl-{time()}",
265274
choices=[choice],
266275
created=int(time()),
267276
model=str(response.model_version),
268277
object="chat.completion.chunk",
278+
usage=usage,
269279
)
270280

271281

tests/unit/providers/test_gemini_provider.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,54 @@ async def test_gemini_with_built_in_tools() -> None:
396396
assert generation_config.tools is not None
397397
assert len(generation_config.tools) == 1
398398
assert generation_config.tools[0] == google_search
399+
400+
401+
@pytest.mark.asyncio
402+
async def test_streaming_completion_includes_usage_data() -> None:
403+
"""Test that streaming chunks include usage metadata when available."""
404+
from any_llm.providers.gemini.utils import _create_openai_chunk_from_google_chunk
405+
406+
mock_response = Mock()
407+
mock_response.candidates = [Mock()]
408+
mock_response.candidates[0].content = Mock()
409+
mock_response.candidates[0].content.parts = [Mock()]
410+
mock_response.candidates[0].content.parts[0].text = "Hello"
411+
mock_response.candidates[0].content.parts[0].thought = None
412+
mock_response.candidates[0].finish_reason = Mock()
413+
mock_response.candidates[0].finish_reason.value = "STOP"
414+
mock_response.model_version = "gemini-2.5-flash"
415+
416+
mock_response.usage_metadata = Mock()
417+
mock_response.usage_metadata.prompt_token_count = 10
418+
mock_response.usage_metadata.candidates_token_count = 5
419+
mock_response.usage_metadata.total_token_count = 15
420+
421+
chunk = _create_openai_chunk_from_google_chunk(mock_response)
422+
423+
assert chunk.usage is not None, "Usage data should be included in streaming chunks"
424+
assert chunk.usage.prompt_tokens == 10
425+
assert chunk.usage.completion_tokens == 5
426+
assert chunk.usage.total_tokens == 15
427+
428+
429+
@pytest.mark.asyncio
430+
async def test_streaming_completion_without_usage_metadata() -> None:
431+
"""Test that streaming chunks handle missing usage metadata gracefully."""
432+
from any_llm.providers.gemini.utils import _create_openai_chunk_from_google_chunk
433+
434+
mock_response = Mock()
435+
mock_response.candidates = [Mock()]
436+
mock_response.candidates[0].content = Mock()
437+
mock_response.candidates[0].content.parts = [Mock()]
438+
mock_response.candidates[0].content.parts[0].text = "Hello"
439+
mock_response.candidates[0].content.parts[0].thought = None
440+
mock_response.candidates[0].finish_reason = Mock()
441+
mock_response.candidates[0].finish_reason.value = None
442+
mock_response.model_version = "gemini-2.5-flash"
443+
444+
mock_response.usage_metadata = None
445+
446+
chunk = _create_openai_chunk_from_google_chunk(mock_response)
447+
448+
assert chunk.usage is None, "Usage should be None when metadata is not available"
449+
assert chunk.choices[0].delta.content == "Hello"

0 commit comments

Comments
 (0)