feat(api): Add top-level cache control (automatic caching)

stainless-app[bot] · stainless-app[bot] · commit a940123da34a · 2026-02-19T19:15:48.000Z
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 34
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic%2Fanthropic-51af8e63ed8396461bff3a89a65124496f905d2f8ac211f0b4c9a6588f6cf20f.yml
-openapi_spec_hash: 13d3d0a8e62a955b8b4df99c18d387d0
-config_hash: 5662eb02a2b78e86e8254f0934d1a870
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic%2Fanthropic-29a6b7ba51942cd606e5bf4b533e5aac1bef42f6d4b1f7f45f756304cf676782.yml
+openapi_spec_hash: 58021ab18daccd5c45a930ffd7d6ab4d
+config_hash: 4e204fead5f0af80eb9effa1d1e34dca
diff --git a/README.md b/README.md
@@ -482,9 +482,9 @@ message = client.messages.create(
         }
     ],
     model="claude-opus-4-6",
-    metadata={},
+    cache_control={"type": "ephemeral"},
 )
-print(message.metadata)
+print(message.cache_control)
 ```
 
 ## File uploads
diff --git a/src/anthropic/resources/beta/messages/messages.py b/src/anthropic/resources/beta/messages/messages.py
diff --git a/src/anthropic/resources/messages/messages.py b/src/anthropic/resources/messages/messages.py
@@ -50,6 +50,7 @@
 from ...types.thinking_config_param import ThinkingConfigParam
 from ...types.json_output_format_param import JSONOutputFormatParam
 from ...types.raw_message_stream_event import RawMessageStreamEvent
+from ...types.cache_control_ephemeral_param import CacheControlEphemeralParam
 from ...types.message_count_tokens_tool_param import MessageCountTokensToolParam
 
 __all__ = ["Messages", "AsyncMessages"]
@@ -105,6 +106,7 @@ def create(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -215,6 +217,9 @@ def create(
               [models](https://docs.anthropic.com/en/docs/models-overview) for additional
               details and options.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -384,6 +389,7 @@ def create(
         messages: Iterable[MessageParam],
         model: ModelParam,
         stream: Literal[True],
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -497,6 +503,9 @@ def create(
 
               See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -662,6 +671,7 @@ def create(
         messages: Iterable[MessageParam],
         model: ModelParam,
         stream: bool,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -775,6 +785,9 @@ def create(
 
               See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -939,6 +952,7 @@ def create(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -986,6 +1000,7 @@ def create(
                     "max_tokens": max_tokens,
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "container": container,
                     "inference_geo": inference_geo,
                     "metadata": metadata,
@@ -1019,6 +1034,7 @@ def stream(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
         output_config: OutputConfigParam | Omit = omit,
@@ -1097,6 +1113,7 @@ def stream(
                     "max_tokens": max_tokens,
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "inference_geo": inference_geo,
                     "metadata": metadata,
                     "output_config": merged_output_config,
@@ -1249,6 +1266,7 @@ def count_tokens(
         *,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         output_config: OutputConfigParam | Omit = omit,
         output_format: None | JSONOutputFormatParam | type | Omit = omit,
         system: Union[str, Iterable[TextBlockParam]] | Omit = omit,
@@ -1342,6 +1360,9 @@ def count_tokens(
               [models](https://docs.anthropic.com/en/docs/models-overview) for additional
               details and options.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           output_config: Configuration options for the model's output, such as the output format.
 
 
@@ -1485,6 +1506,7 @@ def count_tokens(
                 {
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "output_config": merged_output_config,
                     "system": system,
                     "thinking": thinking,
@@ -1531,6 +1553,7 @@ async def create(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -1641,6 +1664,9 @@ async def create(
               [models](https://docs.anthropic.com/en/docs/models-overview) for additional
               details and options.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -1810,6 +1836,7 @@ async def create(
         messages: Iterable[MessageParam],
         model: ModelParam,
         stream: Literal[True],
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -1923,6 +1950,9 @@ async def create(
 
               See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -2088,6 +2118,7 @@ async def create(
         messages: Iterable[MessageParam],
         model: ModelParam,
         stream: bool,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -2201,6 +2232,9 @@ async def create(
 
               See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           container: Container identifier for reuse across requests.
 
           inference_geo: Specifies the geographic region for inference processing. If not specified, the
@@ -2365,6 +2399,7 @@ async def create(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         container: Optional[str] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
@@ -2412,6 +2447,7 @@ async def create(
                     "max_tokens": max_tokens,
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "container": container,
                     "inference_geo": inference_geo,
                     "metadata": metadata,
@@ -2445,6 +2481,7 @@ def stream(
         max_tokens: int,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         inference_geo: Optional[str] | Omit = omit,
         metadata: MetadataParam | Omit = omit,
         output_config: OutputConfigParam | Omit = omit,
@@ -2522,6 +2559,7 @@ def stream(
                     "max_tokens": max_tokens,
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "inference_geo": inference_geo,
                     "metadata": metadata,
                     "output_config": merged_output_config,
@@ -2674,6 +2712,7 @@ async def count_tokens(
         *,
         messages: Iterable[MessageParam],
         model: ModelParam,
+        cache_control: Optional[CacheControlEphemeralParam] | Omit = omit,
         output_config: OutputConfigParam | Omit = omit,
         output_format: None | JSONOutputFormatParam | type | Omit = omit,
         system: Union[str, Iterable[TextBlockParam]] | Omit = omit,
@@ -2767,6 +2806,9 @@ async def count_tokens(
               [models](https://docs.anthropic.com/en/docs/models-overview) for additional
               details and options.
 
+          cache_control: Top-level cache control automatically applies a cache_control marker to the last
+              cacheable block in the request.
+
           output_config: Configuration options for the model's output, such as the output format.
 
 
@@ -2910,6 +2952,7 @@ async def count_tokens(
                 {
                     "messages": messages,
                     "model": model,
+                    "cache_control": cache_control,
                     "output_config": merged_output_config,
                     "system": system,
                     "thinking": thinking,
diff --git a/src/anthropic/types/beta/message_count_tokens_params.py b/src/anthropic/types/beta/message_count_tokens_params.py
@@ -19,6 +19,7 @@
 from .beta_tool_bash_20241022_param import BetaToolBash20241022Param
 from .beta_tool_bash_20250124_param import BetaToolBash20250124Param
 from .beta_memory_tool_20250818_param import BetaMemoryTool20250818Param
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
 from .beta_web_fetch_tool_20250910_param import BetaWebFetchTool20250910Param
 from .beta_web_fetch_tool_20260209_param import BetaWebFetchTool20260209Param
 from .beta_web_search_tool_20250305_param import BetaWebSearchTool20250305Param
@@ -117,6 +118,12 @@ class MessageCountTokensParams(TypedDict, total=False):
     details and options.
     """
 
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+    """
+    Top-level cache control automatically applies a cache_control marker to the last
+    cacheable block in the request.
+    """
+
     context_management: Optional[BetaContextManagementConfigParam]
     """Context management configuration.
 
diff --git a/src/anthropic/types/beta/message_create_params.py b/src/anthropic/types/beta/message_create_params.py
@@ -19,6 +19,7 @@
 from .beta_output_config_param import BetaOutputConfigParam
 from .beta_thinking_config_param import BetaThinkingConfigParam
 from .beta_json_output_format_param import BetaJSONOutputFormatParam
+from .beta_cache_control_ephemeral_param import BetaCacheControlEphemeralParam
 from .beta_context_management_config_param import BetaContextManagementConfigParam
 from .beta_request_mcp_server_url_definition_param import BetaRequestMCPServerURLDefinitionParam
 
@@ -117,6 +118,12 @@ class MessageCreateParamsBase(TypedDict, total=False):
     details and options.
     """
 
+    cache_control: Optional[BetaCacheControlEphemeralParam]
+    """
+    Top-level cache control automatically applies a cache_control marker to the last
+    cacheable block in the request.
+    """
+
     container: Optional[Container]
     """Container identifier for reuse across requests."""
 
diff --git a/src/anthropic/types/beta/messages/batch_create_params.py b/src/anthropic/types/beta/messages/batch_create_params.py
@@ -23,6 +23,7 @@ class BatchCreateParams(TypedDict, total=False):
     """Optional header to specify the beta version(s) you want to use."""
 
 
+
 class Request(TypedDict, total=False):
     custom_id: Required[str]
     """Developer-provided ID created for each request in a Message Batch.
diff --git a/src/anthropic/types/message_count_tokens_params.py b/src/anthropic/types/message_count_tokens_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Union, Iterable
+from typing import Union, Iterable, Optional
 from typing_extensions import Required, TypedDict
 
 from .model_param import ModelParam
@@ -11,6 +11,7 @@
 from .tool_choice_param import ToolChoiceParam
 from .output_config_param import OutputConfigParam
 from .thinking_config_param import ThinkingConfigParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
 from .message_count_tokens_tool_param import MessageCountTokensToolParam
 
 __all__ = ["MessageCountTokensParams"]
@@ -92,6 +93,12 @@ class MessageCountTokensParams(TypedDict, total=False):
     details and options.
     """
 
+    cache_control: Optional[CacheControlEphemeralParam]
+    """
+    Top-level cache control automatically applies a cache_control marker to the last
+    cacheable block in the request.
+    """
+
     output_config: OutputConfigParam
     """Configuration options for the model's output, such as the output format."""
 
diff --git a/src/anthropic/types/message_create_params.py b/src/anthropic/types/message_create_params.py
@@ -17,6 +17,7 @@
 from .tool_choice_any_param import ToolChoiceAnyParam
 from .tool_choice_auto_param import ToolChoiceAutoParam
 from .tool_choice_tool_param import ToolChoiceToolParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
 
 __all__ = [
     "MessageCreateParamsBase",
@@ -116,6 +117,12 @@ class MessageCreateParamsBase(TypedDict, total=False):
     details and options.
     """
 
+    cache_control: Optional[CacheControlEphemeralParam]
+    """
+    Top-level cache control automatically applies a cache_control marker to the last
+    cacheable block in the request.
+    """
+
     container: Optional[str]
     """Container identifier for reuse across requests."""
 
diff --git a/src/anthropic/types/messages/batch_create_params.py b/src/anthropic/types/messages/batch_create_params.py
@@ -18,6 +18,7 @@ class BatchCreateParams(TypedDict, total=False):
     """
 
 
+
 class Request(TypedDict, total=False):
     custom_id: Required[str]
     """Developer-provided ID created for each request in a Message Batch.
diff --git a/tests/api_resources/beta/messages/test_batches.py b/tests/api_resources/beta/messages/test_batches.py
@@ -62,6 +62,10 @@ def test_method_create_with_all_params(self, client: Anthropic) -> None:
                             }
                         ],
                         "model": "claude-opus-4-6",
+                        "cache_control": {
+                            "type": "ephemeral",
+                            "ttl": "5m",
+                        },
                         "container": {
                             "id": "id",
                             "skills": [
@@ -494,6 +498,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncAnthropic)
                             }
                         ],
                         "model": "claude-opus-4-6",
+                        "cache_control": {
+                            "type": "ephemeral",
+                            "ttl": "5m",
+                        },
                         "container": {
                             "id": "id",
                             "skills": [
diff --git a/tests/api_resources/beta/test_messages.py b/tests/api_resources/beta/test_messages.py
diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py

Original file line number	Diff line number	Diff line change
`@@ -482,9 +482,9 @@ message = client.messages.create(`
`482`	`482`	`}`
`483`	`483`	`],`
`484`	`484`	`model="claude-opus-4-6",`
`485`		`- metadata={},`
	`485`	`+ cache_control={"type": "ephemeral"},`
`486`	`486`	`)`
`487`		`-print(message.metadata)`
	`487`	`+print(message.cache_control)`
`488`	`488`	`` ``` ``
`489`	`489`
`490`	`490`	`## File uploads`