Merged
Changes from 8 commits
5 changes: 5 additions & 0 deletions codex-rs/core/src/client.rs
@@ -114,6 +114,7 @@ impl ModelClient {
self.config
.model_context_window
.or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
.map(apply_context_window_margin)
}

pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
@@ -521,6 +522,10 @@ impl StreamAttemptError {
}
}

const fn apply_context_window_margin(context_window: u64) -> u64 {
[Collaborator review comment] General comment, but let's try to avoid using unsigned int. Even if the value is non-negative, the modulo arithmetic is pretty different.

(context_window * 95) / 100
}

#[derive(Debug, Deserialize, Serialize)]
struct SseEvent {
#[serde(rename = "type")]
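For reference, here is a minimal, self-contained sketch (not part of the diff) of what the new 5% margin does to the advertised context window, plus a hypothetical signed variant along the lines of the review comment above. The signed function is an assumption, not code from this PR:

```rust
/// 5% safety margin applied to the advertised context window, as in the PR.
const fn apply_context_window_margin(context_window: u64) -> u64 {
    (context_window * 95) / 100
}

/// Hypothetical signed variant suggested by the review comment. With i64,
/// division truncates toward zero, so an accidental negative input stays
/// small and negative instead of wrapping to a huge u64 value.
const fn apply_context_window_margin_signed(context_window: i64) -> i64 {
    (context_window * 95) / 100
}

fn main() {
    // gpt-5 / gpt-5-codex advertise a 272_000-token window.
    assert_eq!(apply_context_window_margin(272_000), 258_400);
    // o3 advertises 200_000 tokens.
    assert_eq!(apply_context_window_margin(200_000), 190_000);
    // The signed variant agrees for non-negative inputs.
    assert_eq!(apply_context_window_margin_signed(272_000), 258_400);
}
```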
8 changes: 4 additions & 4 deletions codex-rs/core/src/config.rs
@@ -2709,7 +2709,7 @@ model_verbosity = "high"
model_family: find_family_for_model("o3").expect("known model slug"),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(180_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::Never,
@@ -2776,7 +2776,7 @@ model_verbosity = "high"
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
model_context_window: Some(16_385),
model_max_output_tokens: Some(4_096),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(14_746),
model_provider_id: "openai-chat-completions".to_string(),
model_provider: fixture.openai_chat_completions_provider.clone(),
approval_policy: AskForApproval::UnlessTrusted,
@@ -2858,7 +2858,7 @@ model_verbosity = "high"
model_family: find_family_for_model("o3").expect("known model slug"),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(180_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::OnFailure,
@@ -2926,7 +2926,7 @@ model_verbosity = "high"
model_family: find_family_for_model("gpt-5").expect("known model slug"),
model_context_window: Some(272_000),
model_max_output_tokens: Some(128_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(244_800),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::OnFailure,
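The updated fixture values all follow from a 90% auto-compact default applied to each model's context window with truncating integer division; a quick arithmetic check (a sketch, not test code from the PR):

```rust
fn main() {
    // Assumed to mirror default_auto_compact_limit: (context_window * 9) / 10.
    let limit = |context_window: u64| (context_window as i64 * 9) / 10;
    assert_eq!(limit(200_000), 180_000); // o3 fixtures
    assert_eq!(limit(16_385), 14_746);   // gpt-3.5-turbo fixture (147_465 / 10, truncated)
    assert_eq!(limit(272_000), 244_800); // gpt-5 fixture
}
```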
12 changes: 6 additions & 6 deletions codex-rs/core/src/openai_model_info.rs
@@ -23,9 +23,13 @@ impl ModelInfo {
Self {
context_window,
max_output_tokens,
auto_compact_token_limit: None,
auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
}
}

const fn default_auto_compact_limit(context_window: u64) -> i64 {
(context_window as i64 * 9) / 10
}
}

pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
@@ -62,11 +66,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
// https://platform.openai.com/docs/models/gpt-3.5-turbo
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),

_ if slug.starts_with("gpt-5-codex") => Some(ModelInfo {
context_window: 272_000,
max_output_tokens: 128_000,
auto_compact_token_limit: Some(350_000),
}),
_ if slug.starts_with("gpt-5-codex") => Some(ModelInfo::new(272_000, 128_000)),

_ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),

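With gpt-5-codex now going through `ModelInfo::new`, its auto-compact limit is derived from the context window (90%) instead of the old hard-coded `Some(350_000)`, which was larger than the 272_000-token window itself. A rough sketch of the resulting behavior, with the struct shape assumed from the diff rather than copied from the crate:

```rust
// Shape assumed from the diff; this mirrors, not reproduces, openai_model_info.rs.
struct ModelInfo {
    context_window: u64,
    max_output_tokens: u64,
    auto_compact_token_limit: Option<i64>,
}

impl ModelInfo {
    const fn new(context_window: u64, max_output_tokens: u64) -> Self {
        Self {
            context_window,
            max_output_tokens,
            auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
        }
    }

    const fn default_auto_compact_limit(context_window: u64) -> i64 {
        (context_window as i64 * 9) / 10
    }
}

fn main() {
    let codex = ModelInfo::new(272_000, 128_000);
    assert_eq!(codex.context_window, 272_000);
    assert_eq!(codex.max_output_tokens, 128_000);
    // Previously Some(350_000); now 90% of the window.
    assert_eq!(codex.auto_compact_token_limit, Some(244_800));
}
```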
13 changes: 9 additions & 4 deletions codex-rs/core/tests/suite/client.rs
@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5-codex in tests → 272000 context window
"model_context_window": 272000
// Default model is gpt-5-codex in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
"primary": {
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;

const EFFECTIVE_CONTEXT_WINDOW: u64 = (272_000 * 95) / 100;

responses::mount_sse_once_match(
&server,
body_string_contains("trigger context window"),
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
.info
.expect("token usage info present when context window is exceeded");

assert_eq!(info.model_context_window, Some(272_000));
assert_eq!(info.total_token_usage.total_tokens, 272_000);
assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
assert_eq!(
info.total_token_usage.total_tokens,
EFFECTIVE_CONTEXT_WINDOW
);

let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
103 changes: 103 additions & 0 deletions codex-rs/core/tests/suite/compact.rs
@@ -43,6 +43,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
"Your input exceeds the context window of this model. Please adjust your input and try again.";
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -860,3 +861,105 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"second auto compact request should include the summarization prompt"
);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
skip_if_no_network!();

let server = start_mock_server().await;

let context_window = 100;
let limit = (context_window * 90 / 100) as i64;
let over_limit_tokens = context_window * 95 / 100 + 1;

let first_turn = sse(vec![
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
ev_completed_with_tokens("r1", 50),
]);
let function_call_follow_up = sse(vec![
ev_assistant_message("m2", FINAL_REPLY),
ev_completed_with_tokens("r2", over_limit_tokens),
]);
let auto_compact_turn = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_completed_with_tokens("r3", 10),
]);
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);

let request_log = mount_sse_sequence(
[Collaborator review comment] nit: you can mount these one by one and only store mocks of the ones you care about into variables. Avoids needing to index into request_log.

&server,
vec![
first_turn,
function_call_follow_up,
auto_compact_turn,
post_auto_compact_turn,
],
)
.await;

let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};

let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_context_window = Some(context_window);
config.model_auto_compact_token_limit = Some(limit);

let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;

codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FUNCTION_CALL_LIMIT_MSG.into(),
}],
})
.await
.unwrap();

wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;

let requests = request_log.requests();
assert_eq!(
requests.len(),
4,
"expected user request, function call follow-up, auto compact, and post-compact requests"
);

let first_request = requests[0].input();
assert!(
first_request.iter().any(|item| {
item.get("type").and_then(|value| value.as_str()) == Some("message")
&& item
.get("content")
.and_then(|content| content.as_array())
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(|value| value.as_str())
== Some(FUNCTION_CALL_LIMIT_MSG)
}),
"first request should include the user message that triggers the function call"
);

let function_call_output = requests[1].function_call_output(DUMMY_CALL_ID);
let output_text = function_call_output
.get("output")
.and_then(|value| value.as_str())
.unwrap_or_default();
assert!(
output_text.contains(DUMMY_FUNCTION_NAME),
"function call output should be sent before auto compact"
);

let auto_compact_body = requests[2].body_json().to_string();
assert!(
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
);
}
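A quick sanity check of the numbers this test leans on (a sketch mirroring the test's arithmetic, not the production trigger logic):

```rust
fn main() {
    let context_window: u64 = 100;
    let limit = (context_window * 90 / 100) as i64;        // auto-compact limit = 90
    let over_limit_tokens = context_window * 95 / 100 + 1; // 96 tokens after the tool-call turn
    assert_eq!(limit, 90);
    assert_eq!(over_limit_tokens, 96);
    // 96 > 90, so the turn after the function-call output trips auto compact,
    // which is why the test expects exactly four requests.
    assert!(over_limit_tokens as i64 > limit);
}
```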