Merged
Changes from 8 commits
5 changes: 5 additions & 0 deletions codex-rs/core/src/client.rs
@@ -114,6 +114,7 @@ impl ModelClient {
self.config
.model_context_window
.or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
.map(apply_context_window_margin)
}

pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
@@ -521,6 +522,10 @@ impl StreamAttemptError {
}
}

const fn apply_context_window_margin(context_window: u64) -> u64 {
[Collaborator review comment] General comment, but let's try to avoid using unsigned int. Even if the value is non-negative, the modulo arithmetic is pretty different.

(context_window * 95) / 100
}

#[derive(Debug, Deserialize, Serialize)]
struct SseEvent {
#[serde(rename = "type")]
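For reference, here is a minimal, self-contained sketch (not part of the diff) of what the new 5% margin does to the advertised context window, plus a hypothetical signed variant along the lines of the review comment above. The signed function is an assumption, not code from this PR:

```rust
/// 5% safety margin applied to the advertised context window, as in the PR.
const fn apply_context_window_margin(context_window: u64) -> u64 {
    (context_window * 95) / 100
}

/// Hypothetical signed variant suggested by the review comment. With i64,
/// division truncates toward zero, so an accidental negative input stays
/// small and negative instead of wrapping to a huge u64 value.
const fn apply_context_window_margin_signed(context_window: i64) -> i64 {
    (context_window * 95) / 100
}

fn main() {
    // gpt-5 / gpt-5-codex advertise a 272_000-token window.
    assert_eq!(apply_context_window_margin(272_000), 258_400);
    // o3 advertises 200_000 tokens.
    assert_eq!(apply_context_window_margin(200_000), 190_000);
    // The signed variant agrees for non-negative inputs.
    assert_eq!(apply_context_window_margin_signed(272_000), 258_400);
}
```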
8 changes: 4 additions & 4 deletions codex-rs/core/src/config.rs
@@ -2709,7 +2709,7 @@ model_verbosity = "high"
model_family: find_family_for_model("o3").expect("known model slug"),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(180_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::Never,
@@ -2776,7 +2776,7 @@ model_verbosity = "high"
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
model_context_window: Some(16_385),
model_max_output_tokens: Some(4_096),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(14_746),
model_provider_id: "openai-chat-completions".to_string(),
model_provider: fixture.openai_chat_completions_provider.clone(),
approval_policy: AskForApproval::UnlessTrusted,
@@ -2858,7 +2858,7 @@ model_verbosity = "high"
model_family: find_family_for_model("o3").expect("known model slug"),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(180_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::OnFailure,
@@ -2926,7 +2926,7 @@ model_verbosity = "high"
model_family: find_family_for_model("gpt-5").expect("known model slug"),
model_context_window: Some(272_000),
model_max_output_tokens: Some(128_000),
model_auto_compact_token_limit: None,
model_auto_compact_token_limit: Some(244_800),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::OnFailure,
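The updated fixture values all follow from a 90% auto-compact default applied to each model's context window with truncating integer division; a quick arithmetic check (a sketch, not test code from the PR):

```rust
fn main() {
    // Assumed to mirror default_auto_compact_limit: (context_window * 9) / 10.
    let limit = |context_window: u64| (context_window as i64 * 9) / 10;
    assert_eq!(limit(200_000), 180_000); // o3 fixtures
    assert_eq!(limit(16_385), 14_746);   // gpt-3.5-turbo fixture (147_465 / 10, truncated)
    assert_eq!(limit(272_000), 244_800); // gpt-5 fixture
}
```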
12 changes: 6 additions & 6 deletions codex-rs/core/src/openai_model_info.rs
@@ -23,9 +23,13 @@ impl ModelInfo {
Self {
context_window,
max_output_tokens,
auto_compact_token_limit: None,
auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
}
}

const fn default_auto_compact_limit(context_window: u64) -> i64 {
(context_window as i64 * 9) / 10
}
}

pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
@@ -62,11 +66,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
// https://platform.openai.com/docs/models/gpt-3.5-turbo
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),

_ if slug.starts_with("gpt-5-codex") => Some(ModelInfo {
context_window: 272_000,
max_output_tokens: 128_000,
auto_compact_token_limit: Some(350_000),
}),
_ if slug.starts_with("gpt-5-codex") => Some(ModelInfo::new(272_000, 128_000)),

_ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),

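With gpt-5-codex now going through `ModelInfo::new`, its auto-compact limit is derived from the context window (90%) instead of the old hard-coded `Some(350_000)`, which was larger than the 272_000-token window itself. A rough sketch of the resulting behavior, with the struct shape assumed from the diff rather than copied from the crate:

```rust
// Shape assumed from the diff; this mirrors, not reproduces, openai_model_info.rs.
struct ModelInfo {
    context_window: u64,
    max_output_tokens: u64,
    auto_compact_token_limit: Option<i64>,
}

impl ModelInfo {
    const fn new(context_window: u64, max_output_tokens: u64) -> Self {
        Self {
            context_window,
            max_output_tokens,
            auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
        }
    }

    const fn default_auto_compact_limit(context_window: u64) -> i64 {
        (context_window as i64 * 9) / 10
    }
}

fn main() {
    let codex = ModelInfo::new(272_000, 128_000);
    assert_eq!(codex.context_window, 272_000);
    assert_eq!(codex.max_output_tokens, 128_000);
    // Previously Some(350_000); now 90% of the window.
    assert_eq!(codex.auto_compact_token_limit, Some(244_800));
}
```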
13 changes: 9 additions & 4 deletions codex-rs/core/tests/suite/client.rs
@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5-codex in tests → 272000 context window
"model_context_window": 272000
// Default model is gpt-5-codex in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
"primary": {
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;

const EFFECTIVE_CONTEXT_WINDOW: u64 = (272_000 * 95) / 100;

responses::mount_sse_once_match(
&server,
body_string_contains("trigger context window"),
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
.info
.expect("token usage info present when context window is exceeded");

assert_eq!(info.model_context_window, Some(272_000));
assert_eq!(info.total_token_usage.total_tokens, 272_000);
assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
assert_eq!(
info.total_token_usage.total_tokens,
EFFECTIVE_CONTEXT_WINDOW
);

let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
103 changes: 103 additions & 0 deletions codex-rs/core/tests/suite/compact.rs
@@ -43,6 +43,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
"Your input exceeds the context window of this model. Please adjust your input and try again.";
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -860,3 +861,105 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"second auto compact request should include the summarization prompt"
);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
skip_if_no_network!();

let server = start_mock_server().await;

let context_window = 100;
let limit = (context_window * 90 / 100) as i64;
let over_limit_tokens = context_window * 95 / 100 + 1;

let first_turn = sse(vec![
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
ev_completed_with_tokens("r1", 50),
]);
let function_call_follow_up = sse(vec![
ev_assistant_message("m2", FINAL_REPLY),
ev_completed_with_tokens("r2", over_limit_tokens),
]);
let auto_compact_turn = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_completed_with_tokens("r3", 10),
]);
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);

let request_log = mount_sse_sequence(
[Collaborator review comment] nit: you can mount these one by one and only store mocks of the ones you care about into variables. Avoids needing to index into request_log.

&server,
vec![
first_turn,
function_call_follow_up,
auto_compact_turn,
post_auto_compact_turn,
],
)
.await;

let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};

let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_context_window = Some(context_window);
config.model_auto_compact_token_limit = Some(limit);

let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;

codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FUNCTION_CALL_LIMIT_MSG.into(),
}],
})
.await
.unwrap();

wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;

let requests = request_log.requests();
assert_eq!(
requests.len(),
4,
"expected user request, function call follow-up, auto compact, and post-compact requests"
);

let first_request = requests[0].input();
assert!(
first_request.iter().any(|item| {
item.get("type").and_then(|value| value.as_str()) == Some("message")
&& item
.get("content")
.and_then(|content| content.as_array())
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(|value| value.as_str())
== Some(FUNCTION_CALL_LIMIT_MSG)
}),
"first request should include the user message that triggers the function call"
);

let function_call_output = requests[1].function_call_output(DUMMY_CALL_ID);
let output_text = function_call_output
.get("output")
.and_then(|value| value.as_str())
.unwrap_or_default();
assert!(
output_text.contains(DUMMY_FUNCTION_NAME),
"function call output should be sent before auto compact"
);

let auto_compact_body = requests[2].body_json().to_string();
assert!(
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
);
}
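A quick sanity check of the numbers this test leans on (a sketch mirroring the test's arithmetic, not the production trigger logic):

```rust
fn main() {
    let context_window: u64 = 100;
    let limit = (context_window * 90 / 100) as i64;        // auto-compact limit = 90
    let over_limit_tokens = context_window * 95 / 100 + 1; // 96 tokens after the tool-call turn
    assert_eq!(limit, 90);
    assert_eq!(over_limit_tokens, 96);
    // 96 > 90, so the turn after the function-call output trips auto compact,
    // which is why the test expects exactly four requests.
    assert!(over_limit_tokens as i64 > limit);
}
```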