Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions codex-rs/core/src/tools/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,41 +274,42 @@ impl ToolEmitter {
ctx: ToolEventCtx<'_>,
out: Result<ExecToolCallOutput, ToolError>,
) -> Result<String, FunctionCallError> {
let event;
let result = match out {
let (event, result) = match out {
Ok(output) => {
let content = super::format_exec_output_for_model(&output);
let exit_code = output.exit_code;
event = ToolEventStage::Success(output);
if exit_code == 0 {
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
Ok(content)
} else {
Err(FunctionCallError::RespondToModel(content))
}
};
(event, result)
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = super::format_exec_output_for_model(&output);
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
Err(FunctionCallError::RespondToModel(response))
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)
}
Err(ToolError::Codex(err)) => {
let message = format!("execution error: {err:?}");
let response = message.clone();
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
Err(FunctionCallError::RespondToModel(response))
let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
let result = Err(FunctionCallError::RespondToModel(message));
(event, result)
}
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
Err(ToolError::Rejected(msg)) => {
// Normalize common rejection messages for exec tools so tests and
// users see a clear, consistent phrase.
let normalized = if msg == "rejected by user" {
"exec command rejected by user".to_string()
} else {
msg
};
let response = &normalized;
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
Err(FunctionCallError::RespondToModel(response.clone()))
let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
let result = Err(FunctionCallError::RespondToModel(normalized));
(event, result)
}
};
self.emit(ctx, event).await;
Expand Down
38 changes: 8 additions & 30 deletions codex-rs/core/src/tools/orchestrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,16 @@ impl ToolOrchestrator {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
if !tool.escalate_on_failure() {
return Err(ToolError::SandboxDenied(
"sandbox denied and no retry".to_string(),
));
return Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
output,
})));
}
// Under `Never` or `OnRequest`, do not retry without sandbox; surface a concise message
// derived from the actual output (platform-agnostic).
// Under `Never` or `OnRequest`, do not retry without sandbox; surface a concise
// sandbox denial that preserves the original output.
if !tool.wants_no_sandbox_approval(approval_policy) {
let msg = build_never_denied_message_from_output(output.as_ref());
return Err(ToolError::SandboxDenied(msg));
return Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
output,
})));
}

// Ask for approval before retrying without sandbox.
Expand Down Expand Up @@ -167,29 +168,6 @@ impl ToolOrchestrator {
}
}

fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
let body = format!(
"{}\n{}\n{}",
output.stderr.text, output.stdout.text, output.aggregated_output.text
)
.to_lowercase();

let detail = if body.contains("permission denied") {
Some("Permission denied")
} else if body.contains("operation not permitted") {
Some("Operation not permitted")
} else if body.contains("read-only file system") {
Some("Read-only file system")
} else {
None
};

match detail {
Some(tag) => format!("failed in sandbox: {tag}"),
None => "failed in sandbox".to_string(),
}
}

fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
// Keep approval reason terse and stable for UX/tests, but accept the
// output so we can evolve heuristics later without touching call sites.
Expand Down
1 change: 0 additions & 1 deletion codex-rs/core/src/tools/sandboxing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ pub(crate) trait ProvidesSandboxRetryData {
#[derive(Debug)]
pub(crate) enum ToolError {
Rejected(String),
SandboxDenied(String),
Codex(CodexErr),
}

Expand Down
25 changes: 18 additions & 7 deletions codex-rs/core/tests/suite/approvals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,22 @@ impl Expectation {
"expected non-zero exit for {path:?}"
);
for needle in *message_contains {
assert!(
result.stdout.contains(needle),
"stdout missing {needle:?}: {}",
result.stdout
);
if needle.contains('|') {
let options: Vec<&str> = needle.split('|').collect();
let matches_any =
options.iter().any(|option| result.stdout.contains(option));
assert!(
matches_any,
"stdout missing one of {options:?}: {}",
result.stdout
);
} else {
assert!(
result.stdout.contains(needle),
"stdout missing {needle:?}: {}",
result.stdout
);
}
}
assert!(
!path.exists(),
Expand Down Expand Up @@ -901,7 +912,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
message_contains: if cfg!(target_os = "linux") {
&["Permission denied"]
} else {
&["failed in sandbox"]
&["Permission denied|Operation not permitted|Read-only file system"]
},
},
},
Expand Down Expand Up @@ -1045,7 +1056,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
message_contains: if cfg!(target_os = "linux") {
&["Permission denied"]
} else {
&["failed in sandbox"]
&["Permission denied|Operation not permitted|Read-only file system"]
},
},
},
Expand Down
98 changes: 98 additions & 0 deletions codex-rs/core/tests/suite/tools.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::unwrap_used, clippy::expect_used)]

use anyhow::Context;
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
Expand Down Expand Up @@ -227,6 +228,103 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
});
let fixture = builder.build(&server).await?;

let call_id = "sandbox-denied-shell";
let target_path = fixture.workspace_path("sandbox-denied.txt");
let sentinel = "sandbox-denied sentinel output";
let command = vec![
"/bin/sh".to_string(),
"-c".to_string(),
format!(
"printf {sentinel:?}; printf {content:?} > {path:?}",
sentinel = format!("{sentinel}\n"),
content = "sandbox denied",
path = &target_path
),
];
let args = json!({
"command": command,
"timeout_ms": 1_000,
});

let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

fixture
.submit_turn_with_policy(
"run a command that should be denied by the read-only sandbox",
SandboxPolicy::ReadOnly,
)
.await?;

let output_text = mock
.function_call_output_text(call_id)
.context("shell output present")?;
let exit_code_line = output_text
.lines()
.next()
.context("exit code line present")?;
let exit_code = exit_code_line
.strip_prefix("Exit code: ")
.context("exit code prefix present")?
.trim()
.parse::<i32>()
.context("exit code is integer")?;
let body = output_text;

let body_lower = body.to_lowercase();
// Required for multi-OS.
let has_denial = body_lower.contains("permission denied")
|| body_lower.contains("operation not permitted")
|| body_lower.contains("read-only file system");
assert!(
has_denial,
"expected sandbox denial details in tool output: {body}"
);
assert!(
body.contains(sentinel),
"expected sentinel output from command to reach the model: {body}"
);
let target_path_str = target_path
.to_str()
.context("target path string representation")?;
assert!(
body.contains(target_path_str),
"expected sandbox error to mention denied path: {body}"
);
assert!(
!body_lower.contains("failed in sandbox"),
"expected original tool output, found fallback message: {body}"
);
assert_ne!(
exit_code, 0,
"sandbox denial should surface a non-zero exit code"
);

Ok(())
}

async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
let server = start_mock_server().await;

Expand Down
Loading