Skip to content

Commit ca6a035

Browse files
authored
bug: sandbox denied error logs (#4874)
Check on STDOUT / STDERR or aggregated output for some logs when sanbox is denied
1 parent 0026b12 commit ca6a035

File tree

4 files changed

+237
-89
lines changed

4 files changed

+237
-89
lines changed

codex-rs/core/src/error.rs

Lines changed: 101 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::exec::ExecToolCallOutput;
22
use crate::token_data::KnownPlan;
33
use crate::token_data::PlanType;
4+
use crate::truncate::truncate_middle;
45
use codex_protocol::ConversationId;
56
use codex_protocol::protocol::RateLimitSnapshot;
67
use reqwest::StatusCode;
@@ -12,6 +13,9 @@ use tokio::task::JoinError;
1213

1314
pub type Result<T> = std::result::Result<T, CodexErr>;
1415

16+
/// Limit UI error messages to a reasonable size while keeping useful context.
17+
const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB
18+
1519
#[derive(Error, Debug)]
1620
pub enum SandboxErr {
1721
/// Error from sandbox execution
@@ -304,21 +308,44 @@ impl CodexErr {
304308
}
305309

306310
pub fn get_error_message_ui(e: &CodexErr) -> String {
307-
match e {
308-
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
311+
let message = match e {
312+
CodexErr::Sandbox(SandboxErr::Denied { output }) => {
313+
let aggregated = output.aggregated_output.text.trim();
314+
if !aggregated.is_empty() {
315+
output.aggregated_output.text.clone()
316+
} else {
317+
let stderr = output.stderr.text.trim();
318+
let stdout = output.stdout.text.trim();
319+
match (stderr.is_empty(), stdout.is_empty()) {
320+
(false, false) => format!("{stderr}\n{stdout}"),
321+
(false, true) => output.stderr.text.clone(),
322+
(true, false) => output.stdout.text.clone(),
323+
(true, true) => format!(
324+
"command failed inside sandbox with exit code {}",
325+
output.exit_code
326+
),
327+
}
328+
}
329+
}
309330
// Timeouts are not sandbox errors from a UX perspective; present them plainly
310-
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
311-
"error: command timed out after {} ms",
312-
output.duration.as_millis()
313-
),
331+
CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
332+
format!(
333+
"error: command timed out after {} ms",
334+
output.duration.as_millis()
335+
)
336+
}
314337
_ => e.to_string(),
315-
}
338+
};
339+
340+
truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
316341
}
317342

318343
#[cfg(test)]
319344
mod tests {
320345
use super::*;
346+
use crate::exec::StreamOutput;
321347
use codex_protocol::protocol::RateLimitWindow;
348+
use pretty_assertions::assert_eq;
322349

323350
fn rate_limit_snapshot() -> RateLimitSnapshot {
324351
RateLimitSnapshot {
@@ -348,6 +375,73 @@ mod tests {
348375
);
349376
}
350377

378+
#[test]
379+
fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
380+
let output = ExecToolCallOutput {
381+
exit_code: 77,
382+
stdout: StreamOutput::new(String::new()),
383+
stderr: StreamOutput::new(String::new()),
384+
aggregated_output: StreamOutput::new("aggregate detail".to_string()),
385+
duration: Duration::from_millis(10),
386+
timed_out: false,
387+
};
388+
let err = CodexErr::Sandbox(SandboxErr::Denied {
389+
output: Box::new(output),
390+
});
391+
assert_eq!(get_error_message_ui(&err), "aggregate detail");
392+
}
393+
394+
#[test]
395+
fn sandbox_denied_reports_both_streams_when_available() {
396+
let output = ExecToolCallOutput {
397+
exit_code: 9,
398+
stdout: StreamOutput::new("stdout detail".to_string()),
399+
stderr: StreamOutput::new("stderr detail".to_string()),
400+
aggregated_output: StreamOutput::new(String::new()),
401+
duration: Duration::from_millis(10),
402+
timed_out: false,
403+
};
404+
let err = CodexErr::Sandbox(SandboxErr::Denied {
405+
output: Box::new(output),
406+
});
407+
assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
408+
}
409+
410+
#[test]
411+
fn sandbox_denied_reports_stdout_when_no_stderr() {
412+
let output = ExecToolCallOutput {
413+
exit_code: 11,
414+
stdout: StreamOutput::new("stdout only".to_string()),
415+
stderr: StreamOutput::new(String::new()),
416+
aggregated_output: StreamOutput::new(String::new()),
417+
duration: Duration::from_millis(8),
418+
timed_out: false,
419+
};
420+
let err = CodexErr::Sandbox(SandboxErr::Denied {
421+
output: Box::new(output),
422+
});
423+
assert_eq!(get_error_message_ui(&err), "stdout only");
424+
}
425+
426+
#[test]
427+
fn sandbox_denied_reports_exit_code_when_no_output_available() {
428+
let output = ExecToolCallOutput {
429+
exit_code: 13,
430+
stdout: StreamOutput::new(String::new()),
431+
stderr: StreamOutput::new(String::new()),
432+
aggregated_output: StreamOutput::new(String::new()),
433+
duration: Duration::from_millis(5),
434+
timed_out: false,
435+
};
436+
let err = CodexErr::Sandbox(SandboxErr::Denied {
437+
output: Box::new(output),
438+
});
439+
assert_eq!(
440+
get_error_message_ui(&err),
441+
"command failed inside sandbox with exit code 13"
442+
);
443+
}
444+
351445
#[test]
352446
fn usage_limit_reached_error_formats_free_plan() {
353447
let err = UsageLimitReachedError {

codex-rs/core/src/exec.rs

Lines changed: 119 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ pub async fn process_exec_tool_call(
177177
}));
178178
}
179179

180-
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
180+
if is_likely_sandbox_denied(sandbox_type, &exec_output) {
181181
return Err(CodexErr::Sandbox(SandboxErr::Denied {
182182
output: Box::new(exec_output),
183183
}));
@@ -195,21 +195,57 @@ pub async fn process_exec_tool_call(
195195
/// We don't have a fully deterministic way to tell if our command failed
196196
/// because of the sandbox - a command in the user's zshrc file might hit an
197197
/// error, but the command itself might fail or succeed for other reasons.
198-
/// For now, we conservatively check for 'command not found' (exit code 127),
199-
/// and can add additional cases as necessary.
200-
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
201-
if sandbox_type == SandboxType::None {
198+
/// For now, we conservatively check for well known command failure exit codes and
199+
/// also look for common sandbox denial keywords in the command output.
200+
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
201+
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
202202
return false;
203203
}
204204

205205
// Quick rejects: well-known non-sandbox shell exit codes
206-
// 127: command not found, 2: misuse of shell builtins
207-
if exit_code == 127 {
206+
// 2: misuse of shell builtins
207+
// 126: permission denied
208+
// 127: command not found
209+
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
210+
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
208211
return false;
209212
}
210213

211-
// For all other cases, we assume the sandbox is the cause
212-
true
214+
const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
215+
"operation not permitted",
216+
"permission denied",
217+
"read-only file system",
218+
"seccomp",
219+
"sandbox",
220+
"landlock",
221+
];
222+
223+
if [
224+
&exec_output.stderr.text,
225+
&exec_output.stdout.text,
226+
&exec_output.aggregated_output.text,
227+
]
228+
.into_iter()
229+
.any(|section| {
230+
let lower = section.to_lowercase();
231+
SANDBOX_DENIED_KEYWORDS
232+
.iter()
233+
.any(|needle| lower.contains(needle))
234+
}) {
235+
return true;
236+
}
237+
238+
#[cfg(unix)]
239+
{
240+
const SIGSYS_CODE: i32 = libc::SIGSYS;
241+
if sandbox_type == SandboxType::LinuxSeccomp
242+
&& exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
243+
{
244+
return true;
245+
}
246+
}
247+
248+
false
213249
}
214250

215251
#[derive(Debug)]
@@ -436,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
436472
#[expect(clippy::unwrap_used)]
437473
std::process::ExitStatus::from_raw(code.try_into().unwrap())
438474
}
475+
476+
#[cfg(test)]
477+
mod tests {
478+
use super::*;
479+
use std::time::Duration;
480+
481+
fn make_exec_output(
482+
exit_code: i32,
483+
stdout: &str,
484+
stderr: &str,
485+
aggregated: &str,
486+
) -> ExecToolCallOutput {
487+
ExecToolCallOutput {
488+
exit_code,
489+
stdout: StreamOutput::new(stdout.to_string()),
490+
stderr: StreamOutput::new(stderr.to_string()),
491+
aggregated_output: StreamOutput::new(aggregated.to_string()),
492+
duration: Duration::from_millis(1),
493+
timed_out: false,
494+
}
495+
}
496+
497+
#[test]
498+
fn sandbox_detection_requires_keywords() {
499+
let output = make_exec_output(1, "", "", "");
500+
assert!(!is_likely_sandbox_denied(
501+
SandboxType::LinuxSeccomp,
502+
&output
503+
));
504+
}
505+
506+
#[test]
507+
fn sandbox_detection_identifies_keyword_in_stderr() {
508+
let output = make_exec_output(1, "", "Operation not permitted", "");
509+
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
510+
}
511+
512+
#[test]
513+
fn sandbox_detection_respects_quick_reject_exit_codes() {
514+
let output = make_exec_output(127, "", "command not found", "");
515+
assert!(!is_likely_sandbox_denied(
516+
SandboxType::LinuxSeccomp,
517+
&output
518+
));
519+
}
520+
521+
#[test]
522+
fn sandbox_detection_ignores_non_sandbox_mode() {
523+
let output = make_exec_output(1, "", "Operation not permitted", "");
524+
assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
525+
}
526+
527+
#[test]
528+
fn sandbox_detection_uses_aggregated_output() {
529+
let output = make_exec_output(
530+
101,
531+
"",
532+
"",
533+
"cargo failed: Read-only file system when writing target",
534+
);
535+
assert!(is_likely_sandbox_denied(
536+
SandboxType::MacosSeatbelt,
537+
&output
538+
));
539+
}
540+
541+
#[cfg(unix)]
542+
#[test]
543+
fn sandbox_detection_flags_sigsys_exit_code() {
544+
let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
545+
let output = make_exec_output(exit_code, "", "", "");
546+
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
547+
}
548+
}

codex-rs/core/src/executor/runner.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,23 @@ mod tests {
380380
assert_eq!(message, "failed in sandbox: sandbox stderr");
381381
}
382382

383+
#[test]
384+
fn sandbox_failure_message_falls_back_to_aggregated_output() {
385+
let output = ExecToolCallOutput {
386+
exit_code: 101,
387+
stdout: StreamOutput::new(String::new()),
388+
stderr: StreamOutput::new(String::new()),
389+
aggregated_output: StreamOutput::new("aggregate text".to_string()),
390+
duration: Duration::from_millis(10),
391+
timed_out: false,
392+
};
393+
let err = SandboxErr::Denied {
394+
output: Box::new(output),
395+
};
396+
let message = sandbox_failure_message(err);
397+
assert_eq!(message, "failed in sandbox: aggregate text");
398+
}
399+
383400
#[test]
384401
fn normalize_function_error_synthesizes_payload() {
385402
let err = FunctionCallError::RespondToModel("boom".to_string());

0 commit comments

Comments
 (0)