Skip to content

Commit fce7a83

Browse files
authored
Fix `stream did not contain valid UTF-8` error (#8120)
## Summary

Related issues: #8009 #7549

Although `PYTHONIOENCODING=utf-8` forces Python to use UTF-8 for `stdout`/`stderr`, it can't prevent code like `sys.stdout.buffer.write()` or `subprocess.call(["cl.exe", ...])` from bypassing the encoder. This PR uses lossy UTF-8 conversion to avoid decoding errors.

## Alternative

Using the `bstr` crate might be better, since it can preserve the original information. Alternatively, we could follow the Windows convention: unset `PYTHONIOENCODING` and decode with the system default encoding.

## Test Plan

Running locally with a non-ASCII character in `UV_CACHE_DIR` works fine, but I have no unit test plan. Testing locale problems is hard :(
1 parent 7bd0d97 commit fce7a83

File tree

1 file changed

+8
-6
lines changed
  • crates/uv-build-frontend/src

1 file changed

+8
-6
lines changed

crates/uv-build-frontend/src/lib.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -921,13 +921,15 @@ impl PythonRunner {
921921
) -> Result<PythonRunnerOutput, Error> {
922922
/// Read lines from a reader and store them in a buffer.
923923
async fn read_from(
924-
mut reader: tokio::io::Lines<tokio::io::BufReader<impl tokio::io::AsyncRead + Unpin>>,
924+
mut reader: tokio::io::Split<tokio::io::BufReader<impl tokio::io::AsyncRead + Unpin>>,
925925
mut printer: Printer,
926926
buffer: &mut Vec<String>,
927927
) -> io::Result<()> {
928928
loop {
929-
match reader.next_line().await? {
930-
Some(line) => {
929+
match reader.next_segment().await? {
930+
Some(line_buf) => {
931+
let line_buf = line_buf.strip_suffix(b"\r").unwrap_or(&line_buf);
932+
let line = String::from_utf8_lossy(line_buf).into();
931933
let _ = write!(printer, "{line}");
932934
buffer.push(line);
933935
}
@@ -945,7 +947,7 @@ impl PythonRunner {
945947
.env("PATH", modified_path)
946948
.env("VIRTUAL_ENV", venv.root())
947949
.env("CLICOLOR_FORCE", "1")
948-
.env("PYTHONIOENCODING", "utf-8")
950+
.env("PYTHONIOENCODING", "utf-8:backslashreplace")
949951
.stdout(std::process::Stdio::piped())
950952
.stderr(std::process::Stdio::piped())
951953
.spawn()
@@ -956,8 +958,8 @@ impl PythonRunner {
956958
let mut stderr_buf = Vec::with_capacity(1024);
957959

958960
// Create separate readers for `stdout` and `stderr`.
959-
let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).lines();
960-
let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).lines();
961+
let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).split(b'\n');
962+
let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).split(b'\n');
961963

962964
// Asynchronously read from the in-memory pipes.
963965
let printer = Printer::from(self.level);

0 commit comments

Comments
 (0)