Skip to content

Commit 8a22bab

Browse files
Rollup merge of rust-lang#150895 - rustc_colored_explain, r=Kivooeo
rustc_errors: Add (heuristic) Syntax Highlighting for `rustc --explain` This PR adds a feature that enables `rustc --explain <error>` to have syntax-highlighted code blocks. Due to performance, size and complexity constraints, the highlighter is very heuristic, relying on conventions for capitalization and such to infer what an identifier represents. The details for the implementation are specified below. # Changes 1. Change `term::entrypoint` to `term::entrypoint_with_formatter`, which takes an optional third argument, which is a function pointer to a formatter. ([compiler/rustc_errors/src/markdown/mod.rs](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-a6e139cadbc2e6922d816eb08f9e2c7b48304d09e6588227e2b70215c4f0725c)) 2. Change `MdStream::write_anstream_buf` to be a wrapper around a new function, `MdStream::write_anstream_buf_with_formatter`, which takes a function pointer to a formatter. ([compiler/rustc_errors/src/markdown/mod.rs](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-a6e139cadbc2e6922d816eb08f9e2c7b48304d09e6588227e2b70215c4f0725c)) 3. Change [`compiler/rustc_driver_impl/src/lib.rs`](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-39877a2556ea309c89384956740d5892a59cef024aa9473cce16bbdd99287937) to call `MdStream::write_anstream_buf_with_formatter` instead of `MdStream::write_anstream_buf`. 4. Add a `compiler/rustc_driver_impl/src/highlighter.rs` file, which contains the actual syntax highlighter. # Implementation Details 1. The highlighter starts from the `highlight` function defined in `compiler/rustc_driver_impl/src/highlighter.rs`. It creates a new instance of the `Highlighter` struct, and calls its `highlight_rustc_lexer` function to start highlighting. 2. The `highlight_rustc_lexer` function uses `rustc_lexer` to lex the code into `Token`s. 
`rustc_lexer` was chosen since it preserves the newlines after scanning. 3. Based on the kind of token (`TokenKind`), we color the corresponding lexeme. ## Highlighter Implementation ### Identifiers 1. All identifiers that match a (non-exhaustive and minimal) list of keywords are coloured magenta. 2. An identifier that begins with a capital letter is assumed to be a type. There is no distinction between a `Trait` and a type, since that would involve name resolution, and the parts of `rustc` that perform name resolution on code do not preserve the original formatting. (An attempt to use `rustc_parse`'s lexer and `TokenStream` was made, which was then printed with the pretty printer, but failed to preserve the formatting and was generally more complex to work with.) 3. An identifier that is immediately followed by a parenthesis is recognized as a function identifier, and coloured blue. ## Literals 5. A `String` literal (or its corresponding `Raw`, `C` and `Byte` versions) is colored green. 6. All other literals are colored bright red (orange-esque) ## Everything Else Everything else is colored bright white and dimmed, to create a grayish colour. --- # Demo <img width="1864" height="2136" alt="image" src="https://github.com/user-attachments/assets/b17d3a71-e641-4457-be85-5e5b1cea2954" /> <caption> Command: <code>rustc --explain E0520</code> </caption> --- This description was not generated by an LLM (:p) cc: @bjorn3
2 parents 958d1f9 + 67c45b7 commit 8a22bab

File tree

10 files changed

+249
-26
lines changed

10 files changed

+249
-26
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,7 @@ dependencies = [
37673767
name = "rustc_driver_impl"
37683768
version = "0.0.0"
37693769
dependencies = [
3770+
"anstyle",
37703771
"ctrlc",
37713772
"jiff",
37723773
"libc",
@@ -3792,6 +3793,7 @@ dependencies = [
37923793
"rustc_index",
37933794
"rustc_infer",
37943795
"rustc_interface",
3796+
"rustc_lexer",
37953797
"rustc_lint",
37963798
"rustc_log",
37973799
"rustc_macros",

compiler/rustc_driver_impl/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8+
anstyle = "1.0.13"
89
jiff = { version = "0.2.5", default-features = false, features = ["std"] }
910
rustc_abi = { path = "../rustc_abi" }
1011
rustc_ast = { path = "../rustc_ast" }
@@ -28,6 +29,7 @@ rustc_incremental = { path = "../rustc_incremental" }
2829
rustc_index = { path = "../rustc_index" }
2930
rustc_infer = { path = "../rustc_infer" }
3031
rustc_interface = { path = "../rustc_interface" }
32+
rustc_lexer = { path = "../rustc_lexer" }
3133
rustc_lint = { path = "../rustc_lint" }
3234
rustc_log = { path = "../rustc_log" }
3335
rustc_macros = { path = "../rustc_macros" }
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
//! This module provides a syntax highlighter for Rust code.
2+
//! It is used by the `rustc --explain` command.
3+
//!
4+
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
5+
//! function to parse the Rust code into a `Vec` of tokens.
6+
//! The highlighter then highlights the tokens in the `Vec`,
7+
//! and writes the highlighted output to the buffer.
8+
use std::io::{self, Write};
9+
10+
use anstyle::{AnsiColor, Color, Effects, Style};
11+
use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
12+
13+
/// Identifiers that name a built-in primitive type. The highlighter gives
/// an identifier found in this list the primitive-type color.
const PRIMITIVE_TYPES: &[&str] = &[
    // signed integers
    "i8", "i16", "i32", "i64", "i128", "isize",
    // unsigned integers
    "u8", "u16", "u32", "u64", "u128", "usize",
    // floating point
    "f32", "f64",
    // others
    "char", "bool",
];
19+
20+
/// The (deliberately small, non-exhaustive) set of identifiers that the
/// highlighter treats as keywords. Only membership matters, so the entries
/// are kept in alphabetical order.
const KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum",
    "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move",
    "mut", "pub", "ref", "static", "struct", "super", "trait", "true", "type", "unsafe", "use",
    "where", "while",
];
26+
27+
const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
28+
const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
29+
const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
30+
const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
31+
const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
32+
const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
33+
const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
34+
const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;
35+
36+
/// Highlight a Rust code string and write the highlighted
37+
/// output to the buffer. It serves as a wrapper around
38+
/// `Highlighter::highlight_rustc_lexer`. It is passed to
39+
/// `write_anstream_buf` in the `lib.rs` file.
40+
pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
41+
let mut highlighter = Highlighter::default();
42+
highlighter.highlight_rustc_lexer(code, buf)
43+
}
44+
45+
/// Heuristic syntax highlighter for Rust snippets.
/// The `rustc --explain` command uses it to color code.
#[derive(Default)]
pub struct Highlighter {
    /// Set when the token just seen changes how the token after it
    /// has to be colored.
    ///
    /// The `fn` keyword is such a token: the function name that
    /// follows it gets a color of its own.
    prev_was_special: bool,
    /// Combined length of all tokens emitted so far. It doubles as
    /// the offset in the code string at which the lexeme of the
    /// next token begins.
    len_accum: usize,
}
60+
61+
impl Highlighter {
62+
/// Create a new highlighter
63+
pub fn new() -> Self {
64+
Self::default()
65+
}
66+
67+
/// Highlight a Rust code string and write the highlighted
68+
/// output to the buffer.
69+
pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
70+
use rustc_lexer::TokenKind;
71+
72+
// Remove shebang from code string
73+
let stripped_idx = strip_shebang(code).unwrap_or(0);
74+
let stripped_code = &code[stripped_idx..];
75+
self.len_accum = stripped_idx;
76+
let len_accum = &mut self.len_accum;
77+
let tokens = tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No);
78+
for token in tokens {
79+
let len = token.len as usize;
80+
// If the previous token was a special token, and this token is
81+
// not a whitespace token, then it should be colored differently
82+
let token_str = &code[*len_accum..*len_accum + len];
83+
if self.prev_was_special {
84+
if token_str != " " {
85+
self.prev_was_special = false;
86+
}
87+
let style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue)));
88+
write!(buf, "{style}{token_str}{style:#}")?;
89+
*len_accum += len;
90+
continue;
91+
}
92+
match token.kind {
93+
TokenKind::Ident => {
94+
let mut style = Style::new();
95+
// Match if an identifier is a (well-known) keyword
96+
if KEYWORDS.contains(&token_str) {
97+
if token_str == "fn" {
98+
self.prev_was_special = true;
99+
}
100+
style = style.fg_color(Some(Color::Ansi(KEYWORD_COLOR)));
101+
}
102+
// The `use` keyword is colored differently
103+
if matches!(token_str, "use") {
104+
style = style.fg_color(Some(Color::Ansi(USE_COLOR)));
105+
}
106+
// This heuristic test is to detect if the identifier is
107+
// a function call. If it is, then the function identifier is
108+
// colored differently.
109+
if code[*len_accum..*len_accum + len + 1].ends_with('(') {
110+
style = style.fg_color(Some(Color::Ansi(FUNCTION_COLOR)));
111+
}
112+
// The `derive` keyword is colored differently.
113+
if token_str == "derive" {
114+
style = style.fg_color(Some(Color::Ansi(DERIVE_COLOR)));
115+
}
116+
// This heuristic test is to detect if the identifier is
117+
// a type. If it is, then the identifier is colored differently.
118+
if matches!(token_str.chars().next().map(|c| c.is_uppercase()), Some(true)) {
119+
style = style.fg_color(Some(Color::Ansi(TYPE_COLOR)));
120+
}
121+
// This if statement is to detect if the identifier is a primitive type.
122+
if PRIMITIVE_TYPES.contains(&token_str) {
123+
style = style.fg_color(Some(Color::Ansi(PRIMITIVE_TYPE_COLOR)));
124+
}
125+
write!(buf, "{style}{token_str}{style:#}")?;
126+
}
127+
128+
// Color literals
129+
TokenKind::Literal { kind, suffix_start: _ } => {
130+
// Strings -> Green
131+
// Chars -> Green
132+
// Raw strings -> Green
133+
// C strings -> Green
134+
// Byte Strings -> Green
135+
// Other literals -> Bright Red (Orage-esque)
136+
let style = match kind {
137+
LiteralKind::Str { terminated: _ }
138+
| LiteralKind::Char { terminated: _ }
139+
| LiteralKind::RawStr { n_hashes: _ }
140+
| LiteralKind::CStr { terminated: _ } => {
141+
Style::new().fg_color(Some(Color::Ansi(STR_LITERAL_COLOR)))
142+
}
143+
_ => Style::new().fg_color(Some(Color::Ansi(OTHER_LITERAL_COLOR))),
144+
};
145+
write!(buf, "{style}{token_str}{style:#}")?;
146+
}
147+
_ => {
148+
// All other tokens are dimmed
149+
let style = Style::new()
150+
.fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
151+
.effects(Effects::DIMMED);
152+
write!(buf, "{style}{token_str}{style:#}")?;
153+
}
154+
}
155+
*len_accum += len;
156+
}
157+
Ok(())
158+
}
159+
}

compiler/rustc_driver_impl/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub mod args;
8585
pub mod pretty;
8686
#[macro_use]
8787
mod print;
88+
pub mod highlighter;
8889
mod session_diagnostics;
8990

9091
// Keep the OS parts of this `cfg` in sync with the `cfg` on the `libc`
@@ -526,7 +527,11 @@ fn show_md_content_with_pager(content: &str, color: ColorConfig) {
526527
let mdstream = markdown::MdStream::parse_str(content);
527528
let bufwtr = markdown::create_stdout_bufwtr();
528529
let mut mdbuf = Vec::new();
529-
if mdstream.write_anstream_buf(&mut mdbuf).is_ok() { Some((bufwtr, mdbuf)) } else { None }
530+
if mdstream.write_anstream_buf(&mut mdbuf, Some(&highlighter::highlight)).is_ok() {
531+
Some((bufwtr, mdbuf))
532+
} else {
533+
None
534+
}
530535
};
531536

532537
// Try to print via the pager, pretty output if possible.

compiler/rustc_errors/src/markdown/mod.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,14 @@ impl<'a> MdStream<'a> {
1818
parse::entrypoint(s)
1919
}
2020

21-
/// Write formatted output to an anstream buffer
22-
pub fn write_anstream_buf(&self, buf: &mut Vec<u8>) -> io::Result<()> {
23-
term::entrypoint(self, buf)
21+
/// Write formatted output to a stdout buffer, optionally with
22+
/// a formatter for code blocks
23+
pub fn write_anstream_buf(
24+
&self,
25+
buf: &mut Vec<u8>,
26+
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
27+
) -> io::Result<()> {
28+
term::entrypoint(self, buf, formatter)
2429
}
2530
}
2631

compiler/rustc_errors/src/markdown/term.rs

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,33 @@ thread_local! {
1212
static CURSOR: Cell<usize> = const { Cell::new(0) };
1313
/// Width of the terminal
1414
static WIDTH: Cell<usize> = const { Cell::new(DEFAULT_COLUMN_WIDTH) };
15+
1516
}
1617

17-
/// Print to terminal output to a buffer
18-
pub(crate) fn entrypoint(stream: &MdStream<'_>, buf: &mut Vec<u8>) -> io::Result<()> {
19-
#[cfg(not(test))]
20-
if let Some((w, _)) = termize::dimensions() {
21-
WIDTH.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH));
22-
}
23-
write_stream(stream, buf, None, 0)?;
18+
/// Print to the terminal output to a buffer
19+
/// optionally with a formatter for code blocks
20+
pub(crate) fn entrypoint(
21+
stream: &MdStream<'_>,
22+
buf: &mut Vec<u8>,
23+
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
24+
) -> io::Result<()> {
25+
write_stream(stream, buf, None, 0, formatter)?;
2426
buf.write_all(b"\n")
2527
}
26-
/// Write the buffer, reset to the default style after each
28+
29+
/// Write the buffer, reset to the default style after each,
30+
/// optionally with a formatter for code blocks
2731
fn write_stream(
2832
MdStream(stream): &MdStream<'_>,
2933
buf: &mut Vec<u8>,
34+
3035
default: Option<Style>,
3136
indent: usize,
37+
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
3238
) -> io::Result<()> {
3339
for tt in stream {
34-
write_tt(tt, buf, default, indent)?;
40+
write_tt(tt, buf, default, indent, formatter)?;
3541
}
36-
reset_opt_style(buf, default)?;
37-
3842
Ok(())
3943
}
4044

@@ -43,12 +47,17 @@ fn write_tt(
4347
buf: &mut Vec<u8>,
4448
default: Option<Style>,
4549
indent: usize,
50+
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
4651
) -> io::Result<()> {
4752
match tt {
4853
MdTree::CodeBlock { txt, lang: _ } => {
4954
reset_opt_style(buf, default)?;
50-
let style = Style::new().effects(Effects::DIMMED);
51-
write!(buf, "{style}{txt}{style:#}")?;
55+
if let Some(formatter) = formatter {
56+
formatter(txt, buf)?;
57+
} else {
58+
let style = Style::new().effects(Effects::DIMMED);
59+
write!(buf, "{style}{txt}{style:#}")?;
60+
}
5261
render_opt_style(buf, default)?;
5362
}
5463
MdTree::CodeInline(txt) => {
@@ -105,20 +114,20 @@ fn write_tt(
105114
};
106115
reset_opt_style(buf, default)?;
107116
write!(buf, "{cs}")?;
108-
write_stream(stream, buf, Some(cs), 0)?;
117+
write_stream(stream, buf, Some(cs), 0, None)?;
109118
write!(buf, "{cs:#}")?;
110119
render_opt_style(buf, default)?;
111120
buf.write_all(b"\n")?;
112121
}
113122
MdTree::OrderedListItem(n, stream) => {
114123
let base = format!("{n}. ");
115124
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
116-
write_stream(stream, buf, None, indent + 4)?;
125+
write_stream(stream, buf, None, indent + 4, None)?;
117126
}
118127
MdTree::UnorderedListItem(stream) => {
119128
let base = "* ";
120129
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
121-
write_stream(stream, buf, None, indent + 4)?;
130+
write_stream(stream, buf, None, indent + 4, None)?;
122131
}
123132
// Patterns popped in previous step
124133
MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),

compiler/rustc_errors/src/markdown/tests/output.stdout

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
H1 Heading ]8;;http://docs.rs\with a link]8;;\
1+
H1 Heading ]8;;http://docs.rs\with a link]8;;\
22
H1 content: some words in bold and so does inline code
33

4-
H2 Heading
4+
H2 Heading
55
H2 content: some words in italic
66

7-
H3 Heading
7+
H3 Heading
88
H3 content: strikethrough text
99

10-
H4 Heading
10+
H4 Heading
1111
H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
1212
--------------------------------------------------------------------------------------------------------------------------------------------
1313
A section break was above. We can also do paragraph breaks:
@@ -24,7 +24,7 @@ Or ordered:
2424
elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
2525
aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
2626
--------------------------------------------------------------------------------------------------------------------------------------------
27-
Code
27+
Code
2828
Both inline code and code blocks are supported:
2929

3030
/// A rust enum

compiler/rustc_errors/src/markdown/tests/term.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ fn test_output() {
6565
let bless = std::env::var_os("RUSTC_BLESS").is_some_and(|v| v != "0");
6666
let ast = MdStream::parse_str(INPUT);
6767
let mut buffer = Vec::new();
68-
ast.write_anstream_buf(&mut buffer).unwrap();
68+
ast.write_anstream_buf(&mut buffer, None).unwrap();
6969

7070
let mut blessed = PathBuf::new();
7171
blessed.extend(OUTPUT_PATH);
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//@ run-pass
2+
//@ check-run-results
3+
4+
#![feature(rustc_private)]
5+
use std::io::Write;
6+
extern crate rustc_driver;
7+
extern crate rustc_driver_impl;
8+
9+
use rustc_driver_impl::highlighter::highlight;
10+
11+
const TEST_INPUT: &str = "
12+
struct Foo;
13+
14+
fn baz(x: i32) {
15+
// A function
16+
}
17+
18+
fn main() {
19+
let foo = Foo;
20+
foo.bar();
21+
}
22+
";
23+
24+
fn main() {
25+
let mut buf = Vec::new();
26+
highlight(TEST_INPUT, &mut buf).unwrap();
27+
let mut stdout = std::io::stdout();
28+
stdout.write_all(&buf).unwrap();
29+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+

2+
struct Foo;
3+
4+
fn baz(x: i32) {
5+
// A function
6+
}
7+
8+
fn main() {
9+
let foo = Foo;
10+
foo.bar();
11+
}
12+


0 commit comments

Comments
 (0)