astral-sh · ntBre · Jul 29, 2025 · Jul 24, 2025 · Jul 24, 2025 · Jul 25, 2025
diff --git a/Cargo.lock b/Cargo.lock
@@ -42,6 +42,7 @@ serde_json = { workspace = true, optional = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true, optional = true }
+unicode-width = { workspace = true }
 zip = { workspace = true }
 
 [target.'cfg(target_arch="wasm32")'.dependencies]

@@ -6,7 +6,9 @@ use ruff_source_file::{LineColumn, SourceCode, SourceFile};
 use ruff_annotate_snippets::Level as AnnotateLevel;
 use ruff_text_size::{Ranged, TextRange, TextSize};
 
-pub use self::render::{DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input};
+pub use self::render::{
+    DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input, ceil_char_boundary,
+};
 use crate::{Db, files::File};
 
 mod render;

@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::BTreeMap;
 use std::path::Path;
 
@@ -7,7 +8,7 @@ use ruff_annotate_snippets::{
 };
 use ruff_notebook::{Notebook, NotebookIndex};
 use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
-use ruff_text_size::{TextRange, TextSize};
+use ruff_text_size::{TextLen, TextRange, TextSize};
 
 use crate::diagnostic::stylesheet::DiagnosticStylesheet;
 use crate::{
@@ -520,7 +521,7 @@ impl<'r> RenderableSnippets<'r> {
 #[derive(Debug)]
 struct RenderableSnippet<'r> {
     /// The actual snippet text.
-    snippet: &'r str,
+    snippet: Cow<'r, str>,
     /// The absolute line number corresponding to where this
     /// snippet begins.
     line_start: OneIndexed,
@@ -580,6 +581,13 @@ impl<'r> RenderableSnippet<'r> {
             .iter()
             .map(|ann| RenderableAnnotation::new(snippet_start, ann))
             .collect();
+
+        let EscapedSourceCode {
+            text: snippet,
+            annotations,
+        } = replace_whitespace_and_unprintable(snippet, annotations)
+            .fix_up_empty_spans_after_line_terminator();
+
         RenderableSnippet {
             snippet,
             line_start,
@@ -590,7 +598,7 @@ impl<'r> RenderableSnippet<'r> {
 
     /// Convert this to an "annotate" snippet.
     fn to_annotate<'a>(&'a self, path: &'a str) -> AnnotateSnippet<'a> {
-        AnnotateSnippet::source(self.snippet)
+        AnnotateSnippet::source(&self.snippet)
             .origin(path)
             .line_start(self.line_start.get())
             .annotations(
@@ -820,6 +828,248 @@ fn relativize_path<'p>(cwd: &SystemPath, path: &'p str) -> &'p str {
     path
 }
 
+/// Running tally of how wide the current line of text is when displayed.
+///
+/// The builder is `Copy` and is advanced by value: each call to `add_char`
+/// returns the updated builder rather than mutating in place.
+#[derive(Clone, Copy, Default)]
+struct LineWidthBuilder {
+    /// Accumulated display width of the current line.
+    width: usize,
+    /// Current column within the line; tab stops are computed from this
+    /// value rather than from `width`.
+    column: usize,
+}
+
+impl LineWidthBuilder {
+    /// Returns the display width accumulated so far for the current line.
+    fn get(&self) -> usize {
+        self.width
+    }
+
+    /// Returns a builder advanced past `c`.
+    ///
+    /// A tab moves to the next multiple of `TAB_SIZE` columns, a line feed or
+    /// carriage return starts a fresh line, and every other character adds
+    /// its Unicode display width (zero when the width is unknown) and one
+    /// column.
+    #[must_use]
+    fn add_char(mut self, c: char) -> Self {
+        const TAB_SIZE: usize = 4;
+        if c == '\t' {
+            // Distance from the current column to the next tab stop.
+            let advance = TAB_SIZE - self.column % TAB_SIZE;
+            self.width += advance;
+            self.column += advance;
+        } else if c == '\n' || c == '\r' {
+            // A line terminator resets both counters to zero.
+            self = Self::default();
+        } else {
+            self.width += unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
+            self.column += 1;
+        }
+        self
+    }
+}
+
+/// Given some source code and annotation ranges, this routine replaces tabs
+/// with ASCII whitespace, and unprintable characters with printable
+/// representations of them.
+///
+/// Each tab is expanded to as many spaces as [`LineWidthBuilder`] reports for
+/// it at its position in the line. The control characters BEL, BS, ESC and DEL
+/// are swapped for their Unicode "control picture" codepoints.
+///
+/// The source code and annotations returned are updated to reflect changes made
+/// to the source code (if any). When nothing had to be replaced, `source` is
+/// handed back as a borrowed `Cow` and the annotation ranges are unchanged.
+fn replace_whitespace_and_unprintable<'r>(
+    source: &'r str,
+    mut annotations: Vec<RenderableAnnotation<'r>>,
+) -> EscapedSourceCode<'r> {
+    let mut result = String::new();
+    // Byte offset in `source` just past the most recently replaced character.
+    let mut last_end = 0;
+    // Snapshot of the ranges before any adjustment. Replacement offsets are
+    // always expressed in terms of the *original* source, so they must be
+    // compared against the original ranges, not the already-shifted ones.
+    let original_ranges: Vec<TextRange> = annotations.iter().map(|ann| ann.range).collect();
+    let mut line_width = LineWidthBuilder::default();
+
+    // Updates the annotation ranges given by the caller whenever a single byte (at `index` in
+    // `source`) is replaced with `len` bytes.
+    //
+    // When the index occurs before the start of the range, the whole range is
+    // offset by `len - 1` (the net growth of the text). When the index occurs
+    // at or after the start but before the end, only the end of the range is
+    // offset by `len - 1`.
+    let mut update_ranges = |index: usize, len: u32| {
+        for (ann, &original_range) in annotations.iter_mut().zip(&original_ranges) {
+            if index < usize::from(original_range.start()) {
+                ann.range += TextSize::new(len - 1);
+            } else if index < usize::from(original_range.end()) {
+                ann.range = ann.range.add_end(TextSize::new(len - 1));
+            }
+        }
+    };
+
+    // If `c` is an unprintable character, then this returns a printable
+    // representation of it (using a fancier Unicode codepoint).
+    let unprintable_replacement = |c: char| -> Option<char> {
+        match c {
+            '\x07' => Some('␇'),
+            '\x08' => Some('␈'),
+            '\x1b' => Some('␛'),
+            '\x7f' => Some('␡'),
+            _ => None,
+        }
+    };
+
+    for (index, c) in source.char_indices() {
+        let old_width = line_width.get();
+        line_width = line_width.add_char(c);
+
+        if matches!(c, '\t') {
+            // The width gained by this tab is the number of spaces to emit.
+            let tab_width = u32::try_from(line_width.get() - old_width)
+                .expect("small width because of tab size");
+            result.push_str(&source[last_end..index]);
+            for _ in 0..tab_width {
+                result.push(' ');
+            }
+            // A tab is a single byte, so the next copy starts at `index + 1`.
+            last_end = index + 1;
+            update_ranges(index, tab_width);
+        } else if let Some(printable) = unprintable_replacement(c) {
+            result.push_str(&source[last_end..index]);
+            result.push(printable);
+            // Every replaced control character is a single ASCII byte.
+            last_end = index + 1;
+
+            let len = printable.text_len().to_u32();
+            update_ranges(index, len);
+        }
+    }
+
+    // No tabs or unprintable chars
+    if result.is_empty() {
+        EscapedSourceCode {
+            annotations,
+            text: Cow::Borrowed(source),
+        }
+    } else {
+        // Copy whatever follows the last replacement.
+        result.push_str(&source[last_end..]);
+        EscapedSourceCode {
+            annotations,
+            text: Cow::Owned(result),
+        }
+    }
+}
+
+/// Snippet text after tab/unprintable replacement, paired with the
+/// annotations whose ranges index into that text.
+struct EscapedSourceCode<'r> {
+    /// The snippet text: borrowed or owned.
+    text: Cow<'r, str>,
+    /// Annotations whose ranges refer to offsets within `text`.
+    annotations: Vec<RenderableAnnotation<'r>>,
+}
+
+impl<'r> EscapedSourceCode<'r> {
+    // Widens every empty annotation span that sits directly after a `\n` so
+    // that it covers the first codepoint of the following line.
+    //
+    // At present, `annotate-snippets` (both upstream and our vendored copy)
+    // renders such empty spans as pointing to the space right after the end
+    // of the previous line, whereas the desired location is the space right
+    // before the start of the next line.
+    //
+    // Fixing this inside `annotate-snippets` was attempted and abandoned
+    // after a couple of hours, so this routine takes a different tack: making
+    // the span non-empty forces `annotate-snippets` to point to the right
+    // place.
+    //
+    // Spans that are non-empty, start at offset zero, or start at or beyond
+    // the end of the text are left untouched.
+    //
+    // See also: <https://github.com/astral-sh/ruff/issues/15509> and
+    // `ruff_linter::message::text::SourceCode::fix_up_empty_spans_after_line_terminator`,
+    // from which this was adapted.
+    fn fix_up_empty_spans_after_line_terminator(mut self) -> EscapedSourceCode<'r> {
+        let text_len = self.text.text_len();
+        let bytes = self.text.as_bytes();
+
+        for ann in &mut self.annotations {
+            let start = ann.range.start();
+            let is_candidate =
+                ann.range.is_empty() && start != TextSize::from(0) && start < text_len;
+
+            // `start` is non-zero for candidates, so `start - 1` cannot underflow.
+            if is_candidate && bytes[start.to_usize() - 1] == b'\n' {
+                let end = ceil_char_boundary(&self.text, start + TextSize::from(1));
+                ann.range = TextRange::new(start, end);
+            }
+        }
+
+        self
+    }
+}
+
+/// Rounds `offset` up to the nearest position in `text` for which
+/// `is_char_boundary` is `true`.
+///
+/// An offset that already lies on a boundary is returned unchanged. An offset
+/// greater than the length of `text` yields the length of `text`.
+///
+/// Can be replaced with `str::ceil_char_boundary` once it's stable.
+///
+/// # Examples
+///
+/// From `std`:
+///
+/// ```
+/// use ruff_db::diagnostic::ceil_char_boundary;
+/// use ruff_text_size::{Ranged, TextLen, TextSize};
+///
+/// let source = "❤️🧡💛💚💙💜";
+/// assert_eq!(source.text_len(), TextSize::from(26));
+/// assert!(!source.is_char_boundary(13));
+///
+/// let closest = ceil_char_boundary(source, TextSize::from(13));
+/// assert_eq!(closest, TextSize::from(14));
+/// assert_eq!(&source[..closest.to_usize()], "❤️🧡💛");
+/// ```
+///
+/// Additional examples:
+///
+/// ```
+/// use ruff_db::diagnostic::ceil_char_boundary;
+/// use ruff_text_size::{Ranged, TextRange, TextSize};
+///
+/// let source = "Hello";
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(0)),
+///     TextSize::from(0)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(5)),
+///     TextSize::from(5)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(6)),
+///     TextSize::from(5)
+/// );
+///
+/// let source = "α";
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(0)),
+///     TextSize::from(0)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(1)),
+///     TextSize::from(2)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(2)),
+///     TextSize::from(2)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(3)),
+///     TextSize::from(2)
+/// );
+/// ```
+pub fn ceil_char_boundary(text: &str, offset: TextSize) -> TextSize {
+    let start = offset.to_u32();
+    let text_len = text.text_len().to_u32();
+    // Probe at most four bytes ahead, and never past the end of `text`.
+    let limit = start.saturating_add(4).min(text_len);
+
+    for candidate in start..limit {
+        let candidate = TextSize::from(candidate);
+        if text.is_char_boundary(candidate.to_usize()) {
+            return candidate;
+        }
+    }
+
+    // No boundary inside the probed window (or the window was empty): fall
+    // back to the clamped upper bound.
+    TextSize::from(limit)
+}
+
 #[cfg(test)]
 mod tests {