astral-sh
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/ruff_db/Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎crates/ruff_db/Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/ruff_db/src/diagnostic/mod.rs‎
Lines changed: 3 additions & 1 deletion b/‎crates/ruff_db/src/diagnostic/mod.rs‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎crates/ruff_db/src/diagnostic/render.rs‎
Lines changed: 237 additions & 5 deletions b/‎crates/ruff_db/src/diagnostic/render.rs‎
Lines changed: 237 additions & 5 deletions
@@ -42,6 +42,7 @@ serde_json = { workspace = true, optional = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true, optional = true }
+unicode-width = { workspace = true }
 zip = { workspace = true }
 
 [target.'cfg(target_arch="wasm32")'.dependencies]
 
@@ -6,7 +6,9 @@ use ruff_source_file::{LineColumn, SourceCode, SourceFile};
 use ruff_annotate_snippets::Level as AnnotateLevel;
 use ruff_text_size::{Ranged, TextRange, TextSize};
 
-pub use self::render::{DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input};
+pub use self::render::{
+    DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input, ceil_char_boundary,
+};
 use crate::{Db, files::File};
 
 mod render;
 
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::BTreeMap;
 use std::path::Path;
 
@@ -7,7 +8,7 @@ use ruff_annotate_snippets::{
 };
 use ruff_notebook::{Notebook, NotebookIndex};
 use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
-use ruff_text_size::{TextRange, TextSize};
+use ruff_text_size::{TextLen, TextRange, TextSize};
 
 use crate::diagnostic::stylesheet::DiagnosticStylesheet;
 use crate::{
@@ -520,7 +521,7 @@ impl<'r> RenderableSnippets<'r> {
 #[derive(Debug)]
 struct RenderableSnippet<'r> {
     /// The actual snippet text.
-    snippet: &'r str,
+    snippet: Cow<'r, str>,
     /// The absolute line number corresponding to where this
     /// snippet begins.
     line_start: OneIndexed,
@@ -580,6 +581,13 @@ impl<'r> RenderableSnippet<'r> {
             .iter()
             .map(|ann| RenderableAnnotation::new(snippet_start, ann))
             .collect();
+
+        let EscapedSourceCode {
+            text: snippet,
+            annotations,
+        } = replace_whitespace_and_unprintable(snippet, annotations)
+            .fix_up_empty_spans_after_line_terminator();
+
         RenderableSnippet {
             snippet,
             line_start,
@@ -590,7 +598,7 @@ impl<'r> RenderableSnippet<'r> {
 
     /// Convert this to an "annotate" snippet.
     fn to_annotate<'a>(&'a self, path: &'a str) -> AnnotateSnippet<'a> {
-        AnnotateSnippet::source(self.snippet)
+        AnnotateSnippet::source(&self.snippet)
             .origin(path)
             .line_start(self.line_start.get())
             .annotations(
@@ -820,6 +828,230 @@ fn relativize_path<'p>(cwd: &SystemPath, path: &'p str) -> &'p str {
     path
 }
 
+/// Given some source code and annotation ranges, this routine replaces tabs
+/// with ASCII whitespace, and unprintable characters with printable
+/// representations of them.
+///
+/// The source code and annotations returned are updated to reflect changes made
+/// to the source code (if any).
+fn replace_whitespace_and_unprintable<'r>(
+    source: &'r str,
+    mut annotations: Vec<RenderableAnnotation<'r>>,
+) -> EscapedSourceCode<'r> {
+    // Updates the annotation ranges given by the caller whenever a single byte (at `index` in
+    // `source`) is replaced with `len` bytes.
+    //
+    // When the index occurs before the start of the range, the range is
+    // offset by `len`. When the range occurs after or at the start but before
+    // the end, then the end of the range only is offset by `len`.
+    let mut update_ranges = |index: usize, len: u32| {
+        for ann in &mut annotations {
+            if index < usize::from(ann.range.start()) {
+                ann.range += TextSize::new(len - 1);
+            } else if index < usize::from(ann.range.end()) {
+                ann.range = ann.range.add_end(TextSize::new(len - 1));
+            }
+        }
+    };
+
+    // If `c` is an unprintable character, then this returns a printable
+    // representation of it (using a fancier Unicode codepoint).
+    let unprintable_replacement = |c: char| -> Option<char> {
+        match c {
+            '\x07' => Some('␇'),
+            '\x08' => Some('␈'),
+            '\x1b' => Some('␛'),
+            '\x7f' => Some('␡'),
+            _ => None,
+        }
+    };
+
+    const TAB_SIZE: usize = 4;
+    let mut width = 0;
+    let mut column = 0;
+    let mut last_end = 0;
+    let mut result = String::new();
+    for (index, c) in source.char_indices() {
+        let old_width = width;
+        match c {
+            '\n' | '\r' => {
+                width = 0;
+                column = 0;
+            }
+            '\t' => {
+                let tab_offset = TAB_SIZE - (column % TAB_SIZE);
+                width += tab_offset;
+                column += tab_offset;
+
+                let tab_width =
+                    u32::try_from(width - old_width).expect("small width because of tab size");
+                result.push_str(&source[last_end..index]);
+
+                update_ranges(result.text_len().to_usize(), tab_width);
+
+                for _ in 0..tab_width {
+                    result.push(' ');
+                }
+                last_end = index + 1;
+            }
+            _ => {
+                width += unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
+                column += 1;
+
+                if let Some(printable) = unprintable_replacement(c) {
+                    result.push_str(&source[last_end..index]);
+
+                    let len = printable.text_len().to_u32();
+                    update_ranges(result.text_len().to_usize(), len);
+
+                    result.push(printable);
+                    last_end = index + 1;
+                }
+            }
+        }
+    }
+
+    // No tabs or unprintable chars
+    if result.is_empty() {
+        EscapedSourceCode {
+            annotations,
+            text: Cow::Borrowed(source),
+        }
+    } else {
+        result.push_str(&source[last_end..]);
+        EscapedSourceCode {
+            annotations,
+            text: Cow::Owned(result),
+        }
+    }
+}
+
+struct EscapedSourceCode<'r> {
+    text: Cow<'r, str>,
+    annotations: Vec<RenderableAnnotation<'r>>,
+}
+
+impl<'r> EscapedSourceCode<'r> {
+    // This attempts to "fix up" the spans on each annotation  in the case where
+    // it's an empty span immediately following a line terminator.
+    //
+    // At present, `annotate-snippets` (both upstream and our vendored copy)
+    // will render annotations of such spans to point to the space immediately
+    // following the previous line. But ideally, this should point to the space
+    // immediately preceding the next line.
+    //
+    // After attempting to fix `annotate-snippets` and giving up after a couple
+    // hours, this routine takes a different tact: it adjusts the span to be
+    // non-empty and it will cover the first codepoint of the following line.
+    // This forces `annotate-snippets` to point to the right place.
+    //
+    // See also: <https://github.com/astral-sh/ruff/issues/15509> and
+    // `ruff_linter::message::text::SourceCode::fix_up_empty_spans_after_line_terminator`,
+    // from which this was adapted.
+    fn fix_up_empty_spans_after_line_terminator(mut self) -> EscapedSourceCode<'r> {
+        for ann in &mut self.annotations {
+            let range = ann.range;
+            if !range.is_empty()
+                || range.start() == TextSize::from(0)
+                || range.start() >= self.text.text_len()
+            {
+                continue;
+            }
+            if !matches!(
+                self.text.as_bytes()[range.start().to_usize() - 1],
+                b'\n' | b'\r'
+            ) {
+                continue;
+            }
+            let start = range.start();
+            let end = ceil_char_boundary(&self.text, start + TextSize::from(1));
+            ann.range = TextRange::new(start, end);
+        }
+
+        self
+    }
+}
+
+/// Finds the closest [`TextSize`] not less than the offset given for which
+/// `is_char_boundary` is `true`. Unless the offset given is greater than
+/// the length of the underlying contents, in which case, the length of the
+/// contents is returned.
+///
+/// Can be replaced with `str::ceil_char_boundary` once it's stable.
+///
+/// # Examples
+///
+/// From `std`:
+///
+/// ```
+/// use ruff_db::diagnostic::ceil_char_boundary;
+/// use ruff_text_size::{Ranged, TextLen, TextSize};
+///
+/// let source = "❤️🧡💛💚💙💜";
+/// assert_eq!(source.text_len(), TextSize::from(26));
+/// assert!(!source.is_char_boundary(13));
+///
+/// let closest = ceil_char_boundary(source, TextSize::from(13));
+/// assert_eq!(closest, TextSize::from(14));
+/// assert_eq!(&source[..closest.to_usize()], "❤️🧡💛");
+/// ```
+///
+/// Additional examples:
+///
+/// ```
+/// use ruff_db::diagnostic::ceil_char_boundary;
+/// use ruff_text_size::{Ranged, TextRange, TextSize};
+///
+/// let source = "Hello";
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(0)),
+///     TextSize::from(0)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(5)),
+///     TextSize::from(5)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(6)),
+///     TextSize::from(5)
+/// );
+///
+/// let source = "α";
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(0)),
+///     TextSize::from(0)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(1)),
+///     TextSize::from(2)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(2)),
+///     TextSize::from(2)
+/// );
+///
+/// assert_eq!(
+///     ceil_char_boundary(source, TextSize::from(3)),
+///     TextSize::from(2)
+/// );
+/// ```
+pub fn ceil_char_boundary(text: &str, offset: TextSize) -> TextSize {
+    let upper_bound = offset
+        .to_u32()
+        .saturating_add(4)
+        .min(text.text_len().to_u32());
+    (offset.to_u32()..upper_bound)
+        .map(TextSize::from)
+        .find(|offset| text.is_char_boundary(offset.to_usize()))
+        .unwrap_or_else(|| TextSize::from(upper_bound))
+}
+
 #[cfg(test)]
 mod tests {
 
@@ -2359,7 +2591,7 @@ watermelon
         }
 
         /// Returns a builder for tersely constructing diagnostics.
-        fn builder(
+        pub(super) fn builder(
             &mut self,
             identifier: &'static str,
             severity: Severity,
@@ -2426,7 +2658,7 @@ watermelon
         ///
         /// See the docs on `TestEnvironment::span` for the meaning of
         /// `path`, `line_offset_start` and `line_offset_end`.
-        fn primary(
+        pub(super) fn primary(
             mut self,
             path: &str,
             line_offset_start: &str,