refinements, and tests

willmcgugan · willmcgugan · commit 87e7ca27d0f8 · 2026-02-19T16:44:19.000Z
diff --git a/rich/cells.py b/rich/cells.py
@@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int:
 def split_graphemes(
     text: str, unicode_version: str = "auto"
 ) -> "tuple[list[CellSpan], int]":
-    """Divide text into spans that define a single grapheme.
+    """Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string.
+
+    The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
+    This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.
 
     Args:
         text: String to split.
         unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
 
     Returns:
-        List of spans.
+        A tuple of a list of *spans* and the cell length of the entire string. Each span is a tuple
+            of three values, (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
+            and CELL LENGTH is the cell length of the single grapheme.
     """
 
     cell_table = load_cell_table(unicode_version)
@@ -182,26 +187,32 @@ def split_graphemes(
     while index < codepoint_count:
         if (character := text[index]) in SPECIAL:
             if not spans:
+                # ZWJ or variation selector at the beginning of the string doesn't really make sense.
+                # But handle it, we must.
                 spans.append((index, index := index + 1, 0))
                 continue
             if character == "\u200d":
                 # zero width joiner
-                index += 1
-                if index < codepoint_count:
-                    index += 1
-                if spans:
-                    start, _end, cell_length = spans[-1]
-                    spans[-1] = (start, index, cell_length)
-            elif last_measured_character:
+                # The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
+                index += 2 if index < (codepoint_count - 1) else 1
+                start, _end, cell_length = spans[-1]
+                spans[-1] = (start, index, cell_length)
+            else:
                 # variation selector 16
                 index += 1
-                if spans:
+                if last_measured_character:
                     start, _end, cell_length = spans[-1]
                     if last_measured_character in cell_table.narrow_to_wide:
                         last_measured_character = None
                         cell_length += 1
                         total_width += 1
                     spans[-1] = (start, index, cell_length)
+                else:
+                    # No previous character to change the size of.
+                    # Shouldn't occur in practice.
+                    # But handle it, we must.
+                    start, _end, cell_length = spans[-1]
+                    spans[-1] = (start, index, cell_length)
             continue
 
         if character_width := get_character_cell_size(character, unicode_version):
diff --git a/tests/test_cells.py b/tests/test_cells.py
@@ -140,9 +140,12 @@ def test_chop_cells_mixed_width():
         ("", []),
         ("\x1b", []),
         ("\x1b\x1b", []),
+        ("\x1b\x1b\x1b", []),
+        ("\x1b\x1b\x1b\x1b", []),
     ],
 )
 def test_chop_cells_zero_width(text: str, expected: list) -> None:
+    """Test zero width characters being chopped."""
     assert chop_cells(text, 3) == expected
 
 
@@ -191,6 +194,11 @@ def test_is_single_cell_widths() -> None:
             [(0, 1, 0)],
             0,
         ),  # Variation selector 16, without anything to change should have zero width
+        (
+            "\ufe0f\ufe0f",
+            [(0, 2, 0)],
+            0,
+        ),  # 2 X variation selector 16, without anything to change should have zero width
         (
             "\u200d",
             [(0, 1, 0)],