Skip to content

Commit 87e7ca2

Browse files
committed
refinements, and tests
1 parent 646aff8 commit 87e7ca2

File tree

2 files changed

+29
-10
lines changed

2 files changed

+29
-10
lines changed

rich/cells.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int:
161161
def split_graphemes(
162162
text: str, unicode_version: str = "auto"
163163
) -> "tuple[list[CellSpan], int]":
164-
"""Divide text into spans that define a single grapheme.
164+
"""Divide text into spans that define a single grapheme, and additonally return the cell length of the whole string.
165+
166+
The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
167+
This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.
165168
166169
Args:
167170
text: String to split.
168171
unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
169172
170173
Returns:
171-
List of spans.
174+
A tuple of a list of *spans* and the cell length of the entire string. A span is a tuple
175+
of three values consisting of (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
176+
and CELL LENGTH is the cell length of the single grapheme.
172177
"""
173178

174179
cell_table = load_cell_table(unicode_version)
@@ -182,26 +187,32 @@ def split_graphemes(
182187
while index < codepoint_count:
183188
if (character := text[index]) in SPECIAL:
184189
if not spans:
190+
# ZWJ or variation selector at the beginning of the string doesn't really make sense.
191+
# But handle it, we must.
185192
spans.append((index, index := index + 1, 0))
186193
continue
187194
if character == "\u200d":
188195
# zero width joiner
189-
index += 1
190-
if index < codepoint_count:
191-
index += 1
192-
if spans:
193-
start, _end, cell_length = spans[-1]
194-
spans[-1] = (start, index, cell_length)
195-
elif last_measured_character:
196+
# The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
197+
index += 2 if index < (codepoint_count - 1) else 1
198+
start, _end, cell_length = spans[-1]
199+
spans[-1] = (start, index, cell_length)
200+
else:
196201
# variation selector 16
197202
index += 1
198-
if spans:
203+
if last_measured_character:
199204
start, _end, cell_length = spans[-1]
200205
if last_measured_character in cell_table.narrow_to_wide:
201206
last_measured_character = None
202207
cell_length += 1
203208
total_width += 1
204209
spans[-1] = (start, index, cell_length)
210+
else:
211+
# No previous character to change the size of.
212+
# Shouldn't occur in practice.
213+
# But handle it, we must.
214+
start, _end, cell_length = spans[-1]
215+
spans[-1] = (start, index, cell_length)
205216
continue
206217

207218
if character_width := get_character_cell_size(character, unicode_version):

tests/test_cells.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,12 @@ def test_chop_cells_mixed_width():
140140
("", []),
141141
("\x1b", []),
142142
("\x1b\x1b", []),
143+
("\x1b\x1b\x1b", []),
144+
("\x1b\x1b\x1b\x1b", []),
143145
],
144146
)
145147
def test_chop_cells_zero_width(text: str, expected: list) -> None:
148+
"""Test zer width characters being chopped."""
146149
assert chop_cells(text, 3) == expected
147150

148151

@@ -191,6 +194,11 @@ def test_is_single_cell_widths() -> None:
191194
[(0, 1, 0)],
192195
0,
193196
), # Variation selector 16, without anything to change should have zero width
197+
(
198+
"\ufe0f\ufe0f",
199+
[(0, 2, 0)],
200+
0,
201+
), # 2 X variation selector 16, without anything to change should have zero width
194202
(
195203
"\u200d",
196204
[(0, 1, 0)],

0 commit comments

Comments
 (0)