diff --git a/tabulate/__init__.py b/tabulate/__init__.py index e100c09..909293c 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1638,7 +1638,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers, headers_pad -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS): +def _wrap_text_to_colwidths( + list_of_lists, + colwidths, + numparses=True, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, +): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1655,7 +1661,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: - wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens) + wrapper = _CustomTextWrap( + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) casted_cell = str(cell) wrapped = [ "\n".join(wrapper.wrap(line)) @@ -2258,7 +2268,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + list_of_lists, + maxcolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) if maxheadercolwidths is not None: @@ -2272,7 +2286,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + [headers], + maxheadercolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, )[0] # empty values in the first column of RST tables should be escaped (issue #82) @@ -2737,15 +2755,17 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): space_left = width - cur_len # If we're allowed to break long words, then do so: put as much - # of the next chunk onto the current line as will fit. - if self.break_long_words: + # of the next chunk onto the current line as will fit. Be careful + # of empty chunks after ANSI codes removed. + chunk = reversed_chunks[-1] + chunk_noansi = _strip_ansi(chunk) + if self.break_long_words and chunk_noansi: # Tabulate Custom: Build the string up piece-by-piece in order to # take each charcter's width into account - chunk = reversed_chunks[-1] - i = 1 # Only count printable characters, so strip_ansi first, index later. - while len(_strip_ansi(chunk)[:i]) <= space_left: - i = i + 1 + for i in range(1, len(chunk_noansi) + 1): + if self._len(chunk_noansi[:i]) > space_left: + break # Consider escape codes when breaking words up total_escape_len = 0 last_group = 0 diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 46dd818..c0aa4c6 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -176,6 +176,44 @@ def test_wrap_color_line_longword(): assert_equal(expected, result) +def test_wrap_color_line_longword_zerowidth(): + """Lines with zero-width symbols (accents) must include those symbols with the prior symbol. + Let's exercise the calculation where the available symbols never satisfy the available width, + and ensure chunk calculation succeeds and ANSI colors are maintained. + + Most combining marks combine with the preceding character (even in right-to-left alphabets): + - "e\u0301" → "é" (e + combining acute accent) + - "a\u0308" → "ä" (a + combining diaeresis) + - "n\u0303" → "ñ" (n + combining tilde) + Enclosing Marks: Some combining marks enclose the base character: + - "A\u20DD" → Ⓐ Combining enclosing circle + Multiple Combining Marks: You can stack multiple combining marks on a single base character: + - "e\u0301\u0308" → e with both acute accent and diaeresis + Zero width space → "ab" with a : + - "a\u200Bb" + + """ + try: + import wcwidth # noqa + except ImportError: + skip("test_wrap_wide_char is skipped") + + # Exactly filled, with a green zero-width segment at the end. + data = ( + "This_is_A\u20DD_\033[31mte\u0301st_string_\u200b" + "to_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m" + ) + + expected = [ + "This_is_A\u20DD_\033[31mte\u0301\033[0m", + "\033[31mst_string_\u200bto\033[0m", + "\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m", + ] + wrapper = CTW(width=12) + result = wrapper.wrap(data) + assert_equal(expected, result) + + def test_wrap_color_line_multiple_escapes(): data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)" expected = [