Vendor wcwidth using python's unicodedata

deathaxe · deathaxe · commit 78b998eae893 · 2025-04-25T11:37:09.000+02:00
This commit replaces the wcwidth dependency with a simple vendored module that
leverages Python's built-in unicodedata.

Notes:

1. The `wcwidth()` function provided by the wcwidth library is already
   decorated with `lru_cache(100)`. Hence the following line (removed by this
   commit) wraps an lru-cached function in another, redundant lru-cache layer,
   which may cause significant overhead.

        wcwidth: Callable[[str], int] = lru_cache(maxsize=4096)(_wcwidth)

2. The performance of the vendored `wcwidth()` function is more or less equal
   to that of the one provided by the `wcwidth` package.

3. This change turns pyte into a self-contained library with no third-party
   runtime dependencies.

4. The only possible downside is that the supported Unicode version is bound
   to that of the Python interpreter in use. That is probably rather minor, as
   the interpreter wouldn't be able to decode more recent Unicode characters
   anyway.

Benchmarks:

    >>> from timeit import timeit
    >>> from wcwidth import wcswidth as wcswidth1
    >>> from pyte.wcwidth import wcswidth as wcswidth2
    >>> s = "开源的计算机代数系统 Maxima 是用于操纵符号和数值表达式的系统"
    >>> timeit(lambda: wcswidth1(s))
    7.851543699999999
    >>> timeit(lambda: wcswidth2(s))
    3.857342599999999

Credits:

The implementation is borrowed from pytest and slightly tweaked.
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,9 +46,6 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
     "Topic :: Terminals :: Terminal Emulators/X Terminals",
 ]
-dependencies = [
-    "wcwidth",
-]
 
 [project.urls]
 Homepage = "https://github.com/selectel/pyte"
diff --git a/pyte/screens.py b/pyte/screens.py
@@ -38,17 +38,15 @@
 from typing import Any, Dict, List, NamedTuple, Optional, Set, TextIO, TypeVar
 from collections.abc import Callable, Generator, Sequence
 
-from wcwidth import wcwidth as _wcwidth  # type: ignore[import-untyped]
-
 from . import (
     charsets as cs,
     control as ctrl,
     graphics as g,
     modes as mo
 )
 from .streams import Stream
+from .wcwidth import wcwidth
 
-wcwidth: Callable[[str], int] = lru_cache(maxsize=4096)(_wcwidth)
 
 KT = TypeVar("KT")
 VT = TypeVar("VT")
diff --git a/pyte/wcwidth.py b/pyte/wcwidth.py
@@ -0,0 +1,65 @@
+from unicodedata import category, east_asian_width, normalize
+from functools import lru_cache
+
+
+@lru_cache(4096)
+def wcwidth(c: str) -> int:
+    """
+    Determine how many columns are needed to display a character in a terminal.
+
+    :param c:
+        A character to determine required columns for.
+
+    :returns:
+        -1 if the character is not printable.
+        0, 1 or 2 for other characters.
+    """
+    o = ord(c)
+
+    # ASCII fast path.
+    if 0x20 <= o < 0x07F:
+        return 1
+
+    # Some Cf/Zp/Zl characters which should be zero-width.
+    if (
+        o == 0x0000
+        or 0x200B <= o <= 0x200F
+        or 0x2028 <= o <= 0x202E
+        or 0x2060 <= o <= 0x2063
+    ):
+        return 0
+
+    cat = category(c)
+
+    # Control characters.
+    if cat == "Cc":
+        return -1
+
+    # Combining characters with zero width.
+    if cat in ("Me", "Mn"):
+        return 0
+
+    # Full/Wide east asian characters.
+    if east_asian_width(c) in ("F", "W"):
+        return 2
+
+    return 1
+
+
+def wcswidth(s: str) -> int:
+    """
+    Determine how many columns are needed to display a string in a terminal.
+
+    :param s:
+        String to determine required columns for.
+
+    :returns:
+        -1 if the string contains non-printable characters.
+    """
+    width = 0
+    for c in normalize("NFC", s):
+        wc = wcwidth(c)
+        if wc < 0:
+            return -1
+        width += wc
+    return width

Original file line number	Diff line number	Diff line change
`@@ -46,9 +46,6 @@ classifiers = [`
`46`	`46`	`"Programming Language :: Python :: 3.13",`
`47`	`47`	`"Topic :: Terminals :: Terminal Emulators/X Terminals",`
`48`	`48`	`]`
`49`		`-dependencies = [`
`50`		`- "wcwidth",`
`51`		`-]`
`52`	`49`
`53`	`50`	`[project.urls]`
`54`	`51`	`Homepage = "https://github.com/selectel/pyte"`