From 8c7f59c19acbf97fb8380a2458a518c9c95729f4 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 14 Feb 2024 15:00:33 -0500 Subject: [PATCH 1/8] Variation Selector 15 (VS-15, U+FE0E) support. I did a few spot checks of VS-15 when implementing VS-16, and erroneously believed that all emojis in VS-15 sequences were already listed as an EAW width of 1. But that's not true. There are several emojis that are "wide" that are changed to "narrow" with VS-15. --- bin/update-tables.py | 70 +++++++++++++++++++---- bin/verify-table-integrity.py | 4 +- docs/intro.rst | 3 + docs/specs.rst | 4 ++ tests/test_emojis.py | 66 +++++++++++++++++++++- tox.ini | 6 +- wcwidth/__init__.py | 12 ++-- wcwidth/table_vs15.py | 103 ++++++++++++++++++++++++++++++++++ wcwidth/wcwidth.py | 12 ++++ 9 files changed, 258 insertions(+), 22 deletions(-) create mode 100644 wcwidth/table_vs15.py diff --git a/bin/update-tables.py b/bin/update-tables.py index fc85b7c..bb5cb88 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -433,19 +433,22 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx: """ table: dict[UnicodeVersion, TableDef] = {} unicode_latest = fetch_unicode_versions()[-1] + hex_str_vs = 'FE0F' wide_tables = fetch_table_wide_data().table unicode_version = UnicodeVersion.parse('9.0.0') # parse table formatted by the latest emoji release (developed with # 15.1.0) and parse a single file for all individual releases - table[unicode_version] = parse_vs16_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), - ubound_unicode_version=unicode_version) + table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), + ubound_unicode_version=unicode_version, + hex_str_vs=hex_str_vs) # parse and join the final emoji release 12.0 of the earlier "type" table[unicode_version].values.update( - parse_vs16_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), - ubound_unicode_version=unicode_version).values) + 
parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), + ubound_unicode_version=unicode_version, + hex_str_vs=hex_str_vs).values) # perform culling on any values that are already understood as 'wide' # without the variation-16 selector @@ -458,9 +461,9 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx: return UnicodeTableRenderCtx('VS16_NARROW_TO_WIDE', table) -def parse_vs16_data(fname: str, ubound_unicode_version: UnicodeVersion): +def parse_vs_data(fname: str, ubound_unicode_version: UnicodeVersion, hex_str_vs: str): with open(fname, encoding='utf-8') as fin: - table_iter = parse_vs16_table(fin) + table_iter = parse_vs_table(fin, hex_str_vs) # pull "date string" date = next(table_iter).comment.split(':', 1)[1].strip() # pull values only matching this unicode version and lower @@ -468,6 +471,51 @@ def parse_vs16_data(fname: str, ubound_unicode_version: UnicodeVersion): return TableDef(ubound_unicode_version, date, values) +def fetch_table_vs15_data() -> UnicodeTableRenderCtx: + """ + Fetch and create a "wide to narrow variation-15" lookup table. + + Characters in this table are wide, but when combined with a variation selector-15 (\uFE0E), they + become narrow, for the given versions of unicode. + + UNICODE_VERSION=9.0.0 or greater is required to enable detection of the effect of *any* + 'variation selector-15' wide emoji becoming narrow. + + Some terminals display U+231A, U+FE0E as a narrow font, but consuming a wide cell (iTerm2), + while most others display it as a wide cell, only. + + It is fair to call these ambiguous, see related 'ucs-detect' project.
+ """ + table: dict[UnicodeVersion, TableDef] = {} + unicode_latest = fetch_unicode_versions()[-1] + hex_str_vs = 'FE0E' + + wide_tables = fetch_table_wide_data().table + unicode_version = UnicodeVersion.parse('9.0.0') + + # parse table formatted by the latest emoji release (developed with + # 15.1.0) and parse a single file for all individual releases + table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), + ubound_unicode_version=unicode_version, + hex_str_vs=hex_str_vs) + + # parse and join the final emoji release 12.0 of the earlier "type" + table[unicode_version].values.update( + parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), + ubound_unicode_version=unicode_version, + hex_str_vs=hex_str_vs).values) + + # perform culling on any values that are already understood as 'narrow' + # without the variation-15 selector + wide_table = wide_tables[unicode_version].as_value_ranges() + table[unicode_version].values = { + ucs for ucs in table[unicode_version].values + if _bisearch(ucs, wide_table) + } + + return UnicodeTableRenderCtx('VS15_WIDE_TO_NARROW', table) + + def cite_source_description(filename: str) -> tuple[str, str]: """Return unicode.org source data file's own description as citation.""" with open(filename, encoding='utf-8') as f: @@ -512,9 +560,8 @@ def parse_unicode_table(file: Iterable[str]) -> Iterator[TableEntry]: yield TableEntry(code_range, tuple(properties), comment) -def parse_vs16_table(fp: Iterable[str]) -> Iterator[TableEntry]: - """Parse emoji-variation-sequences.txt for codepoints that precede 0xFE0F.""" - hex_str_vs16 = 'FE0F' +def parse_vs_table(fp: Iterable[str], hex_str_vs: str = 'FE0F') -> Iterator[TableEntry]: + """Parse emoji-variation-sequences.txt for codepoints that precede `hex_str_vs`.""" for line in fp: data, _, comment = line.partition('#') data_fields: Iterator[str] = (field.strip() for field in data.split(';')) @@ -526,8 +573,8 @@ def parse_vs16_table(fp: 
Iterable[str]) -> Iterator[TableEntry]: yield TableEntry(None, tuple(properties), comment) continue code_points = code_points_str.split() - if len(code_points) == 2 and code_points[1] == hex_str_vs16: - # yield a single "code range" entry for a single value that precedes FE0F + if len(code_points) == 2 and code_points[1] == hex_str_vs: + # yield a single "code range" entry for a single value that precedes hex_str_vs yield TableEntry((int(code_points[0], 16), int(code_points[0], 16)), tuple(properties), comment) @@ -717,6 +764,7 @@ def get_codegen_definitions() -> Iterator[RenderDefinition]: UnicodeVersionPyRenderCtx(fetch_unicode_versions()) ) yield UnicodeTableRenderDef.new('table_vs16.py', fetch_table_vs16_data()) + yield UnicodeTableRenderDef.new('table_vs15.py', fetch_table_vs15_data()) yield UnicodeTableRenderDef.new('table_wide.py', fetch_table_wide_data()) yield UnicodeTableRenderDef.new('table_zero.py', fetch_table_zero_data()) yield UnicodeVersionRstRenderDef.new(fetch_source_headers()) diff --git a/bin/verify-table-integrity.py b/bin/verify-table-integrity.py index 688aea6..f373b75 100644 --- a/bin/verify-table-integrity.py +++ b/bin/verify-table-integrity.py @@ -64,9 +64,7 @@ def bisearch_pair(ucs, table): - """ - A copy of wcwidth._bisearch() but also returns the range of matched values. - """ + """A copy of wcwidth._bisearch() but also returns the range of matched values.""" lbound = 0 ubound = len(table) - 1 diff --git a/docs/intro.rst b/docs/intro.rst index 1b92ef4..6d5d73f 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -216,6 +216,9 @@ Other Languages ======= History ======= +*Unreleased* + * **Bugfix** accounting of some kinds of emoji sequences using U+FE0E + Variation Selector 15 (VS-15). 0.2.14 *2025-09-22* * **Drop Support** for Python 2.7 and 3.5. `PR #117`_. 
diff --git a/docs/specs.rst b/docs/specs.rst index 5b8a5ca..5e487e5 100644 --- a/docs/specs.rst +++ b/docs/specs.rst @@ -47,6 +47,9 @@ Width of 1 String characters are measured width of 1 when they are not measured as `Width of 0`_ or `Width of 2`_. +Any character in sequence with `U+FE0E`_ (Variation Selector 15) defined +by `emoji-variation-sequences.txt`_ as ``text style``. + Width of 2 ---------- @@ -74,6 +77,7 @@ Any character in sequence with `U+FE0F`_ (Variation Selector 16) defined by .. _`U+2029`: https://codepoints.net/U+2029 .. _`U+D7B0`: https://codepoints.net/U+D7B0 .. _`U+D7FF`: https://codepoints.net/U+D7FF +.. _`U+FE0E`: https://codepoints.net/U+FE0E .. _`U+FE0F`: https://codepoints.net/U+FE0F .. _`DerivedGeneralCategory.txt`: https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt .. _`EastAsianWidth.txt`: https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt diff --git a/tests/test_emojis.py b/tests/test_emojis.py index 310d0c3..dae6467 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -176,7 +176,7 @@ def test_recommended_emoji_zwj_sequences(): def test_recommended_variation_16_sequences(): """ - Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt + Test wcswidth of vs-16 sequences from unicode.org's emoji-variation-sequences.txt """ # given, lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') @@ -202,6 +202,34 @@ def test_recommended_variation_16_sequences(): assert num >= 742 +def test_recommended_variation_15_sequences(): + """ + Test wcswidth of vs-15 sequences from unicode.org's emoji-variation-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') + + errors = [] + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + if '\ufe0e' not in sequence: + # filter for only \uFE0E (VS-15) + continue + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 1: +
errors.append({ + 'expected_width': 1, + 'line': line, + 'measured_width': wcwidth.wcswidth(sequence), + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 742 + + def test_unicode_9_vs16(): """Verify effect of VS-16 on unicode_version 9.0 and later""" phrase = ("\u2640" # FEMALE SIGN @@ -219,8 +247,25 @@ def test_unicode_9_vs16(): assert length_phrase == expect_length_phrase +def test_unicode_9_vs15(): + """Verify effect of VS-15 on unicode_version 9.0 and later""" + phrase = ("\U0001f4da" # BOOKS + "\uFE0E") # VARIATION SELECTOR-15 + + expect_length_each = (2, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + def test_unicode_8_vs16(): - """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + """Verify that VS-16 has no effect on unicode_version 8.0 and earlier""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 @@ -234,3 +279,20 @@ def test_unicode_8_vs16(): # verify. assert length_each == expect_length_each assert length_phrase == expect_length_phrase + + +def test_unicode_8_vs15(): + """Verify that VS-15 has no effect on unicode_version 8.0 and earlier""" + phrase = ("\U0001f4da" # BOOKS + "\uFE0E") # VARIATION SELECTOR-15 + + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase diff --git a/tox.ini b/tox.ini index 9d66fbf..ec4e3f5 100644 --- a/tox.ini +++ b/tox.ini @@ -134,10 +134,14 @@ basepython = python3.13 commands = {envbindir}/isort --quiet --apply --recursive wcwidth tests bin [testenv:pylint] +# Files table_vs15.py and table_wide.py erroneously report "duplicate lines". +# Except for adding '# pylint: disable=duplicate-code' to the template files, we +# can choose only to disable a specific check, or specific files. We ignore the +# files. basepython = python3.13 deps = pylint commands = {envbindir}/pylint --rcfile={toxinidir}/.pylintrc \ - --ignore=tests,docs,setup.py,conf.py,build,distutils,.pyenv,.git,.tox \ + --ignore=tests,docs,setup.py,conf.py,build,distutils,.pyenv,.git,.tox,table_wide.py,table_vs15.py \ {posargs:{toxinidir}}/wcwidth [testenv:flake8] diff --git a/wcwidth/__init__.py b/wcwidth/__init__.py index e4e8138..8557884 100644 --- a/wcwidth/__init__.py +++ b/wcwidth/__init__.py @@ -5,12 +5,13 @@ """ # re-export all functions & definitions, even private ones, from top-level # module path, to allow for 'from wcwidth import _private_func'. Of course, -# user beware that any _private function may disappear or change signature at -# any future version. +# user beware that any _private functions or variables not exported by __all__ +# may disappear or change signature at any future version. # local from .wcwidth import ZERO_WIDTH # noqa from .wcwidth import (WIDE_EASTASIAN, + VS15_WIDE_TO_NARROW, VS16_NARROW_TO_WIDE, wcwidth, wcswidth, @@ -23,7 +24,8 @@ # 'from wcwidth import *', but also to say, "This is the public API". __all__ = ('wcwidth', 'wcswidth', 'list_versions') -# We also used pkg_resources to load unicode version tables from version.json, -# generated by bin/update-tables.py, but some environments are unable to -# import pkg_resources for one reason or another, yikes!
+# We previously used pkg_resources to load unicode version tables from +# 'version.json', generated by bin/update-tables.py, but some environments are +# unable to import pkg_resources for one reason or another, so this is +# MANUALLY DUPLICATED here and in setup.py __version__ = '0.2.14' diff --git a/wcwidth/table_vs15.py b/wcwidth/table_vs15.py new file mode 100644 index 0000000..a5ede6f --- /dev/null +++ b/wcwidth/table_vs15.py @@ -0,0 +1,103 @@ +""" +Exports VS15_WIDE_TO_NARROW table keyed by supporting unicode version level. + +This code generated by wcwidth/bin/update-tables.py on 2024-02-14 19:59:22 UTC. +""" +VS15_WIDE_TO_NARROW = { + '9.0.0': ( + # Source: 9.0.0 + # Date: 2023-02-01, 02:22:54 GMT + # + (0x0231a, 0x0231b,), # Watch ..Hourglass + (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub + (0x023f0, 0x023f0,), # Alarm Clock + (0x023f3, 0x023f3,), # Hourglass With Flowing Sand + (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar + (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage + (0x02648, 0x02653,), # Aries ..Pisces + (0x0267f, 0x0267f,), # Wheelchair Symbol + (0x02693, 0x02693,), # Anchor + (0x026a1, 0x026a1,), # High Voltage Sign + (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle + (0x026bd, 0x026be,), # Soccer Ball ..Baseball + (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud + (0x026ce, 0x026ce,), # Ophiuchus + (0x026d4, 0x026d4,), # No Entry + (0x026ea, 0x026ea,), # Church + (0x026f2, 0x026f3,), # Fountain ..Flag In Hole + (0x026f5, 0x026f5,), # Sailboat + (0x026fa, 0x026fa,), # Tent + (0x026fd, 0x026fd,), # Fuel Pump + (0x02705, 0x02705,), # White Heavy Check Mark + (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand + (0x02728, 0x02728,), # Sparkles + (0x0274c, 0x0274c,), # Cross Mark + (0x0274e, 0x0274e,), # Negative Squared Cross Mark + (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O + (0x02757, 0x02757,), # Heavy Exclamation 
Mark Symbol + (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign + (0x027b0, 0x027b0,), # Curly Loop + (0x027bf, 0x027bf,), # Double Curly Loop + (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square + (0x02b50, 0x02b50,), # White Medium Star + (0x02b55, 0x02b55,), # Heavy Large Circle + (0x03030, 0x03030,), # Wavy Dash + (0x0303d, 0x0303d,), # Part Alternation Mark + (0x03297, 0x03297,), # Circled Ideograph Congratulation + (0x03299, 0x03299,), # Circled Ideograph Secret + (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon + (0x1f202, 0x1f202,), # Squared Katakana Sa + (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121 + (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307 + (0x1f237, 0x1f237,), # Squared Cjk Unified Ideograph-6708 + (0x1f30d, 0x1f30f,), # Earth Globe Europe-afric..Earth Globe Asia-austral + (0x1f315, 0x1f315,), # Full Moon Symbol + (0x1f31c, 0x1f31c,), # Last Quarter Moon With Face + (0x1f378, 0x1f378,), # Cocktail Glass + (0x1f393, 0x1f393,), # Graduation Cap + (0x1f3a7, 0x1f3a7,), # Headphone + (0x1f3ac, 0x1f3ae,), # Clapper Board ..Video Game + (0x1f3c2, 0x1f3c2,), # Snowboarder + (0x1f3c4, 0x1f3c4,), # Surfer + (0x1f3c6, 0x1f3c6,), # Trophy + (0x1f3ca, 0x1f3ca,), # Swimmer + (0x1f3e0, 0x1f3e0,), # House Building + (0x1f3ed, 0x1f3ed,), # Factory + (0x1f408, 0x1f408,), # Cat + (0x1f415, 0x1f415,), # Dog + (0x1f41f, 0x1f41f,), # Fish + (0x1f426, 0x1f426,), # Bird + (0x1f442, 0x1f442,), # Ear + (0x1f446, 0x1f449,), # White Up Pointing Backha..White Right Pointing Bac + (0x1f44d, 0x1f44e,), # Thumbs Up Sign ..Thumbs Down Sign + (0x1f453, 0x1f453,), # Eyeglasses + (0x1f46a, 0x1f46a,), # Family + (0x1f47d, 0x1f47d,), # Extraterrestrial Alien + (0x1f4a3, 0x1f4a3,), # Bomb + (0x1f4b0, 0x1f4b0,), # Money Bag + (0x1f4b3, 0x1f4b3,), # Credit Card + (0x1f4bb, 0x1f4bb,), # Personal Computer + (0x1f4bf, 0x1f4bf,), # Optical Disc + (0x1f4cb, 0x1f4cb,), # Clipboard + (0x1f4da, 0x1f4da,), # Books + (0x1f4df, 0x1f4df,), # Pager + 
(0x1f4e4, 0x1f4e6,), # Outbox Tray ..Package + (0x1f4ea, 0x1f4ed,), # Closed Mailbox With Lowe..Open Mailbox With Lowere + (0x1f4f7, 0x1f4f7,), # Camera + (0x1f4f9, 0x1f4fb,), # Video Camera ..Radio + (0x1f508, 0x1f508,), # Speaker + (0x1f50d, 0x1f50d,), # Left-pointing Magnifying Glass + (0x1f512, 0x1f513,), # Lock ..Open Lock + (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty + (0x1f610, 0x1f610,), # Neutral Face + (0x1f687, 0x1f687,), # Metro + (0x1f68d, 0x1f68d,), # Oncoming Bus + (0x1f691, 0x1f691,), # Ambulance + (0x1f694, 0x1f694,), # Oncoming Police Car + (0x1f698, 0x1f698,), # Oncoming Automobile + (0x1f6ad, 0x1f6ad,), # No Smoking Symbol + (0x1f6b2, 0x1f6b2,), # Bicycle + (0x1f6b9, 0x1f6ba,), # Mens Symbol ..Womens Symbol + (0x1f6bc, 0x1f6bc,), # Baby Symbol + ), +} diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 92ca14a..7cf0863 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -67,6 +67,7 @@ from functools import lru_cache # local +from .table_vs15 import VS15_WIDE_TO_NARROW from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH @@ -189,6 +190,17 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): last_measured_char = None idx += 1 continue + if char == u'\uFE0E' and last_measured_char: + # on variation selector 15 (VS15) following another character, + # conditionally subtract '1' from the measured width if that + # character is known to be converted from wide to narrow by VS15. 
+ if _unicode_version is None: + _unicode_version = _wcversion_value(_wcmatch_version(unicode_version)) + if _unicode_version >= (9, 0, 0): + width -= _bisearch(ord(last_measured_char), VS15_WIDE_TO_NARROW["9.0.0"]) + last_measured_char = None + idx += 1 + continue # measure character at current index wcw = wcwidth(char, unicode_version) if wcw < 0: From 2dd896f7a9cd6d2e74bfa223b4c2706a9d1050cf Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 14 Feb 2024 15:06:42 -0500 Subject: [PATCH 2/8] Set PR hyperlink in changelog --- docs/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/intro.rst b/docs/intro.rst index 6d5d73f..8cf0bc2 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -218,7 +218,7 @@ History ======= *Unreleased* * **Bugfix** accounting of some kinds of emoji sequences using U+FE0E - Variation Selector 15 (VS-15). + Variation Selector 15 (`PR #120`_). 0.2.14 *2025-09-22* * **Drop Support** for Python 2.7 and 3.5. `PR #117`_. From 96ab3c3d760b5d897b9e49a256b79ba8f6eedd7e Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 14 Feb 2024 15:15:50 -0500 Subject: [PATCH 3/8] Increase code coverage Add an additional U+FE0F/U+FE0E check to the test sequences measured by wcswidth() to ensure 100% code coverage --- tests/test_emojis.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/test_emojis.py b/tests/test_emojis.py index dae6467..e6dbd0d 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -233,10 +233,12 @@ def test_recommended_variation_15_sequences(): def test_unicode_9_vs16(): """Verify effect of VS-16 on unicode_version 9.0 and later""" phrase = ("\u2640" # FEMALE SIGN + "\uFE0F" # VARIATION SELECTOR-16 + "X" # ASCII Letter 'X' "\uFE0F") # VARIATION SELECTOR-16 - expect_length_each = (1, 0) - expect_length_phrase = 2 + expect_length_each = (1, 0, 1, 0) + expect_length_phrase = 3 # exercise, length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
@@ -250,10 +252,12 @@ def test_unicode_9_vs16(): def test_unicode_9_vs15(): """Verify effect of VS-15 on unicode_version 9.0 and later""" phrase = ("\U0001f4da" # BOOKS + "\uFE0E" # VARIATION SELECTOR-15 + "X" # ASCII Letter 'X' "\uFE0E") # VARIATION SELECTOR-15 - expect_length_each = (2, 0) - expect_length_phrase = 1 + expect_length_each = (2, 0, 1, 0) + expect_length_phrase = 2 # exercise, length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) @@ -267,10 +271,12 @@ def test_unicode_9_vs15(): def test_unicode_8_vs16(): """Verify that VS-16 has no effect on unicode_version 8.0 and earlier""" phrase = ("\u2640" # FEMALE SIGN + "\uFE0F" # VARIATION SELECTOR-16 + "X" # ASCII Letter 'X' "\uFE0F") # VARIATION SELECTOR-16 - expect_length_each = (1, 0) - expect_length_phrase = 1 + expect_length_each = (1, 0, 1, 0) + expect_length_phrase = 2 # exercise, length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) @@ -284,10 +290,12 @@ def test_unicode_8_vs16(): def test_unicode_8_vs15(): """Verify that VS-15 has no effect on unicode_version 8.0 and earlier""" phrase = ("\U0001f4da" # BOOKS + "\uFE0E" # VARIATION SELECTOR-15 + "X" # ASCII Letter 'X' "\uFE0E") # VARIATION SELECTOR-15 - expect_length_each = (1, 0) - expect_length_phrase = 1 + expect_length_each = (1, 0, 1, 0) + expect_length_phrase = 2 # exercise, length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) From 4ef5de95c2aec1f8f943dce89a727880f3188602 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 17 Sep 2025 00:58:51 -0400 Subject: [PATCH 4/8] Still not sure about this, WIP --- wcwidth/table_vs15.py | 206 +++++++++++++++++++++++------------------- 1 file changed, 114 insertions(+), 92 deletions(-) diff --git a/wcwidth/table_vs15.py b/wcwidth/table_vs15.py index a5ede6f..4ce1853 100644 --- a/wcwidth/table_vs15.py +++ b/wcwidth/table_vs15.py @@ -1,103 +1,125 @@ """ Exports VS15_WIDE_TO_NARROW table keyed by 
supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2024-02-14 19:59:22 UTC. +This code generated by wcwidth/bin/update-tables.py on 2024-03-15 14:05:54 UTC. """ VS15_WIDE_TO_NARROW = { '9.0.0': ( # Source: 9.0.0 # Date: 2023-02-01, 02:22:54 GMT # - (0x0231a, 0x0231b,), # Watch ..Hourglass - (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub - (0x023f0, 0x023f0,), # Alarm Clock - (0x023f3, 0x023f3,), # Hourglass With Flowing Sand - (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar - (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage - (0x02648, 0x02653,), # Aries ..Pisces - (0x0267f, 0x0267f,), # Wheelchair Symbol - (0x02693, 0x02693,), # Anchor - (0x026a1, 0x026a1,), # High Voltage Sign - (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle - (0x026bd, 0x026be,), # Soccer Ball ..Baseball - (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud - (0x026ce, 0x026ce,), # Ophiuchus - (0x026d4, 0x026d4,), # No Entry - (0x026ea, 0x026ea,), # Church - (0x026f2, 0x026f3,), # Fountain ..Flag In Hole - (0x026f5, 0x026f5,), # Sailboat - (0x026fa, 0x026fa,), # Tent - (0x026fd, 0x026fd,), # Fuel Pump - (0x02705, 0x02705,), # White Heavy Check Mark - (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand - (0x02728, 0x02728,), # Sparkles - (0x0274c, 0x0274c,), # Cross Mark - (0x0274e, 0x0274e,), # Negative Squared Cross Mark - (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O - (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol - (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign - (0x027b0, 0x027b0,), # Curly Loop - (0x027bf, 0x027bf,), # Double Curly Loop - (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square - (0x02b50, 0x02b50,), # White Medium Star - (0x02b55, 0x02b55,), # Heavy Large Circle - (0x03030, 0x03030,), # Wavy Dash - (0x0303d, 0x0303d,), # Part Alternation Mark - (0x03297, 0x03297,), # Circled Ideograph 
Congratulation - (0x03299, 0x03299,), # Circled Ideograph Secret - (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon - (0x1f202, 0x1f202,), # Squared Katakana Sa - (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121 - (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307 - (0x1f237, 0x1f237,), # Squared Cjk Unified Ideograph-6708 - (0x1f30d, 0x1f30f,), # Earth Globe Europe-afric..Earth Globe Asia-austral - (0x1f315, 0x1f315,), # Full Moon Symbol - (0x1f31c, 0x1f31c,), # Last Quarter Moon With Face - (0x1f378, 0x1f378,), # Cocktail Glass - (0x1f393, 0x1f393,), # Graduation Cap - (0x1f3a7, 0x1f3a7,), # Headphone - (0x1f3ac, 0x1f3ae,), # Clapper Board ..Video Game - (0x1f3c2, 0x1f3c2,), # Snowboarder - (0x1f3c4, 0x1f3c4,), # Surfer - (0x1f3c6, 0x1f3c6,), # Trophy - (0x1f3ca, 0x1f3ca,), # Swimmer - (0x1f3e0, 0x1f3e0,), # House Building - (0x1f3ed, 0x1f3ed,), # Factory - (0x1f408, 0x1f408,), # Cat - (0x1f415, 0x1f415,), # Dog - (0x1f41f, 0x1f41f,), # Fish - (0x1f426, 0x1f426,), # Bird - (0x1f442, 0x1f442,), # Ear - (0x1f446, 0x1f449,), # White Up Pointing Backha..White Right Pointing Bac - (0x1f44d, 0x1f44e,), # Thumbs Up Sign ..Thumbs Down Sign - (0x1f453, 0x1f453,), # Eyeglasses - (0x1f46a, 0x1f46a,), # Family - (0x1f47d, 0x1f47d,), # Extraterrestrial Alien - (0x1f4a3, 0x1f4a3,), # Bomb - (0x1f4b0, 0x1f4b0,), # Money Bag - (0x1f4b3, 0x1f4b3,), # Credit Card - (0x1f4bb, 0x1f4bb,), # Personal Computer - (0x1f4bf, 0x1f4bf,), # Optical Disc - (0x1f4cb, 0x1f4cb,), # Clipboard - (0x1f4da, 0x1f4da,), # Books - (0x1f4df, 0x1f4df,), # Pager - (0x1f4e4, 0x1f4e6,), # Outbox Tray ..Package - (0x1f4ea, 0x1f4ed,), # Closed Mailbox With Lowe..Open Mailbox With Lowere - (0x1f4f7, 0x1f4f7,), # Camera - (0x1f4f9, 0x1f4fb,), # Video Camera ..Radio - (0x1f508, 0x1f508,), # Speaker - (0x1f50d, 0x1f50d,), # Left-pointing Magnifying Glass - (0x1f512, 0x1f513,), # Lock ..Open Lock - (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty - (0x1f610, 0x1f610,), # 
Neutral Face - (0x1f687, 0x1f687,), # Metro - (0x1f68d, 0x1f68d,), # Oncoming Bus - (0x1f691, 0x1f691,), # Ambulance - (0x1f694, 0x1f694,), # Oncoming Police Car - (0x1f698, 0x1f698,), # Oncoming Automobile - (0x1f6ad, 0x1f6ad,), # No Smoking Symbol - (0x1f6b2, 0x1f6b2,), # Bicycle - (0x1f6b9, 0x1f6ba,), # Mens Symbol ..Womens Symbol - (0x1f6bc, 0x1f6bc,), # Baby Symbol + (0x00023, 0x00023,), # Number Sign + (0x0002a, 0x0002a,), # Asterisk + (0x00030, 0x00039,), # Digit Zero ..Digit Nine + (0x000a9, 0x000a9,), # Copyright Sign + (0x000ae, 0x000ae,), # Registered Sign + (0x0203c, 0x0203c,), # Double Exclamation Mark + (0x02049, 0x02049,), # Exclamation Question Mark + (0x02122, 0x02122,), # Trade Mark Sign + (0x02139, 0x02139,), # Information Source + (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow + (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho + (0x02328, 0x02328,), # Keyboard + (0x023cf, 0x023cf,), # Eject Symbol + (0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri + (0x023f1, 0x023f2,), # Stopwatch ..Timer Clock + (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record + (0x024c2, 0x024c2,), # Circled Latin Capital Letter M + (0x025aa, 0x025ab,), # Black Small Square ..White Small Square + (0x025b6, 0x025b6,), # Black Right-pointing Triangle + (0x025c0, 0x025c0,), # Black Left-pointing Triangle + (0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square + (0x02600, 0x02604,), # Black Sun With Rays ..Comet + (0x0260e, 0x0260e,), # Black Telephone + (0x02611, 0x02611,), # Ballot Box With Check + (0x02618, 0x02618,), # Shamrock + (0x0261d, 0x0261d,), # White Up Pointing Index + (0x02620, 0x02620,), # Skull And Crossbones + (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign + (0x02626, 0x02626,), # Orthodox Cross + (0x0262a, 0x0262a,), # Star And Crescent + (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang + (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face + (0x02640, 
0x02640,), # Female Sign + (0x02642, 0x02642,), # Male Sign + (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit + (0x02663, 0x02663,), # Black Club Suit + (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit + (0x02668, 0x02668,), # Hot Springs + (0x0267b, 0x0267b,), # Black Universal Recycling Symbol + (0x0267e, 0x0267e,), # Permanent Paper Sign + (0x02692, 0x02692,), # Hammer And Pick + (0x02694, 0x02697,), # Crossed Swords ..Alembic + (0x02699, 0x02699,), # Gear + (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis + (0x026a0, 0x026a0,), # Warning Sign + (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign + (0x026b0, 0x026b1,), # Coffin ..Funeral Urn + (0x026c8, 0x026c8,), # Thunder Cloud And Rain + (0x026cf, 0x026cf,), # Pick + (0x026d1, 0x026d1,), # Helmet With White Cross + (0x026d3, 0x026d3,), # Chains + (0x026e9, 0x026e9,), # Shinto Shrine + (0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground + (0x026f4, 0x026f4,), # Ferry + (0x026f7, 0x026f9,), # Skier ..Person With Ball + (0x02702, 0x02702,), # Black Scissors + (0x02708, 0x02709,), # Airplane ..Envelope + (0x0270c, 0x0270d,), # Victory Hand ..Writing Hand + (0x0270f, 0x0270f,), # Pencil + (0x02712, 0x02712,), # Black Nib + (0x02714, 0x02714,), # Heavy Check Mark + (0x02716, 0x02716,), # Heavy Multiplication X + (0x0271d, 0x0271d,), # Latin Cross + (0x02721, 0x02721,), # Star Of David + (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star + (0x02744, 0x02744,), # Snowflake + (0x02747, 0x02747,), # Sparkle + (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart + (0x027a1, 0x027a1,), # Black Rightwards Arrow + (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward + (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow + (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f321, 0x1f321,), # Thermometer + 
(0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face + (0x1f336, 0x1f336,), # Hot Pepper + (0x1f37d, 0x1f37d,), # Fork And Knife With Plate + (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon + (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs + (0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets + (0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car + (0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium + (0x1f3f3, 0x1f3f3,), # Waving White Flag + (0x1f3f5, 0x1f3f5,), # Rosette + (0x1f3f7, 0x1f3f7,), # Label + (0x1f43f, 0x1f43f,), # Chipmunk + (0x1f441, 0x1f441,), # Eye + (0x1f4fd, 0x1f4fd,), # Film Projector + (0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace + (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock + (0x1f573, 0x1f579,), # Hole ..Joystick + (0x1f587, 0x1f587,), # Linked Paperclips + (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon + (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed + (0x1f5a5, 0x1f5a5,), # Desktop Computer + (0x1f5a8, 0x1f5a8,), # Printer + (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball + (0x1f5bc, 0x1f5bc,), # Frame With Picture + (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet + (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad + (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper + (0x1f5e1, 0x1f5e1,), # Dagger Knife + (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette + (0x1f5e8, 0x1f5e8,), # Left Speech Bubble + (0x1f5ef, 0x1f5ef,), # Right Anger Bubble + (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot + (0x1f5fa, 0x1f5fa,), # World Map + (0x1f6cb, 0x1f6cb,), # Couch And Lamp + (0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed + (0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat + (0x1f6e9, 0x1f6e9,), # Small Airplane + (0x1f6f0, 0x1f6f0,), # Satellite + (0x1f6f3, 0x1f6f3,), # Passenger Ship ), } From 3caf1d7e1272af02100a37bb2eacd3bc9f138604 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 20 Oct 2025 19:32:27 -0400 Subject: [PATCH 5/8] Regenerate 
table_vs15.py with corrected logic The previous table was generated with inverted logic (keeping narrow characters instead of wide characters), a kind of experiment that I may study outside of this project. Regenerated/reverted vs-15 table --- wcwidth/table_vs15.py | 208 +++++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 115 deletions(-) diff --git a/wcwidth/table_vs15.py b/wcwidth/table_vs15.py index 4ce1853..1c72bd8 100644 --- a/wcwidth/table_vs15.py +++ b/wcwidth/table_vs15.py @@ -1,125 +1,103 @@ """ Exports VS15_WIDE_TO_NARROW table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2024-03-15 14:05:54 UTC. +This code generated by wcwidth/bin/update-tables.py on 2025-10-20 23:32:03 UTC. """ VS15_WIDE_TO_NARROW = { '9.0.0': ( # Source: 9.0.0 - # Date: 2023-02-01, 02:22:54 GMT + # Date: 2025-01-30, 21:48:29 GMT # - (0x00023, 0x00023,), # Number Sign - (0x0002a, 0x0002a,), # Asterisk - (0x00030, 0x00039,), # Digit Zero ..Digit Nine - (0x000a9, 0x000a9,), # Copyright Sign - (0x000ae, 0x000ae,), # Registered Sign - (0x0203c, 0x0203c,), # Double Exclamation Mark - (0x02049, 0x02049,), # Exclamation Question Mark - (0x02122, 0x02122,), # Trade Mark Sign - (0x02139, 0x02139,), # Information Source - (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow - (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho - (0x02328, 0x02328,), # Keyboard - (0x023cf, 0x023cf,), # Eject Symbol - (0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri - (0x023f1, 0x023f2,), # Stopwatch ..Timer Clock - (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record - (0x024c2, 0x024c2,), # Circled Latin Capital Letter M - (0x025aa, 0x025ab,), # Black Small Square ..White Small Square - (0x025b6, 0x025b6,), # Black Right-pointing Triangle - (0x025c0, 0x025c0,), # Black Left-pointing Triangle - (0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square - 
(0x02600, 0x02604,), # Black Sun With Rays ..Comet - (0x0260e, 0x0260e,), # Black Telephone - (0x02611, 0x02611,), # Ballot Box With Check - (0x02618, 0x02618,), # Shamrock - (0x0261d, 0x0261d,), # White Up Pointing Index - (0x02620, 0x02620,), # Skull And Crossbones - (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign - (0x02626, 0x02626,), # Orthodox Cross - (0x0262a, 0x0262a,), # Star And Crescent - (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang - (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face - (0x02640, 0x02640,), # Female Sign - (0x02642, 0x02642,), # Male Sign - (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit - (0x02663, 0x02663,), # Black Club Suit - (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit - (0x02668, 0x02668,), # Hot Springs - (0x0267b, 0x0267b,), # Black Universal Recycling Symbol - (0x0267e, 0x0267e,), # Permanent Paper Sign - (0x02692, 0x02692,), # Hammer And Pick - (0x02694, 0x02697,), # Crossed Swords ..Alembic - (0x02699, 0x02699,), # Gear - (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis - (0x026a0, 0x026a0,), # Warning Sign - (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign - (0x026b0, 0x026b1,), # Coffin ..Funeral Urn - (0x026c8, 0x026c8,), # Thunder Cloud And Rain - (0x026cf, 0x026cf,), # Pick - (0x026d1, 0x026d1,), # Helmet With White Cross - (0x026d3, 0x026d3,), # Chains - (0x026e9, 0x026e9,), # Shinto Shrine - (0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground - (0x026f4, 0x026f4,), # Ferry - (0x026f7, 0x026f9,), # Skier ..Person With Ball - (0x02702, 0x02702,), # Black Scissors - (0x02708, 0x02709,), # Airplane ..Envelope - (0x0270c, 0x0270d,), # Victory Hand ..Writing Hand - (0x0270f, 0x0270f,), # Pencil - (0x02712, 0x02712,), # Black Nib - (0x02714, 0x02714,), # Heavy Check Mark - (0x02716, 0x02716,), # Heavy Multiplication X - (0x0271d, 0x0271d,), # Latin Cross - (0x02721, 0x02721,), # Star Of David - (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black 
Star - (0x02744, 0x02744,), # Snowflake - (0x02747, 0x02747,), # Sparkle - (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart - (0x027a1, 0x027a1,), # Black Rightwards Arrow - (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward - (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow - (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C - (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C - (0x1f321, 0x1f321,), # Thermometer - (0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face - (0x1f336, 0x1f336,), # Hot Pepper - (0x1f37d, 0x1f37d,), # Fork And Knife With Plate - (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon - (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs - (0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets - (0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car - (0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium - (0x1f3f3, 0x1f3f3,), # Waving White Flag - (0x1f3f5, 0x1f3f5,), # Rosette - (0x1f3f7, 0x1f3f7,), # Label - (0x1f43f, 0x1f43f,), # Chipmunk - (0x1f441, 0x1f441,), # Eye - (0x1f4fd, 0x1f4fd,), # Film Projector - (0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace - (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock - (0x1f573, 0x1f579,), # Hole ..Joystick - (0x1f587, 0x1f587,), # Linked Paperclips - (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon - (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed - (0x1f5a5, 0x1f5a5,), # Desktop Computer - (0x1f5a8, 0x1f5a8,), # Printer - (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball - (0x1f5bc, 0x1f5bc,), # Frame With Picture - (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet - (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad - (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper - (0x1f5e1, 0x1f5e1,), # Dagger Knife - (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette - (0x1f5e8, 0x1f5e8,), # Left Speech Bubble - (0x1f5ef, 0x1f5ef,), 
# Right Anger Bubble - (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot - (0x1f5fa, 0x1f5fa,), # World Map - (0x1f6cb, 0x1f6cb,), # Couch And Lamp - (0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed - (0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat - (0x1f6e9, 0x1f6e9,), # Small Airplane - (0x1f6f0, 0x1f6f0,), # Satellite - (0x1f6f3, 0x1f6f3,), # Passenger Ship + (0x0231a, 0x0231b,), # Watch ..Hourglass + (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub + (0x023f0, 0x023f0,), # Alarm Clock + (0x023f3, 0x023f3,), # Hourglass With Flowing Sand + (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar + (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage + (0x02648, 0x02653,), # Aries ..Pisces + (0x0267f, 0x0267f,), # Wheelchair Symbol + (0x02693, 0x02693,), # Anchor + (0x026a1, 0x026a1,), # High Voltage Sign + (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle + (0x026bd, 0x026be,), # Soccer Ball ..Baseball + (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud + (0x026ce, 0x026ce,), # Ophiuchus + (0x026d4, 0x026d4,), # No Entry + (0x026ea, 0x026ea,), # Church + (0x026f2, 0x026f3,), # Fountain ..Flag In Hole + (0x026f5, 0x026f5,), # Sailboat + (0x026fa, 0x026fa,), # Tent + (0x026fd, 0x026fd,), # Fuel Pump + (0x02705, 0x02705,), # White Heavy Check Mark + (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand + (0x02728, 0x02728,), # Sparkles + (0x0274c, 0x0274c,), # Cross Mark + (0x0274e, 0x0274e,), # Negative Squared Cross Mark + (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O + (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol + (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign + (0x027b0, 0x027b0,), # Curly Loop + (0x027bf, 0x027bf,), # Double Curly Loop + (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square + (0x02b50, 0x02b50,), # White Medium Star + (0x02b55, 0x02b55,), # Heavy Large Circle + (0x03030, 0x03030,), # Wavy Dash + (0x0303d, 
0x0303d,), # Part Alternation Mark + (0x03297, 0x03297,), # Circled Ideograph Congratulation + (0x03299, 0x03299,), # Circled Ideograph Secret + (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon + (0x1f202, 0x1f202,), # Squared Katakana Sa + (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121 + (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307 + (0x1f237, 0x1f237,), # Squared Cjk Unified Ideograph-6708 + (0x1f30d, 0x1f30f,), # Earth Globe Europe-afric..Earth Globe Asia-austral + (0x1f315, 0x1f315,), # Full Moon Symbol + (0x1f31c, 0x1f31c,), # Last Quarter Moon With Face + (0x1f378, 0x1f378,), # Cocktail Glass + (0x1f393, 0x1f393,), # Graduation Cap + (0x1f3a7, 0x1f3a7,), # Headphone + (0x1f3ac, 0x1f3ae,), # Clapper Board ..Video Game + (0x1f3c2, 0x1f3c2,), # Snowboarder + (0x1f3c4, 0x1f3c4,), # Surfer + (0x1f3c6, 0x1f3c6,), # Trophy + (0x1f3ca, 0x1f3ca,), # Swimmer + (0x1f3e0, 0x1f3e0,), # House Building + (0x1f3ed, 0x1f3ed,), # Factory + (0x1f408, 0x1f408,), # Cat + (0x1f415, 0x1f415,), # Dog + (0x1f41f, 0x1f41f,), # Fish + (0x1f426, 0x1f426,), # Bird + (0x1f442, 0x1f442,), # Ear + (0x1f446, 0x1f449,), # White Up Pointing Backha..White Right Pointing Bac + (0x1f44d, 0x1f44e,), # Thumbs Up Sign ..Thumbs Down Sign + (0x1f453, 0x1f453,), # Eyeglasses + (0x1f46a, 0x1f46a,), # Family + (0x1f47d, 0x1f47d,), # Extraterrestrial Alien + (0x1f4a3, 0x1f4a3,), # Bomb + (0x1f4b0, 0x1f4b0,), # Money Bag + (0x1f4b3, 0x1f4b3,), # Credit Card + (0x1f4bb, 0x1f4bb,), # Personal Computer + (0x1f4bf, 0x1f4bf,), # Optical Disc + (0x1f4cb, 0x1f4cb,), # Clipboard + (0x1f4da, 0x1f4da,), # Books + (0x1f4df, 0x1f4df,), # Pager + (0x1f4e4, 0x1f4e6,), # Outbox Tray ..Package + (0x1f4ea, 0x1f4ed,), # Closed Mailbox With Lowe..Open Mailbox With Lowere + (0x1f4f7, 0x1f4f7,), # Camera + (0x1f4f9, 0x1f4fb,), # Video Camera ..Radio + (0x1f508, 0x1f508,), # Speaker + (0x1f50d, 0x1f50d,), # Left-pointing Magnifying Glass + (0x1f512, 0x1f513,), # Lock ..Open Lock + (0x1f550, 0x1f567,), # 
Clock Face One Oclock ..Clock Face Twelve-thirty + (0x1f610, 0x1f610,), # Neutral Face + (0x1f687, 0x1f687,), # Metro + (0x1f68d, 0x1f68d,), # Oncoming Bus + (0x1f691, 0x1f691,), # Ambulance + (0x1f694, 0x1f694,), # Oncoming Police Car + (0x1f698, 0x1f698,), # Oncoming Automobile + (0x1f6ad, 0x1f6ad,), # No Smoking Symbol + (0x1f6b2, 0x1f6b2,), # Bicycle + (0x1f6b9, 0x1f6ba,), # Mens Symbol ..Womens Symbol + (0x1f6bc, 0x1f6bc,), # Baby Symbol ), } From c5ec9a59fdd04e0226d9289c1ba2f5335d5aada6 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Tue, 21 Oct 2025 10:54:57 -0400 Subject: [PATCH 6/8] Refactor tests and update-tables a small bit --- bin/update-tables.py | 12 +-- tests/test_emojis.py | 174 +++++++++++++++++++------------------------ 2 files changed, 82 insertions(+), 104 deletions(-) diff --git a/bin/update-tables.py b/bin/update-tables.py index bb5cb88..0edd068 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -68,6 +68,8 @@ *range(0xD7B0, 0xD800), # Hangul Jungseong O-Yeo .. 
Undefined Character of Hangul Jamo Extended-B ) +HEX_STR_VS15 = 'FE0E' +HEX_STR_VS16 = 'FE0F' def _bisearch(ucs, table): """A copy of wcwwidth._bisearch, to prevent having issues when depending on code that imports @@ -433,7 +435,6 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx: """ table: dict[UnicodeVersion, TableDef] = {} unicode_latest = fetch_unicode_versions()[-1] - hex_str_vs = 'FE0F' wide_tables = fetch_table_wide_data().table unicode_version = UnicodeVersion.parse('9.0.0') @@ -442,13 +443,13 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx: # 15.1.0) and parse a single file for all individual releases table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), ubound_unicode_version=unicode_version, - hex_str_vs=hex_str_vs) + hex_str_vs=HEX_STR_VS16) # parse and join the final emoji release 12.0 of the earlier "type" table[unicode_version].values.update( parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), ubound_unicode_version=unicode_version, - hex_str_vs=hex_str_vs).values) + hex_str_vs=HEX_STR_VS16).values) # perform culling on any values that are already understood as 'wide' # without the variation-16 selector @@ -488,7 +489,6 @@ def fetch_table_vs15_data() -> UnicodeTableRenderCtx: """ table: dict[UnicodeVersion, TableDef] = {} unicode_latest = fetch_unicode_versions()[-1] - hex_str_vs = 'FE0E' wide_tables = fetch_table_wide_data().table unicode_version = UnicodeVersion.parse('9.0.0') @@ -497,13 +497,13 @@ def fetch_table_vs15_data() -> UnicodeTableRenderCtx: # 15.1.0) and parse a single file for all individual releases table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), ubound_unicode_version=unicode_version, - hex_str_vs=hex_str_vs) + hex_str_vs=HEX_STR_VS15) # parse and join the final emoji release 12.0 of the earlier "type" table[unicode_version].values.update( 
parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), ubound_unicode_version=unicode_version, - hex_str_vs=hex_str_vs).values) + hex_str_vs=HEX_STR_VS15).values) # perform culling on any values that are already understood as 'narrow' # without the variation-15 selector diff --git a/tests/test_emojis.py b/tests/test_emojis.py index e6dbd0d..6c402e4 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -174,133 +174,111 @@ def test_recommended_emoji_zwj_sequences(): assert num >= 1468 -def test_recommended_variation_16_sequences(): +@pytest.mark.parametrize('vs_char,expected_width', [ + ('\ufe0f', 2), + ('\ufe0e', 1), +]) +def test_recommended_variation_sequences(vs_char, expected_width): """ - Test wcswidth of vs-16 sequences from unicode.org's emoji-variation-sequences.txt + Test wcswidth of variation selector sequences from emoji-variation-sequences.txt """ - # given, lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') errors = [] num = 0 for sequence, line in zip(sequences, lines): num += 1 - if '\ufe0f' not in sequence: - # filter for only \uFE0F (VS-16) + if vs_char not in sequence: continue measured_width = wcwidth.wcswidth(sequence) - if measured_width != 2: + if measured_width != expected_width: errors.append({ - 'expected_width': 2, + 'expected_width': expected_width, 'line': line, - 'measured_width': wcwidth.wcswidth(sequence), + 'measured_width': measured_width, 'sequence': sequence, }) - # verify assert errors == [] assert num >= 742 -def test_recommended_variation_15_sequences(): - """ - Test wcswidth of vs-15 sequences from unicode.org's emoji-variation-sequences.txt +@pytest.mark.parametrize('unicode_version,base_char,vs_char,base_width,expect_phrase_width', [ + ('9.0', '\u2640', '\uFE0F', 1, 3), + ('9.0', '\U0001f4da', '\uFE0E', 2, 2), + ('8.0', '\u2640', '\uFE0F', 1, 2), + ('8.0', '\U0001f4da', '\uFE0E', 1, 2), +]) +def test_variation_selector_unicode_version(unicode_version, base_char, vs_char, 
base_width, expect_phrase_width): """ - # given, - lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') - - errors = [] - num = 0 - for sequence, line in zip(sequences, lines): - num += 1 - if '\ufe0e' not in sequence: - # filter for only \uFE0E (VS-15) - continue - measured_width = wcwidth.wcswidth(sequence) - if measured_width != 1: - errors.append({ - 'expected_width': 1, - 'line': line, - 'measured_width': wcwidth.wcswidth(sequence), - 'sequence': sequence, - }) - - # verify - assert errors == [] - assert num >= 742 - - -def test_unicode_9_vs16(): - """Verify effect of VS-16 on unicode_version 9.0 and later""" - phrase = ("\u2640" # FEMALE SIGN - "\uFE0F" # VARIATION SELECTOR-16 - "X" # ASCII Letter 'X' - "\uFE0F") # VARIATION SELECTOR-16 - - expect_length_each = (1, 0, 1, 0) - expect_length_phrase = 3 - - # exercise, - length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) - length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -def test_unicode_9_vs15(): - """Verify effect of VS-15 on unicode_version 9.0 and later""" - phrase = ("\U0001f4da" # BOOKS - "\uFE0E" # VARIATION SELECTOR-15 - "X" # ASCII Letter 'X' - "\uFE0E") # VARIATION SELECTOR-15 + Test variation selector behavior across Unicode versions. - expect_length_each = (2, 0, 1, 0) - expect_length_phrase = 2 + VS-16 and VS-15 should affect width in Unicode 9.0+, but not in 8.0 and earlier. 
+ """ + phrase = base_char + vs_char + "X" + vs_char + expect_length_each = (base_width, 0, 1, 0) - # exercise, - length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) - length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version=unicode_version) for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version=unicode_version) - # verify. assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -def test_unicode_8_vs16(): - """Verify that VS-16 has no effect on unicode_version 8.0 and earlier""" - phrase = ("\u2640" # FEMALE SIGN - "\uFE0F" # VARIATION SELECTOR-16 - "X" # ASCII Letter 'X' - "\uFE0F") # VARIATION SELECTOR-16 + assert length_phrase == expect_phrase_width + + +@pytest.mark.parametrize('char,expected_base_width,expected_vs15_width,description', [ + ('\u231A', 2, 1, 'WATCH'), + ('\u231B', 2, 1, 'HOURGLASS'), + ('\u2648', 2, 1, 'ARIES'), + ('\u26A1', 2, 1, 'HIGH VOLTAGE SIGN'), + ('\U0001F4DA', 2, 1, 'BOOKS'), + ('\U0001F3E0', 2, 1, 'HOUSE BUILDING'), + ('\u0023', 1, 1, 'NUMBER SIGN'), + ('\u002A', 1, 1, 'ASTERISK'), + ('\u00A9', 1, 1, 'COPYRIGHT SIGN'), +]) +def test_vs15_width_effects(char, expected_base_width, expected_vs15_width, description): + """ + Test VS-15 width effects on various characters. - expect_length_each = (1, 0, 1, 0) - expect_length_phrase = 2 + Wide chars (2→1): VS-15 converts to narrow text presentation + Narrow chars (1→1): VS-15 has no effect, already narrow + """ + width_alone = wcwidth.wcswidth(char, unicode_version='9.0') + width_with_vs15 = wcwidth.wcswidth(char + '\uFE0E', unicode_version='9.0') - # exercise, - length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) - length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + assert width_alone == expected_base_width + assert width_with_vs15 == expected_vs15_width - # verify. 
- assert length_each == expect_length_each - assert length_phrase == expect_length_phrase +def test_vs15_vs16_symmetry(): + """Verify VS-15 and VS-16 have symmetric opposite effects on dual-presentation chars""" + watch = '\u231A' -def test_unicode_8_vs15(): - """Verify that VS-15 has no effect on unicode_version 8.0 and earlier""" - phrase = ("\U0001f4da" # BOOKS - "\uFE0E" # VARIATION SELECTOR-15 - "X" # ASCII Letter 'X' - "\uFE0E") # VARIATION SELECTOR-15 + width_base = wcwidth.wcswidth(watch, unicode_version='9.0') + width_vs15 = wcwidth.wcswidth(watch + '\uFE0E', unicode_version='9.0') + width_vs16 = wcwidth.wcswidth(watch + '\uFE0F', unicode_version='9.0') - expect_length_each = (1, 0, 1, 0) - expect_length_phrase = 2 + assert width_base == 2 + assert width_vs15 == 1 + assert width_vs16 == 2 - # exercise, - length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) - length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase +def test_vs15_multiple_in_sequence(): + """Verify multiple VS-15 applications in a single string""" + phrase = ( + '\u231A\uFE0E' # WATCH + VS15 (wide -> narrow) + 'X' # ASCII + '\U0001F4DA\uFE0E' # BOOKS + VS15 (wide -> narrow) + 'Y' # ASCII + '\u2648\uFE0E' # ARIES + VS15 (wide -> narrow) + ) + + width = wcwidth.wcswidth(phrase, unicode_version='9.0') + assert width == 5 + + +def test_vs15_without_preceding_char(): + """Verify VS-15 without a preceding measurable character has width 0""" + phrase = '\uFE0E' + width = wcwidth.wcwidth(phrase, unicode_version='9.0') + assert width == 0 From df7f69d0bbe0b9150c46718122bbeb3db2881255 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 9 Nov 2025 17:47:56 -0500 Subject: [PATCH 7/8] update wcwidth-browser and prepare note for next release --- bin/update-tables.py | 2 +- bin/wcwidth-browser.py | 280 +++++++++++++++++++++++++++++++++++++---- docs/intro.rst | 7 
++ 3 files changed, 264 insertions(+), 25 deletions(-) diff --git a/bin/update-tables.py b/bin/update-tables.py index 0edd068..e01392b 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -737,7 +737,7 @@ def replace_if_modified(new_filename: str, original_filename: str) -> None: significant_changes = False for line in diff_lines: if (line.startswith(('@@', '---', '+++')) or - (line.startswith(('-','+')) and 'This code generated' in line)): + (line.startswith(('-', '+')) and 'This code generated' in line)): continue else: significant_changes = line.startswith(('-', '+')) diff --git a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index 545f911..3865419 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -9,14 +9,22 @@ Usage: ./bin/wcwidth-browser.py [--wide=] [--alignment=] - [--combining] + [--combining | --vs15 | --vs16] + [--without-vs] [--help] Options: --wide= Browser 1 or 2 character-wide cells. --alignment= Chose left or right alignment. [default: left] - --combining Use combining character generator. [default: 2] + --combining Use combining character generator. + --vs15 Browse emoji variation sequences with VS-15 (text style). + --vs16 Browse emoji variation sequences with VS-16 (emoji style). + --without-vs Display base characters without variation selector. --help Display usage + +Note: + Only one of --combining, --vs15, or --vs16 can be used at a time. + The --without-vs option only applies when using --vs15 or --vs16. 
""" # pylint: disable=C0103,W0622 # Invalid constant name "echo" @@ -24,6 +32,7 @@ # Invalid module name "wcwidth-browser" # std imports +import os import sys import signal import string @@ -148,6 +157,100 @@ def __next__(self): return (ucs, name) +class WcVariationSequenceGenerator: + """Generator yields emoji variation sequences from emoji-variation-sequences.txt.""" + + # pylint: disable=R0903 + # Too few public methods (0/2) + + def __init__(self, base_width, unicode_version, variation_selector='VS15'): + """ + Class constructor. + + :param int base_width: filter by base character width (1 or 2). + :param str unicode_version: Unicode version. + :param str variation_selector: 'VS15' or 'VS16'. + """ + self.sequences = [] + + # Determine which variation selector we're looking for + vs_hex = 'FE0E' if variation_selector == 'VS15' else 'FE0F' + + # Find the emoji-variation-sequences.txt file + script_dir = os.path.dirname(os.path.abspath(__file__)) + filepath = os.path.join(script_dir, '..', 'tests', 'emoji-variation-sequences.txt') + + try: + with open(filepath, 'r', encoding='utf-8') as f: + for line in f: + # Skip comments and empty lines + if line.startswith('#') or not line.strip(): + continue + + # Only process lines with our target variation selector + if vs_hex not in line: + continue + + # Parse line format: "0023 FE0E ; text style; # (1.1) NUMBER SIGN" + parts = line.split(';') + if len(parts) < 2: + continue + + codepoints = parts[0].strip().split() + if len(codepoints) < 2: + continue + + try: + base_cp = int(codepoints[0], 16) + vs_cp = int(codepoints[1], 16) + except ValueError: + continue + + # Check base character width matches our filter + if wcwidth(chr(base_cp), unicode_version=unicode_version) != base_width: + continue + + # Extract name from comment + comment_parts = line.split('#') + if len(comment_parts) >= 2: + # Format: "# (1.1) NUMBER SIGN" + name_part = comment_parts[1].strip() + # Remove version info like "(1.1) " + if ')' in name_part: 
+ name = name_part.split(')', 1)[1].strip() + else: + name = name_part + name = string.capwords(name) + else: + name = "UNKNOWN" + + # Create the variation sequence + sequence = chr(base_cp) + chr(vs_cp) + self.sequences.append((sequence, name)) + + except FileNotFoundError: + # If file not found, just have empty sequences + pass + + self.sequences.reverse() + + def __iter__(self): + """Special method called by iter().""" + return self + + def __next__(self): + """ + Special method called by next(). + + :return: variation sequence and name, as tuple. + :rtype: tuple[str, str] + :raises StopIteration: no more sequences + """ + if not self.sequences: + raise StopIteration + return self.sequences.pop() + + class Style: """Styling decorator class instance for terminal output.""" @@ -266,7 +369,8 @@ class Pager: #: screen state for next draw method(s). STATE_CLEAN, STATE_DIRTY, STATE_REFRESH = 0, 1, 2 - def __init__(self, term, screen, character_factory): + def __init__(self, term, screen, character_factory, variation_selector=None, + show_variation_selector=True): """ Class constructor. @@ -276,10 +380,17 @@ def __init__(self, term, screen, character_factory): :type screen: Screen :param character_factory: Character factory generator. :type character_factory: callable returning iterable. + :param variation_selector: Variation selector mode ('VS15', 'VS16', or None). + :type variation_selector: str or None + :param show_variation_selector: Whether to display variation selector in VS mode. 
+ :type show_variation_selector: bool """ self.term = term self.screen = screen self.character_factory = character_factory + self.variation_selector = variation_selector + self.show_variation_selector = show_variation_selector + self.base_width_filter = screen.wide # For VS mode filtering self.unicode_version = 'auto' self.dirty = self.STATE_REFRESH self.last_page = 0 @@ -311,8 +422,15 @@ def initialize_page_data(self): # pylint: disable=attribute-defined-outside-init if self.term.is_a_tty: self.display_initialize() - self.character_generator = self.character_factory( - self.screen.wide, self.unicode_version) + + # Use variation sequence generator if in VS mode + if self.variation_selector: + self.character_generator = WcVariationSequenceGenerator( + self.base_width_filter, self.unicode_version, self.variation_selector) + else: + self.character_generator = self.character_factory( + self.screen.wide, self.unicode_version) + self._page_data = list() while True: try: @@ -429,19 +547,68 @@ def process_keystroke(self, inp, idx, offset): def _process_keystroke_commands(self, inp): """Process keystrokes that issue commands (side effects).""" - if inp in ('1', '2') and self.screen.wide != int(inp): - # change between 1 or 2-character wide mode. 
- self.screen.wide = int(inp) - self.initialize_page_data() - self.on_resize(None, None) + if inp in ('1', '2'): + new_width = int(inp) + if self.variation_selector: + # In VS mode, change base width filter + if self.base_width_filter != new_width: + self.base_width_filter = new_width + # If showing without VS, also update display width + if not self.show_variation_selector: + self.screen.wide = new_width + self.initialize_page_data() + self.on_resize(None, None) + else: + # In normal mode, change display width + if self.screen.wide != new_width: + self.screen.wide = new_width + self.initialize_page_data() + self.on_resize(None, None) + elif inp == '5': + # Switch to VS-15 mode + if self.variation_selector != 'VS15': + self.variation_selector = 'VS15' + self.base_width_filter = 1 # Default to narrow base + # Display width depends on whether showing with or without VS + if self.show_variation_selector: + self.screen.wide = 1 # VS-15 displays at width 1 + else: + self.screen.wide = self.base_width_filter # Use base width + self.initialize_page_data() + self.on_resize(None, None) + elif inp == '6': + # Switch to VS-16 mode + if self.variation_selector != 'VS16': + self.variation_selector = 'VS16' + self.base_width_filter = 1 # Default to narrow base + # Display width depends on whether showing with or without VS + if self.show_variation_selector: + self.screen.wide = 2 # VS-16 displays at width 2 + else: + self.screen.wide = self.base_width_filter # Use base width + self.initialize_page_data() + self.on_resize(None, None) elif inp == 'c': - # switch on/off combining characters + # Switch on/off combining characters, clear VS mode + self.variation_selector = None self.character_factory = ( WcWideCharacterGenerator if self.character_factory != WcWideCharacterGenerator else WcCombinedCharacterGenerator) self.initialize_page_data() self.on_resize(None, None) + elif inp == 'w': + # Toggle showing variation selector (only in VS mode) + if self.variation_selector: + 
self.show_variation_selector = not self.show_variation_selector + # Update display width based on whether we're showing VS or not + if self.show_variation_selector: + # Showing with VS: use VS-determined width + self.screen.wide = 1 if self.variation_selector == 'VS15' else 2 + else: + # Showing without VS: use base character width + self.screen.wide = self.base_width_filter + self.on_resize(None, None) elif inp in ('_', '-'): # adjust name length -2 nlen = max(1, self.screen.style.name_len - 2) @@ -550,6 +717,25 @@ def draw_heading(self, writer): return True return False + def mode_label(self): + """ + Return a label describing the current browsing mode. + + :return: Mode label string. + :rtype: str + """ + if self.variation_selector: + # VS mode: show base width + VS type + with/without VS + width_label = "NARROW" if self.base_width_filter == 1 else "WIDE" + vs_display = "W/VS" if self.show_variation_selector else "WO/VS" + return f"{width_label}+{self.variation_selector}+{vs_display}" + elif self.character_factory == WcCombinedCharacterGenerator: + # Combining mode + return "COMBINING" + else: + # Normal mode: show display width + return "NARROW" if self.screen.wide == 1 else "WIDE" + def draw_status(self, writer, idx): """ Conditionally draw status bar when output terminal is a tty. 
@@ -566,11 +752,16 @@ def draw_status(self, writer, idx): last_end = '(END)' else: last_end = f'/{self.last_page}' - txt = ('Page {idx}{last_end} - ' + + # Get current mode label + mode = self.mode_label() + + txt = ('Page {idx}{last_end} - [{mode}] - ' '{q} to quit, [keys: {keyset}]' .format(idx=style.attr_minor(f'{idx}'), last_end=style.attr_major(last_end), - keyset=style.attr_major('kjfbvc12-='), + mode=style.attr_major(mode), + keyset=style.attr_major('kjfbvc1256w-='), q=style.attr_minor('q'))) writer(self.term.center(txt).rstrip()) @@ -636,14 +827,20 @@ def text_entry(self, ucs, name): '{name:<{name_len}s}')) delimiter = style.attr_minor(style.delimiter) if len(ucs) != 1: - # determine display of combining characters - val = ord(ucs[1]) - # a combining character displayed of any fg color - # will reset the foreground character of the cell - # combined with (iTerm2, OSX). - disp_ucs = style.attr_major(ucs[0:2]) - if len(ucs) > 2: - disp_ucs += ucs[2] + # Variation sequence or combining character + if self.variation_selector and not self.show_variation_selector: + # VS mode, showing without variation selector - display only base + val = ord(ucs[0]) + disp_ucs = style.attr_major(ucs[0]) + else: + # Combining character or VS mode with variation selector shown + val = ord(ucs[1]) + # a combining character displayed of any fg color + # will reset the foreground character of the cell + # combined with (iTerm2, OSX). 
+ disp_ucs = style.attr_major(ucs[0:2]) + if len(ucs) > 2: + disp_ucs += ucs[2] else: # non-combining val = ord(ucs) @@ -668,9 +865,38 @@ def validate_args(opts): else: assert opts['--alignment'] in ('left', 'right'), opts['--alignment'] opts['--wide'] = int(opts['--wide']) + + # Ensure mutual exclusivity of --combining, --vs15, and --vs16 + exclusive_opts = [opts.get('--combining', False), + opts.get('--vs15', False), + opts.get('--vs16', False)] + assert sum(bool(opt) for opt in exclusive_opts) <= 1, \ + "Only one of --combining, --vs15, or --vs16 can be used" + + # Set character factory and variation selector opts['character_factory'] = WcWideCharacterGenerator - if opts['--combining']: + opts['variation_selector'] = None + opts['base_width_filter'] = opts['--wide'] # Save base width filter + opts['display_width'] = opts['--wide'] # Default display width + opts['show_variation_selector'] = not opts.get('--without-vs', False) + + if opts.get('--combining'): opts['character_factory'] = WcCombinedCharacterGenerator + elif opts.get('--vs15'): + opts['variation_selector'] = 'VS15' + # Display width depends on whether showing with or without VS + if opts['show_variation_selector']: + opts['display_width'] = 1 # VS-15 displays at width 1 + else: + opts['display_width'] = opts['base_width_filter'] # Use base width + elif opts.get('--vs16'): + opts['variation_selector'] = 'VS16' + # Display width depends on whether showing with or without VS + if opts['show_variation_selector']: + opts['display_width'] = 2 # VS-16 displays at width 2 + else: + opts['display_width'] = opts['base_width_filter'] # Use base width + return opts @@ -687,8 +913,14 @@ def main(opts): alignment=opts['--alignment']) style.name_len = 10 - screen = Screen(term, style, wide=opts['--wide']) - pager = Pager(term, screen, opts['character_factory']) + screen = Screen(term, style, wide=opts['display_width']) + pager = Pager(term, screen, opts['character_factory'], + 
variation_selector=opts['variation_selector'], + show_variation_selector=opts['show_variation_selector']) + + # Set base width filter from command-line argument + if opts['variation_selector']: + pager.base_width_filter = opts['base_width_filter'] with term.location(), term.cbreak(), \ term.fullscreen(), term.hidden_cursor(): diff --git a/docs/intro.rst b/docs/intro.rst index aa049df..5a6df36 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -221,6 +221,12 @@ History * **Bugfix** accounting of some kinds of emoji sequences using U+FE0E Variation Selector 15 (`PR #120`_). +0.2.15 *2025-11-09* + * **Bugfix** specification and code for Variation Selector 15 (VS-15, U+FE0E) + support. `PR #120`_. + * **Updated** `bin/wcwidth-browser.py`_ with arguments and modes, ``--vs15``, ``--vs16``, + and ``--without-vs``. + 0.2.14 *2025-09-22* * **Drop Support** for Python 2.7 and 3.5. `PR #117`_. * **Update** tables to include Unicode Specifications 16.0.0 and 17.0.0. @@ -351,6 +357,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #117`: https://github.com/jquast/wcwidth/pull/117 .. _`PR #146`: https://github.com/jquast/wcwidth/pull/146 .. _`PR #149`: https://github.com/jquast/wcwidth/pull/149 +.. _`PR #120`: https://github.com/jquast/wcwidth/pull/120 .. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101 .. _`jquast/blessed`: https://github.com/jquast/blessed .. 
_`selectel/pyte`: https://github.com/selectel/pyte From 0f0df47c9990a47ceb3049255cfb69ccde8bf135 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 9 Nov 2025 19:27:57 -0500 Subject: [PATCH 8/8] remove 'docopt' requirement --- bin/wcwidth-browser.py | 130 ++++++++++++++++++++++++++++++++++++++- requirements-develop.txt | 1 - 2 files changed, 127 insertions(+), 4 deletions(-) diff --git a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index 3865419..eac3c80 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -22,9 +22,42 @@ --without-vs Display base characters without variation selector. --help Display usage +Interactive Keys: + Navigation: + k, y, UP Scroll backward 1 line + j, e, ENTER, DOWN Scroll forward 1 line + f, SPACE, PGDOWN Scroll forward 1 page + b, PGUP Scroll backward 1 page + F, SHIFT-DOWN Scroll forward 10 pages + B, SHIFT-UP Scroll backward 10 pages + HOME Go to top + G, END Go to bottom + Ctrl-L Refresh screen + + Mode Switching: + 0 Exit VS mode (return to normal mode) + 1 Narrow width (normal) / Narrow base filter (VS mode) + 2 Wide width (normal) / Wide base filter (VS mode) + 5 Switch to VS-15 mode (text style) + 6 Switch to VS-16 mode (emoji style) + c Toggle combining character mode + w Toggle with/without variation selector (VS mode only) + + Display Adjustment: + -, _ Decrease character name display length by 2 + +, = Increase character name display length by 2 + v Select Unicode version + + Exit: + q, Q Quit browser + Note: Only one of --combining, --vs15, or --vs16 can be used at a time. The --without-vs option only applies when using --vs15 or --vs16. 
+ + In VS mode, the display shows: + - W/VS: Characters displayed with variation selector + - WO/VS: Base characters displayed without variation selector """ # pylint: disable=C0103,W0622 # Invalid constant name "echo" @@ -36,11 +69,11 @@ import sys import signal import string +import argparse import functools import unicodedata # 3rd party -import docopt import blessed # local @@ -564,6 +597,13 @@ def _process_keystroke_commands(self, inp): self.screen.wide = new_width self.initialize_page_data() self.on_resize(None, None) + elif inp == '0': + # Exit VS mode, return to normal mode + if self.variation_selector: + self.variation_selector = None + # Keep current display width (screen.wide stays as is) + self.initialize_page_data() + self.on_resize(None, None) elif inp == '5': # Switch to VS-15 mode if self.variation_selector != 'VS15': @@ -761,7 +801,7 @@ def draw_status(self, writer, idx): .format(idx=style.attr_minor(f'{idx}'), last_end=style.attr_major(last_end), mode=style.attr_major(mode), - keyset=style.attr_major('kjfbvc1256w-='), + keyset=style.attr_major('kjfbvc01256w-='), q=style.attr_minor('q'))) writer(self.term.center(txt).rstrip()) @@ -928,5 +968,89 @@ def main(opts): return 0 +def parse_args(): + """Parse command-line arguments using argparse.""" + # Extract description and usage from module docstring + doc_lines = __doc__.split('\n') + description = [] + for line in doc_lines: + if line.strip() and not line.startswith('Usage:'): + description.append(line) + if line.startswith('Usage:'): + break + + parser = argparse.ArgumentParser( + description='A terminal browser for testing printable width of unicode.', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Interactive Keys: + Navigation: + k, y, UP Scroll backward 1 line + j, e, ENTER, DOWN Scroll forward 1 line + f, SPACE, PGDOWN Scroll forward 1 page + b, PGUP Scroll backward 1 page + F, SHIFT-DOWN Scroll forward 10 pages + B, SHIFT-UP Scroll backward 10 pages + HOME Go to top + 
G, END Go to bottom + Ctrl-L Refresh screen + + Mode Switching: + 0 Exit VS mode (return to normal mode) + 1 Narrow width (normal) / Narrow base filter (VS mode) + 2 Wide width (normal) / Wide base filter (VS mode) + 5 Switch to VS-15 mode (text style) + 6 Switch to VS-16 mode (emoji style) + c Toggle combining character mode + w Toggle with/without variation selector (VS mode only) + + Display Adjustment: + -, _ Decrease character name display length by 2 + +, = Increase character name display length by 2 + v Select Unicode version + + Exit: + q, Q Quit browser + +Notes: + Only one of --combining, --vs15, or --vs16 can be used at a time. + The --without-vs option only applies when using --vs15 or --vs16. + + In VS mode, the display shows: + - W/VS: Characters displayed with variation selector + - WO/VS: Base characters displayed without variation selector +""") + + parser.add_argument('--wide', metavar='', type=str, default=None, + help='Browser 1 or 2 character-wide cells.') + parser.add_argument('--alignment', metavar='', type=str, default='left', + help='Choose left or right alignment. 
(default: left)') + + # Mutually exclusive group for mode selection + mode_group = parser.add_mutually_exclusive_group() + mode_group.add_argument('--combining', action='store_true', + help='Use combining character generator.') + mode_group.add_argument('--vs15', action='store_true', + help='Browse emoji variation sequences with VS-15 (text style).') + mode_group.add_argument('--vs16', action='store_true', + help='Browse emoji variation sequences with VS-16 (emoji style).') + + parser.add_argument('--without-vs', action='store_true', + help='Display base characters without variation selector.') + + args = parser.parse_args() + + # Convert to docopt-style dict format for compatibility with validate_args + return { + '--wide': args.wide, + '--alignment': args.alignment, + '--combining': args.combining, + '--vs15': args.vs15, + '--vs16': args.vs16, + '--without-vs': args.without_vs, + '--help': False, # argparse handles this automatically + } + + if __name__ == '__main__': - sys.exit(main(validate_args(docopt.docopt(__doc__)))) + sys.exit(main(validate_args(parse_args()))) diff --git a/requirements-develop.txt b/requirements-develop.txt index 018a884..b34b5d1 100644 --- a/requirements-develop.txt +++ b/requirements-develop.txt @@ -1,3 +1,2 @@ # This is just for the bin/wcwidth-browser.py script blessed>=1.14.1,<2 -docopt==0.6.2