From f8a6de1819e82165216e01e247364cd952799426 Mon Sep 17 00:00:00 2001 From: "R. Menon" Date: Sat, 15 Mar 2025 18:14:23 -0700 Subject: [PATCH] Recognize Greek letters in patterns --- mathematica/lexer.py | 7 +++++-- setup.py | 2 +- tests/test_lexer.py | 26 +++++++++++++++++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/mathematica/lexer.py b/mathematica/lexer.py index a09708a..711cdd1 100644 --- a/mathematica/lexer.py +++ b/mathematica/lexer.py @@ -11,7 +11,8 @@ class Regex: - IDENTIFIER = r'[a-zA-Z\$][a-zA-Z0-9\$]*' + UNICODE = mma.UNICODE_SYSTEM_UNDEFINED_SYMBOLS.union(mma.UNICODE_SYSTEM_SYMBOLS) + IDENTIFIER = r'[a-zA-ZΑ-Ωα-ω\${unicode}][a-zA-ZΑ-Ωα-ω0-9\${unicode}]*'.format(unicode=''.join(UNICODE)) NAMED_CHARACTER = r'\\\[{identifier}]'.format(identifier=IDENTIFIER) SYMBOLS = (r'[`]?({identifier}|{named_character})(`({identifier}|{named_character}))*[`]?' .format(identifier=IDENTIFIER, named_character=NAMED_CHARACTER)) @@ -20,7 +21,6 @@ class Regex: REAL = r'({integer}|{float})`({integer}|{float})?|{float}'.format(integer=INTEGER, float=FLOAT) BASE_NUMBER = r'{integer}\s*\^\^\s*({real}|{integer})'.format(integer=INTEGER, real=REAL) SCIENTIFIC_NUMBER = r'({real}|{integer})\s*\*\^\s*{integer}'.format(real=REAL, integer=INTEGER) - # PATTERNS = r'{symbol}\_{{1,3}}({symbol})?|({symbol})?\_{{1,3}}{symbol}'.format(symbol=SYMBOLS) PATTERNS = r'{symbol}:?\_{{1,3}}({symbol})?|({symbol})?:?\_{{1,3}}{symbol}'.format(symbol=SYMBOLS) SLOTS = r'#{symbol}|#\"{symbol}\"|#{{1,2}}[0-9]*'.format(symbol=SYMBOLS) MESSAGES = r'(::)(\s*)({symbol})'.format(symbol=SYMBOLS) @@ -131,6 +131,9 @@ def unicode(index, token, value): else: new_token = MToken.UNKNOWN return index, new_token, value + elif token is MToken.SYMBOL and value in mma.UNICODE_SYSTEM_SYMBOLS: + new_token = MToken.BUILTIN + return index, new_token, value else: return index, token, value diff --git a/setup.py b/setup.py index ece1df8..0efe7e2 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setup( name='pygments-mathematica', - version='0.4.1', + version='0.4.2', description='Mathematica/Wolfram Language Lexer for Pygments', long_description=__doc__, author='rsmenon', diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 34f7a9f..f33930a 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -397,4 +397,28 @@ def test_string_closing_quote_on_newline(self): (MToken.STRING, '"'), (MToken.SYMBOL, 'abc'), ] - self.verify(code, expected) \ No newline at end of file + self.verify(code, expected) + + def test_unicode_greek(self): + code = [ + 'varλ1a', + 'Δ', + 'f[Δx_List] := Δx', + 'a∂_', + ] + expected = [ + [(MToken.SYMBOL, 'varλ1a')], + [(MToken.SYMBOL, 'Δ')], + [ + (MToken.SYMBOL, 'f'), + (MToken.GROUP, '['), + (MToken.PATTERN, 'Δx_List'), + (MToken.GROUP, ']'), + (MToken.WHITESPACE, ' '), + (MToken.OPERATOR, ':='), + (MToken.WHITESPACE, ' '), + (MToken.SYMBOL, 'Δx'), + ], + [(MToken.PATTERN, 'a∂_')], + ] + self.verify_all(code, expected) \ No newline at end of file