From c76b457a9a8bad29373332d449cfc811b22ff207 Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Sat, 4 Oct 2025 15:50:06 +0200 Subject: [PATCH 1/6] Try to infer/undo binary operator spacing This is one of the issues in https://github.com/Kolaru/MathTeXEngine.jl/issues/61 --- src/parser/parser.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/parser/parser.jl b/src/parser/parser.jl index ed13f3a..a4f648c 100644 --- a/src/parser/parser.jl +++ b/src/parser/parser.jl @@ -222,6 +222,12 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) else expr = canonical_expr(c) end + if head(expr) == :spaced && inside_math && !isempty(stack) + top = first(stack) + if _is_ordinary(top) + expr = only(expr.args) + end + end push!(stack, expr) push_down!(stack) end @@ -245,3 +251,24 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) return lines end end + +function _is_ordinary(top) + if head(top) in (:punctuation, :space) + return true + elseif head(top) in (:function, :integral, :underover) + return true + elseif head(top) in (:superscript, :subscript) + return true + elseif head(top) == :delimiter + if length(top.args)==1 && top.args[1] in ('(', '[', '<') + return true + end + elseif head(top) in (:inline_math, :group, :delimited) + if isempty(top.args) + return true + else + return _is_ordinary(last(top.args)) + end + end + return false +end From 39d18b48d9955ab4497eb3ec1c659fb3f9655c4f Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Mon, 6 Oct 2025 13:26:56 +0200 Subject: [PATCH 2/6] also handle spaced commands --- src/MathTeXEngine.jl | 2 ++ src/parser/parser.jl | 86 +++++++++++++++++++++++++++----------------- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/MathTeXEngine.jl b/src/MathTeXEngine.jl index ffb03f3..d660830 100644 --- a/src/MathTeXEngine.jl +++ b/src/MathTeXEngine.jl @@ -28,6 +28,8 @@ export glyph_index # Reexport from LaTeXStrings export @L_str +const 
UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true) + include("parser/tokenizer.jl") include("parser/texexpr.jl") include("parser/commands_data.jl") diff --git a/src/parser/parser.jl b/src/parser/parser.jl index a4f648c..42bc780 100644 --- a/src/parser/parser.jl +++ b/src/parser/parser.jl @@ -59,7 +59,9 @@ end show_stack(stack) = show_stack(stdout, stack) -function push_down!(stack) +function push_down!(stack, inside_math=false) + global UNSPACE_BINARY_OPERATORS_HEURISTIC + top = pop!(stack) if head(top) == :group # Replace empty groups by 0 spaces @@ -70,6 +72,24 @@ function push_down!(stack) top = only(top.args) end end + if UNSPACE_BINARY_OPERATORS_HEURISTIC[] && inside_math + if head(top) == :spaced + # for `:spaced` expressions (binary operators mainly) inspect what comes before + undo_spacing = false + if isempty(stack) + undo_spacing = true + else + prev = first(stack) + if !(_is_plausible_left_arg(prev)) + # if prior element is not argument for a binary operator, then remove symmetric spacing + undo_spacing = true + end + end + if undo_spacing + top = only(top.args) + end + end + end push!(first(stack), top) if head(first(stack)) in [:subscript, :superscript] @@ -79,10 +99,10 @@ function push_down!(stack) push!(first(stack).args, decorated) end - conclude_command!!(stack) + conclude_command!!(stack, inside_math) end -function conclude_command!!(stack) +function conclude_command!!(stack, inside_math=false) com = first(stack) head(com) != :command && return false nargs = length(com.args) - 1 @@ -90,7 +110,7 @@ function conclude_command!!(stack) if required_args(first(com.args)) == nargs pop!(stack) push!(stack, command_expr(com.args[1], com.args[2:end])) - push_down!(stack) + push_down!(stack, inside_math) end end @@ -145,7 +165,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) if token == dollar if head(first(stack)) == :inline_math inside_math = false - push_down!(stack) + push_down!(stack, inside_math) else inside_math = true push!(stack, 
TeXExpr(:inline_math)) @@ -155,7 +175,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) throw(TeXParseError("unexpected new line", stack, length(tex), tex)) end - push_down!(stack) + push_down!(stack, inside_math) push!(stack, TeXExpr(:line)) elseif token == lcurly push!(stack, TeXExpr(:group)) @@ -163,7 +183,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) if head(first(stack)) != :group throw(TeXParseError("missing closing '}'", stack, pos, tex)) end - push_down!(stack) + push_down!(stack, inside_math) elseif token == left push!(stack, TeXExpr(:delimited, delimiter(raw"\left", tex[pos:pos+len-1]))) elseif token == right @@ -183,7 +203,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) elseif token == command com_str = tex[pos:pos+len-1] push!(stack, TeXExpr(:command, [com_str])) - conclude_command!!(stack) + conclude_command!!(stack, inside_math) elseif token == underscore || token == caret || token == primes dec = (token == underscore) ? 
:subscript : :superscript @@ -222,14 +242,9 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) else expr = canonical_expr(c) end - if head(expr) == :spaced && inside_math && !isempty(stack) - top = first(stack) - if _is_ordinary(top) - expr = only(expr.args) - end - end + push!(stack, expr) - push_down!(stack) + push_down!(stack, inside_math) end catch err throw(TeXParseError("unexpected error", stack, pos, tex)) @@ -237,7 +252,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) end if head(first(stack)) == :line - push_down!(stack) + push_down!(stack, inside_math) end if length(stack) > 1 @@ -252,23 +267,30 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false) end end -function _is_ordinary(top) - if head(top) in (:punctuation, :space) - return true - elseif head(top) in (:function, :integral, :underover) - return true - elseif head(top) in (:superscript, :subscript) - return true - elseif head(top) == :delimiter - if length(top.args)==1 && top.args[1] in ('(', '[', '<') - return true +function _is_plausible_left_arg(texpr) + if head(texpr) in (:punctuation, :space) + ## punctuation or explicit spacing likely does not precede symbol for binary op + return false + elseif head(texpr) in (:function, :integral, :underover) + ## function without parentheses `\sin +1` + ## integral sign `∫ -1` + ## other unary symbols `∑ ± 1` + return false + elseif head(texpr) in (:superscript, :subscript) + ## sub- or superscripts without parenthesis + return false + elseif head(texpr) == :delimiter + ## beginning of parentheses group + if length(texpr.args)==1 && texpr.args[1] in ('(', '[', '<') + return false end - elseif head(top) in (:inline_math, :group, :delimited) - if isempty(top.args) - return true + elseif head(texpr) in (:inline_math, :group, :delimited) + ## look at last element within group expressions + if isempty(texpr.args) + return false # consistent with TeXExpr(:space, 0) in case of :group else - return 
_is_ordinary(last(top.args)) + return _is_plausible_left_arg(last(texpr.args)) end end - return false -end + return true +end \ No newline at end of file From 6a80fc80ef04efb6a7325a16ffd8f752057eb909 Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Mon, 6 Oct 2025 13:49:49 +0200 Subject: [PATCH 3/6] add some tests --- test/parser.jl | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/test/parser.jl b/test/parser.jl index 6370ec3..fb663df 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -187,6 +187,91 @@ end (:char, 'd'))) end + @testset "Unspaced symbol" begin + MathTeXEngine.UNSPACE_BINARY_OPERATORS_HEURISTIC[] = true + ## keep space in binary operations + test_parse(raw"$2-1$", ( + :inline_math, + (:digit, '2'), + (:spaced, (:symbol, '−')), + (:digit, '1') + )) + ## but remove if used as unary symbol + test_parse(raw"$-1$", ( + :inline_math, + (:symbol, '−'), + (:digit, '1') + )) + ## same for spaced commands + test_parse(raw"$2\pm1$", ( + :inline_math, + (:digit, '2'), + (:spaced, (:symbol, '±')), + (:digit, '1') + )) + test_parse(raw"$\pm1$", ( + :inline_math, + (:symbol, '±'), + (:digit, '1') + )) + + ## within parentheses: + test_parse(raw"$(2-1)$", ( + :inline_math, + (:delimiter, "("), + (:digit, '2'), + (:spaced, (:symbol, '−')), + (:digit, '1'), + (:delimiter, ")"), + )) + test_parse(raw"$(-1)$", ( + :inline_math, + (:delimiter, "("), + (:symbol, '−'), + (:digit, '1'), + (:delimiter, ")"), + )) + ## zero space/empty group removes binary spacing + test_parse(raw"$(2{}-1)$", ( + :inline_math, + (:delimiter, "("), + (:digit, '2'), + (:space, 0.0), + (:symbol, '−'), + (:digit, '1'), + (:delimiter, ")"), + )) + ## exponents + test_parse(raw"$a^+$", ( + :inline_math, + (:decorated, + (:char, 'a'), + nothing, + (:symbol, '+') + ) + )) + test_parse(raw"$a^{+}$", ( + :inline_math, + (:decorated, + (:char, 'a'), + nothing, + (:symbol, '+') + ) + )) + test_parse(raw"$a^{1+2}$", ( + :inline_math, + (:decorated, + 
(:char, 'a'), + nothing, + (:group, + (:digit, '1'), + (:spaced, (:symbol, '+')), + (:digit, '2'), + ) + ) + )) + end + @testset "Subscript and superscript" begin @test texparse(raw"a^2_3") == texparse(raw"a_3^2") @test texparse(raw"^7_b") == texparse(raw"{}^7_b") From f7f6cc1cd3280deb6c2e92170d77a9aa08b73b56 Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Mon, 6 Oct 2025 14:56:33 +0200 Subject: [PATCH 4/6] move UNSPACE_BINARY_OPERATORS_HEURISTIC to `parser.jl` --- src/MathTeXEngine.jl | 2 -- src/parser/parser.jl | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/MathTeXEngine.jl b/src/MathTeXEngine.jl index d660830..ffb03f3 100644 --- a/src/MathTeXEngine.jl +++ b/src/MathTeXEngine.jl @@ -28,8 +28,6 @@ export glyph_index # Reexport from LaTeXStrings export @L_str -const UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true) - include("parser/tokenizer.jl") include("parser/texexpr.jl") include("parser/commands_data.jl") diff --git a/src/parser/parser.jl b/src/parser/parser.jl index 42bc780..e28102e 100644 --- a/src/parser/parser.jl +++ b/src/parser/parser.jl @@ -1,3 +1,7 @@ +## flag to indicate whether or not to (try to) remove space for +## binary operator symbols if they are used as unary prefixes +const UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true) + struct TeXParseError <: Exception msg::String stack::Stack From ec3b7e2b50907921e98f6010028a5061fb118a3a Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Mon, 20 Oct 2025 15:41:36 +0200 Subject: [PATCH 5/6] move flag to main module file --- src/MathTeXEngine.jl | 7 +++++++ src/parser/parser.jl | 4 ---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/MathTeXEngine.jl b/src/MathTeXEngine.jl index ffb03f3..91da575 100644 --- a/src/MathTeXEngine.jl +++ b/src/MathTeXEngine.jl @@ -28,6 +28,13 @@ export glyph_index # Reexport from LaTeXStrings export @L_str +# Global settings flags + +## parser flag to indicate whether or not to (try to) remove space for +## binary operator symbols if 
they are used as unary prefixes +## (only in math mode) +const UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true) + include("parser/tokenizer.jl") include("parser/texexpr.jl") include("parser/commands_data.jl") diff --git a/src/parser/parser.jl b/src/parser/parser.jl index e28102e..42bc780 100644 --- a/src/parser/parser.jl +++ b/src/parser/parser.jl @@ -1,7 +1,3 @@ -## flag to indicate whether or not to (try to) remove space for -## binary operator symbols if they are used as unary prefixes -const UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true) - struct TeXParseError <: Exception msg::String stack::Stack From 92163215190e76c7533394a97f2310270af800f8 Mon Sep 17 00:00:00 2001 From: manuelbb-upb Date: Mon, 20 Oct 2025 15:45:34 +0200 Subject: [PATCH 6/6] add explanation to README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index a879ff5..b88f0bb 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,15 @@ The engine should support every construction the parser does (see below). Currently the only font set supported is Computer Modern. +# Advanced Customization + +In math mode, the parser uses a heuristic to remove the symmetric spacing around binary operator symbols when they are +used as unary prefixes. The heuristic is controlled by a global boolean `Ref`: +```julia +MathTeXEngine.UNSPACE_BINARY_OPERATORS_HEURISTIC[] = true # default +``` +If the flag is set to `true`, `L"+1"` should have no additional spacing around `+`, while `L"1+2"` keeps the binary operator spacing. Set the flag to `false` to disable the heuristic. + ## Engine examples ### Basic examples