Skip to content

Commit 3288ec9

Browse files
authored
Feature/unicode support (#237)
* Added tests as module * Added unicode support to symbolic expressions * Added lowercase lambda * Switched unicode parsing to be separate to elementary functions * Implemented unicode parsing for physical quantities * Revert implementation * Implemented unicode parsing for physical quantities * Refactored preprocess * Switched from theta to degree symbol * Added other unicode variants of greek letters * Added variant unicode characters to physical quantities
1 parent 68aa5b3 commit 3288ec9

File tree

7 files changed

+286
-87
lines changed

7 files changed

+286
-87
lines changed

app/context/physical_quantity.py

Lines changed: 93 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -476,73 +476,106 @@ def feedback_procedure_generator(parameters_dict):
476476
graphs.update({label: graph})
477477
return graphs
478478

479-
480-
def preprocess_legacy(expr, parameters):
    """
    Rewrite a physical quantity string for the "legacy" strictness mode.

    The rewrites below are applied in order, each one repeated until its
    pattern no longer matches:

      1. "*UNIT" (but not "**UNIT") becomes " UNIT",
         e.g. "newton*metre" -> "newton metre".
      2. "PREFIX UNIT" becomes " PREFIXUNIT",
         e.g. "kilo metre" -> " kilometre".
      3. A space is inserted before a prefix or unit short form that
         directly follows a digit, "*", "(" or ")",
         e.g. "100Pa" -> "100 Pa".
      4. The space after a prefix short form is removed when the prefix is
         preceded by a digit, "*", "(", ")" or a space.
      5. The "* " after a prefix short form is removed under the same
         condition.
      6. In "* " before a unit short form, preceded by a digit, "(", ")"
         or a space, the "*" is replaced by a space.

    Prefix and unit names are inserted into the regular expressions
    unescaped, exactly as they appear in the unit tables.

    :param expr: expression string to rewrite.
    :param parameters: evaluation parameters; accepted for interface
        compatibility but not read by this function.
    :return: the rewritten expression string.
    """
    # Blank out the third field of every prefix entry and deduplicate.
    prefix_data = {(p[0], p[1], tuple(), p[3]) for p in set_of_SI_prefixes}

    # Long prefix names followed by their alternative spellings.
    prefixes = []
    for prefix in prefix_data:
        prefixes.append(prefix[0])
        prefixes.extend(prefix[-1])
    prefix_short_forms = [prefix[1] for prefix in prefix_data]

    unit_data = (
        set_of_SI_base_unit_dimensions
        | set_of_derived_SI_units_in_SI_base_units
        | set_of_common_units_in_SI
        | set_of_very_common_units_in_SI
        | set_of_imperial_units
    )

    # Copy so that `prefixes` itself is left untouched; it is reused below.
    unit_long_forms = list(prefixes)
    for unit in unit_data:
        unit_long_forms.append(unit[0])
        unit_long_forms.extend(unit[-2])
        unit_long_forms.extend(unit[-1])
    unit_long_forms = "(" + "|".join(unit_long_forms) + ")"

    # Rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
    # Example: "newton*metre" ---> "newton metre"
    # The search runs on expr[1:], so a "*" at index 0 is never rewritten and
    # every span has to be shifted by one when slicing expr.
    search_string = r"(?<!\*)\* *" + unit_long_forms
    match_content = re.search(search_string, expr[1:])
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group().replace("*", " ") + expr[end + 1:]
        match_content = re.search(search_string, expr[1:])

    prefixes = "(" + "|".join(prefixes) + ")"
    # Rewrite any expression on the form "PREFIX UNIT" as "PREFIXUNIT"
    # Example: "kilo metre" ---> "kilometre"
    search_string = prefixes + " " + unit_long_forms
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start] + " " + "".join(match_content.group().split()) + expr[end:]
        match_content = re.search(search_string, expr)

    unit_short_forms = [u[1] for u in unit_data]
    short_forms = "(" + "|".join(list(set(prefix_short_forms + unit_short_forms))) + ")"
    # Add space before short forms of prefixes or unit names if they are preceded by numbers or multiplication
    # Example: "100Pa" ---> "100 Pa"
    search_string = r"[0-9\*\(\)]" + short_forms
    match_content = re.search(search_string, expr)
    while match_content is not None:
        split_at = match_content.span()[0] + 1
        expr = expr[:split_at] + " " + expr[split_at:]
        match_content = re.search(search_string, expr)

    # Remove space after prefix short forms if they are preceded by numbers, multiplication or space
    # Example: "100 m Pa" ---> "100 mPa"
    # NOTE(review): the first matched character is emitted twice (once by the
    # slice ending at start + 1 and once as the first character of the match
    # text). Kept as-is to preserve the existing output; confirm whether the
    # duplicated character is intended.
    prefix_short_forms = "(" + "|".join(prefix_short_forms) + ")"
    search_string = r"[0-9\*\(\) ]" + prefix_short_forms + " "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group()[:-1] + expr[end:]
        match_content = re.search(search_string, expr)

    # Remove multiplication and space after prefix short forms if they are preceded by numbers, multiplication or space
    # Example: "100 m* Pa" ---> "100 mPa"
    # Raw string: "\*" in a normal string literal is an invalid escape sequence.
    # NOTE(review): same duplicated leading character as in the previous step.
    search_string = r"[0-9\*\(\) ]" + prefix_short_forms + r"\* "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group()[:-2] + expr[end:]
        match_content = re.search(search_string, expr)

    # Replace multiplication followed by space before unit short forms with only spaces if they are preceded by numbers or space
    # Example: "100* Pa" ---> "100 Pa"
    unit_short_forms = "(" + "|".join(unit_short_forms) + ")"
    search_string = r"[0-9\(\) ]\* " + unit_short_forms
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start] + match_content.group().replace("*", " ") + expr[end:]
        match_content = re.search(search_string, expr)

    return expr
548+
549+
def transform_prefixes_to_standard(expr):
    """
    Replace alternative SI prefix spellings with the standard prefix name.

    Only the spellings listed in the fourth field of each entry of
    set_of_SI_prefixes are rewritten. A spelling is replaced when it is not
    preceded by a word character and is followed, after optional whitespace,
    by one of the letters accepted by the lookahead. The replacement is the
    standard prefix name followed by a single space, and any whitespace
    directly after the spelling is consumed.

    Finally, runs of two or more whitespace characters are collapsed into one
    space and leading/trailing whitespace is stripped.

    :param expr: expression string to normalise.
    :return: the normalised expression string.
    """
    for standard_name, _symbol, _power, spellings in set_of_SI_prefixes:
        replacement = standard_name + ' '
        # Empty spellings are skipped.
        for spelling in filter(None, spellings):
            # The lookahead keeps the following unit letter out of the match,
            # so only the spelling and the whitespace after it are replaced.
            spelling_pattern = rf'(?<!\w){re.escape(spelling)}\s*(?=[A-Za-zµΩ])'
            expr = re.sub(spelling_pattern, replacement, expr)

    # Normalize spacing (no multiple spaces)
    return re.sub(r'\s{2,}', ' ', expr).strip()
570+
571+
def expression_preprocess(name, expr, parameters):
    """
    Preprocess a physical quantity expression before it is parsed.

    When parameters["strictness"] is "legacy" the expression is passed
    through preprocess_legacy; for any other value (the default is
    "natural") it is passed through transform_prefixes_to_standard.

    :param name: not read by this function.
    :param expr: expression string to preprocess.
    :param parameters: dict of evaluation parameters; only "strictness" is read.
    :return: tuple (True, preprocessed expression, None).
    """
    if parameters.get("strictness", "natural") == "legacy":
        processed = preprocess_legacy(expr, parameters)
    else:
        processed = transform_prefixes_to_standard(expr)
    return True, processed, None
546579

547580

548581
def feedback_string_generator(tags, graph, parameters_dict):

app/tests/__init__.py

Whitespace-only changes.

app/tests/physical_quantity_evaluation_tests.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,5 +374,24 @@ def test_answer_zero_value(self):
374374
result = evaluation_function(res, ans, params, include_test_data=True)
375375
assert result["is_correct"] is False
376376

377+
@pytest.mark.parametrize(
    "ans,res",
    [
        ("10 ohm", "10 Ω"),
        ("10 micro A", "10 μA"),
        ("10 micro A", "10 μ A"),
        ("30 degree", "30 °"),
    ],
)
def test_greek_letter_units(self, ans, res):
    """A response written with unicode unit symbols must match the spelled-out answer."""
    evaluation_params = {
        'strict_syntax': False,
        'physical_quantity': True,
        'elementary_functions': True,
    }
    outcome = evaluation_function(res, ans, evaluation_params)
    assert outcome["is_correct"] is True
394+
395+
377396
# Run this test module directly through pytest.
if __name__ == "__main__":
    pytest_args = ['-xk not slow', "--no-header", os.path.abspath(__file__)]
    pytest.main(pytest_args)

app/tests/symbolic_evaluation_tests.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,5 +2009,89 @@ def test_abstract_integral(self):
20092009
result = evaluation_function(response, answer, params)
20102010
assert result["is_correct"] is True
20112011

2012+
@pytest.mark.parametrize(
    "unicode_char,letter_name",
    [
        ("Α", "Alpha"), ("α", "alpha"),
        ("Β", "Beta"), ("β", "beta"),
        ("Γ", "Gamma"), ("γ", "gamma"),
        ("Δ", "Delta"), ("δ", "delta"),
        ("Ε", "Epsilon"), ("ε", "epsilon"),
        ("Ζ", "Zeta"), ("ζ", "zeta"),
        ("Η", "Eta"), ("η", "eta"),
        ("Θ", "Theta"), ("θ", "theta"),
        ("Ι", "Iota"), ("ι", "iota"),
        ("Κ", "Kappa"), ("κ", "kappa"),
        ("Λ", "Lambda"), ("λ", "lambda"),
        ("Μ", "Mu"), ("μ", "mu"),
        ("Ν", "Nu"), ("ν", "nu"),
        ("Ξ", "Xi"), ("ξ", "xi"),
        ("Ο", "Omicron"), ("ο", "omicron"),
        ("Π", "Pi"), ("π", "pi"),
        ("Ρ", "Rho"), ("ρ", "rho"),
        ("Σ", "Sigma"), ("σ", "sigma"),
        ("Τ", "Tau"), ("τ", "tau"),
        ("Υ", "Upsilon"), ("υ", "upsilon"),
        ("Φ", "Phi"), ("φ", "phi"),
        ("Χ", "Chi"), ("χ", "chi"),
        ("Ψ", "Psi"), ("ψ", "psi"),
        ("Ω", "Omega"), ("ω", "omega"),
    ],
)
def test_greek_unicode_letters(self, unicode_char, letter_name):
    """Each single Greek unicode letter must be judged equal to its spelled-out name."""
    settings = {
        "strict_syntax": False,
        "elementary_functions": False,
    }
    outcome = evaluation_function(unicode_char, letter_name, settings)
    assert outcome["is_correct"] is True
2036+
2037+
@pytest.mark.parametrize(
    "unicode_expr,letter_expr",
    [
        # Basic expressions with common variables
        ("α + β", "alpha + beta"),
        ("2μ + 3", "2*mu + 3"),
        ("π*r^2", "pi*r^2"),
        ("θ/2", "theta/2"),
        ("σ^2", "sigma^2"),

        # Chi vs X confusion tests (CRITICAL)
        ("χ + x", "chi + x"),
        ("Χ + X", "Chi + X"),
        ("χ*x", "chi*x"),
        ("x^2 + χ", "x^2 + chi"),
        ("χ^2 + x^2", "chi^2 + x^2"),

        # Xi vs X confusion tests
        ("ξ + x", "xi + x"),
        ("Ξ*X", "Xi*X"),

        # Rho vs P confusion tests
        ("ρ + p", "rho + p"),
        ("Ρ*P", "Rho*P"),

        # Nu vs V confusion tests
        ("ν + v", "nu + v"),
        ("Ν*V", "Nu*V"),

        # Omicron vs O confusion tests
        ("ο + o", "omicron + o"),
        ("Ο*O", "Omicron*O"),

        # Multiple Greek letters with Latin variables
        ("α*x + β*y", "alpha*x + beta*y"),
        ("μ*σ^2 + ν", "mu*sigma^2 + nu"),
        ("γ*t + δ*s", "gamma*t + delta*s"),
        ("Λ*x + μ*y + ν*z", "Lambda*x + mu*y + nu*z"),
        ("λ*x + μ*y + ν*z", "lambda*x + mu*y + nu*z"),

        # Complex expressions
        ("sin(θ) + cos(φ)", "sin(theta) + cos(phi)"),
        ("e^(iπ)", "e^(i*pi)"),

        # Edge cases with similar-looking letters
        ("ω*t + φ", "omega*t + phi"),
        ("Ψ(x) + ψ(y)", "Psi(x) + psi(y)"),
        ("Δx/Δt", "Delta*x/Delta*t"),
        ("ε_0*μ_0", "epsilon_0*mu_0"),
    ],
)
def test_greek_letters_in_expressions(self, unicode_expr, letter_expr):
    """Expressions using Greek unicode letters must match the same expression with spelled-out names."""
    settings = {
        "strict_syntax": False,
        "elementary_functions": False,
    }
    outcome = evaluation_function(unicode_expr, letter_expr, settings)
    assert outcome["is_correct"] is True
2094+
2095+
20122096
# Run this test module directly through pytest.
if __name__ == "__main__":
    pytest_args = ['-xk not slow', "--tb=line", '--durations=10', os.path.abspath(__file__)]
    pytest.main(pytest_args)

0 commit comments

Comments
 (0)