@@ -476,73 +476,106 @@ def feedback_procedure_generator(parameters_dict):
476476 graphs .update ({label : graph })
477477 return graphs
478478
479-
480- def expression_preprocess (name , expr , parameters ):
481- if parameters .get ("strictness" , "natural" ) == "legacy" :
482- prefix_data = {(p [0 ], p [1 ], tuple (), p [3 ]) for p in set_of_SI_prefixes }
483- prefixes = []
484- for prefix in prefix_data :
485- prefixes = prefixes + [prefix [0 ]] + list (prefix [- 1 ])
486- prefix_short_forms = [prefix [1 ] for prefix in prefix_data ]
487- unit_data = set_of_SI_base_unit_dimensions \
488- | set_of_derived_SI_units_in_SI_base_units \
489- | set_of_common_units_in_SI \
490- | set_of_very_common_units_in_SI \
491- | set_of_imperial_units
492- unit_long_forms = prefixes
493- for unit in unit_data :
494- unit_long_forms = unit_long_forms + [unit [0 ]] + list (unit [- 2 ]) + list (unit [- 1 ])
495- unit_long_forms = "(" + "|" .join (unit_long_forms )+ ")"
496- # Rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
497- # Example: "newton*metre" ---> "newton metre"
498- search_string = r"(?<!\*)\* *" + unit_long_forms
def _rewrite_until_stable(pattern, expr, rewrite, start=0):
    """Rewrite the first match of ``pattern`` repeatedly until none is left.

    The search is restarted on ``expr[start:]`` after every rewrite, so a
    rewrite may expose a further match that a single left-to-right pass would
    miss.  ``rewrite`` receives the matched text and returns the text that
    replaces it.  Termination relies on ``rewrite`` destroying the match it
    was given.
    """
    compiled = re.compile(pattern)
    match = compiled.search(expr[start:])
    while match is not None:
        # Translate the span from slice coordinates back to ``expr``.
        begin, end = (index + start for index in match.span())
        expr = expr[:begin] + rewrite(match.group()) + expr[end:]
        match = compiled.search(expr[start:])
    return expr


def preprocess_legacy(expr, parameters):
    """Normalise spacing and ``*`` around unit and prefix names (legacy mode).

    The rewrites are applied in this order:

    1. ``*UNIT`` (but not ``**UNIT``) becomes `` UNIT`` for long-form names.
    2. ``PREFIX UNIT`` becomes ``PREFIXUNIT`` for long-form names.
    3. A space is inserted before a short form that directly follows a digit,
       ``*``, ``(`` or ``)``.
    4. The space after a prefix short form is removed.
    5. ``* `` after a prefix short form is removed.
    6. ``* `` before a unit short form is replaced by spaces.

    Args:
        expr: The expression string to rewrite.
        parameters: Accepted for interface compatibility; not read here.

    Returns:
        The rewritten expression string.
    """
    # Each prefix entry is indexed as [0] long name, [1] short form and
    # [3] iterable of alternative long names.
    prefix_long_forms = []
    prefix_short_forms = []
    for prefix in set_of_SI_prefixes:
        prefix_long_forms.append(prefix[0])
        prefix_long_forms.extend(prefix[3])
        prefix_short_forms.append(prefix[1])

    unit_data = (
        set_of_SI_base_unit_dimensions
        | set_of_derived_SI_units_in_SI_base_units
        | set_of_common_units_in_SI
        | set_of_very_common_units_in_SI
        | set_of_imperial_units
    )
    # Each unit entry contributes [0] and the names in its last two fields to
    # the long forms, and [1] to the short forms.  Prefix long names are part
    # of the long-form alternation as well.
    long_forms = list(prefix_long_forms)
    unit_short_forms = []
    for unit in unit_data:
        long_forms.append(unit[0])
        long_forms.extend(unit[-2])
        long_forms.extend(unit[-1])
        unit_short_forms.append(unit[1])

    long_form_group = "(" + "|".join(long_forms) + ")"
    prefix_long_group = "(" + "|".join(prefix_long_forms) + ")"
    short_form_group = "(" + "|".join(set(prefix_short_forms + unit_short_forms)) + ")"
    prefix_short_group = "(" + "|".join(prefix_short_forms) + ")"
    unit_short_group = "(" + "|".join(unit_short_forms) + ")"

    # Rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
    # Example: "newton*metre" ---> "newton metre"
    # The first character of expr is excluded from the search.
    expr = _rewrite_until_stable(
        r"(?<!\*)\* *" + long_form_group,
        expr,
        lambda text: text.replace("*", " "),
        start=1,
    )

    # Rewrite any expression on the form "PREFIX UNIT" as "PREFIXUNIT"
    # Example: "kilo metre" ---> "kilometre"
    expr = _rewrite_until_stable(
        prefix_long_group + " " + long_form_group,
        expr,
        lambda text: " " + "".join(text.split()),
    )

    # Add space before short forms of prefixes or unit names if they are
    # preceded by numbers or multiplication
    # Example: "100Pa" ---> "100 Pa"
    expr = _rewrite_until_stable(
        r"[0-9\*\(\)]" + short_form_group,
        expr,
        lambda text: text[0] + " " + text[1:],
    )

    # Remove space after prefix short forms if they are preceded by numbers,
    # multiplication or space
    # Example: "100 m Pa" ---> "100 mPa"
    # Only the trailing space is dropped; the character in front of the prefix
    # is part of the match and must be kept exactly once.
    expr = _rewrite_until_stable(
        r"[0-9\*\(\) ]" + prefix_short_group + " ",
        expr,
        lambda text: text[:-1],
    )

    # Remove multiplication and space after prefix short forms if they are
    # preceded by numbers, multiplication or space
    # Example: "100 m* Pa" ---> "100 mPa"
    expr = _rewrite_until_stable(
        r"[0-9\*\(\) ]" + prefix_short_group + r"\* ",
        expr,
        lambda text: text[:-2],
    )

    # Replace multiplication followed by space before unit short forms with
    # only spaces if they are preceded by numbers or space
    # Example: "100* Pa" ---> "100  Pa"
    expr = _rewrite_until_stable(
        r"[0-9\(\) ]\* " + unit_short_group,
        expr,
        lambda text: text.replace("*", " "),
    )

    return expr
548+
def transform_prefixes_to_standard(expr, prefixes=None):
    """Replace alternative prefix spellings with the standard prefix name.

    Only the alternative spellings listed in the last field of each prefix
    entry are rewritten; standard names and short symbols are left untouched.
    An alternative is rewritten when it is not preceded by a word character
    and is followed, after optional whitespace, by a letter, ``µ`` or ``Ω``.
    It is replaced by the standard name plus exactly one space.  Afterwards
    runs of whitespace are collapsed to a single space and the result is
    stripped.

    All alternatives are substituted in a single pass with the longest
    alternative tried first, so the outcome does not depend on the iteration
    order of the prefix table, and text inserted by one substitution is never
    rewritten by another.

    Args:
        expr: The expression string to normalise.
        prefixes: Iterable of ``(name, symbol, power, alternatives)`` entries.
            Defaults to the module-level ``set_of_SI_prefixes``.

    Returns:
        The normalised expression string.
    """
    if prefixes is None:
        prefixes = set_of_SI_prefixes

    standard_name = {}
    for prefix_name, _symbol, _power, alternatives in prefixes:
        for alt in alternatives:
            if not alt:
                continue
            # If two prefixes list the same alternative, pick one
            # deterministically rather than by iteration order.
            standard_name[alt] = min(prefix_name, standard_name.get(alt, prefix_name))

    # An empty alternation would match everywhere, so skip it entirely.
    if standard_name:
        longest_first = sorted(standard_name, key=lambda alt: (-len(alt), alt))
        pattern = re.compile(
            r"(?<!\w)("
            + "|".join(re.escape(alt) for alt in longest_first)
            + r")\s*(?=[A-Za-zµΩ])"
        )
        # A function replacement inserts the name literally; a plain string
        # would be parsed for backslash escapes and group references.
        expr = pattern.sub(lambda match: standard_name[match.group(1)] + " ", expr)

    # Normalize spacing (no multiple spaces)
    return re.sub(r"\s{2,}", " ", expr).strip()
570+
def expression_preprocess(name, expr, parameters):
    """Preprocess an expression according to the configured strictness.

    When ``parameters["strictness"]`` is ``"legacy"`` the expression is passed
    through ``preprocess_legacy``; for any other value (the default is
    ``"natural"``) it is passed through ``transform_prefixes_to_standard``.

    Args:
        name: Not read by this function.
        expr: The expression string to preprocess.
        parameters: Mapping that may carry a ``"strictness"`` entry.

    Returns:
        A ``(success, expr, feedback)`` triple; this function always returns
        ``True`` for success and ``None`` for feedback.
    """
    use_legacy = parameters.get("strictness", "natural") == "legacy"
    if use_legacy:
        processed = preprocess_legacy(expr, parameters)
    else:
        processed = transform_prefixes_to_standard(expr)
    return True, processed, None
546579
547580
548581def feedback_string_generator (tags , graph , parameters_dict ):
0 commit comments