coderxio
diff --git a/‎parsers/dose.py‎
Lines changed: 45 additions & 2 deletions b/‎parsers/dose.py‎
Lines changed: 45 additions & 2 deletions
diff --git a/‎parsers/duration.py‎
Lines changed: 2 additions & 2 deletions b/‎parsers/duration.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎parsers/frequency.py‎
Lines changed: 13 additions & 14 deletions b/‎parsers/frequency.py‎
Lines changed: 13 additions & 14 deletions
diff --git a/‎parsers/indication.py‎
Lines changed: 2 additions & 2 deletions b/‎parsers/indication.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎parsers/route.py‎
Lines changed: 44 additions & 6 deletions b/‎parsers/route.py‎
Lines changed: 44 additions & 6 deletions
@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the dose_patterns array
             dose_patterns.append(r'|'.join(p))        
-        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')(?!\s?\/\s?act)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         # alternatively, if negation text is found before the dose, don't generate a match
@@ -21,6 +21,16 @@ def normalize_match(self, match):
         dose_range = split_range(match.group('dose'))
         dose, dose_max = dose_range
         dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
+        # convert teaspoon and tablespoon to mL
+        if (dose_unit in ['teaspoon', 'tablespoon']):
+            multipliers = {
+                'teaspoon': 5,
+                'tablespoon': 15
+            }
+            multiplier = multipliers[dose_unit]
+            dose = dose * multiplier if dose else dose
+            dose_max = dose_max * multiplier if dose_max else dose_max
+            dose_unit = 'mL'
         dose_text_start, dose_text_end = match.span()
         dose_text = match[0]
         dose_readable = self.get_readable(dose=dose, dose_max=dose_max, dose_unit=dose_unit)
@@ -58,7 +68,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the dose_patterns array
             strength_unit_patterns.append(r'|'.join(p))        
-        pattern = re.compile(r'^(?:' + r'|'.join(method_patterns) + r')?\s?(?P<dose>' + RE_RANGE + r')(?!\d)(?!\s?(?:' + r'|'.join(strength_unit_patterns) + r'))', flags = re.I)
+        pattern = re.compile(r'^(?:' + r'|'.join(method_patterns) + r')?\s?(?P<dose>' + RE_RANGE + r')(?!(?:\s)?\d)(?!(?:\s)?(?:times|x))(?!\s?(?:' + r'|'.join(strength_unit_patterns) + r'))', flags = re.I)
         return pattern
     def normalize_match(self, match):
         dose_range = split_range(match.group('dose'))
@@ -87,11 +97,44 @@ def normalize_match(self, match):
         dose_readable = self.get_readable(dose_unit=dose_unit)
         return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
 
+class ApplyDoseUnitParser(DoseParser):
+    # turns the instruction verb 'apply' into an implicit dose of 1 application
+    def normalize_pattern(self):
+        # case-insensitive literal match on 'apply'
+        return re.compile(r'apply', flags = re.I)
+    def normalize_match(self, match):
+        start, end = match.span()
+        unit = 'application'
+        # the readable text is generated from the unit alone; the dose is fixed at 1
+        readable = self.get_readable(dose_unit=unit)
+        return self.generate_match({'dose': 1, 'dose_unit': unit, 'dose_text_start': start, 'dose_text_end': end, 'dose_text': match[0], 'dose_readable': readable})
+
+class EachDoseUnitParser(DoseParser):
+    # matches any MISCELLANEOUS_ROUTES name or listed pattern and reports it as a dose of 1 'each'
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # join the listed patterns plus the name itself with a | character, using a new list
+            # rather than p.append(n) so the shared MISCELLANEOUS_ROUTES lists are never mutated
+            dose_patterns.append(r'|'.join([*p, n]))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        # the matched text carries no quantity of its own, so the dose is fixed at 1 'each'
+        dose = 1
+        dose_unit = 'each'
+        dose_text_start, dose_text_end = match.span()
+        dose_text = match[0]
+        dose_readable = self.get_readable(dose_unit=dose_unit)
+        return self.generate_match({'dose': dose, 'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
+
 
 parsers = [
     DoseParser(),
     DoseOnlyParser(),
     DoseUnitOnlyParser(),
+    ApplyDoseUnitParser(),
+    EachDoseUnitParser(),
 ]
 
 #print(DoseParser().parse('take one capsule prn nausea for 5 days'))
 
@@ -20,7 +20,7 @@ def get_readable(self, duration=None, duration_max=None, duration_unit=None):
 
 # for x [more] days
 class DurationParserForXDays(DurationParser):
-    pattern = r'(?:for|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
+    pattern = r'(?:for|\bf|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b|w\b)'  # plural 's' is optional so singular units ('for 1 week', 'x 1 day') still match
     def normalize_match(self, match):
         duration_range = split_range(match.group('duration'))
         duration_text_start, duration_text_end = match.span()
@@ -33,7 +33,7 @@ def normalize_match(self, match):
 
 # up to x days
 class DurationParserUpToXDays(DurationParser):
-    pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
+    pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b)'  # plural 's' is optional so singular units ('up to 1 week') still match
     def normalize_match(self, match):
         duration_range = split_range(match.group('duration'))
         duration_text_start, duration_text_end = match.span()
 
@@ -72,7 +72,7 @@ def normalize_match(self, match):
 # frequency = a (1 if once, 2 if twice), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # NOTE: 'daily' won't match this pattern because it requires specific times *per* day
 class FrequencyXTimesPerDay(FrequencyParser):
-	pattern = r'(?P<frequency>' + RE_RANGE + r'\s*(?:time(?:s)?|x|nights|days)|once|twice)\s*(?:per|a|each|every|\/)\s*(?P<period_unit>day|week|month|year|d\b|w\b|mon|m\b|yr)'
+	pattern = r'(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x|nights|days)|once|twice)\s?(?:per|a|each|every|\/)\s?(?P<period_unit>day|week|wk\b|month|year|d\b|w\b|mon|m\b|yr)'
 	def normalize_match(self, match):
 		frequency = frequency_max = match.group('frequency')
 		if (frequency):
@@ -95,7 +95,7 @@ def normalize_match(self, match):
 # frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 class FrequencyXTimesDaily(FrequencyParser):
-	pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
+	pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|d\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
 	def normalize_match(self, match):
 		frequency = frequency_max = match.group('frequency')
 		if (frequency):
@@ -112,11 +112,11 @@ def normalize_match(self, match):
 
 
 # daily | nightly | weekly | monthly | yearly
-# NOTE: this is where 'daily' matches
+# NOTE: this is where 'daily' matches 
 # frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 class FrequencyDaily(FrequencyParser):
-	pattern = r'(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
+	pattern = r'(?P<period_unit>\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
 	def normalize_match(self, match):
 		frequency = 1
 		period = 1
@@ -129,13 +129,13 @@ def normalize_match(self, match):
 
 
 # every | each | per | q
-# [other]
+# [other] 
 # day | week | month | morning | afternoon | evening | night | hs
 # TODO: combine with the qpm/qhs/qday/qdaily group above (not sure if this still applies)
 # frequency = 1, period = 1 (or 2 if a is not null), periodUnit = b (normalize to d, wk, mo), [when = b (normalize to MORN, AFT, EVE, etc]
 # NOTE: moved below FrequencyDaily because "per day" was taking precedence in max daily dose text
 class FrequencyEveryDay(FrequencyParser):
-	pattern = r'(?:every|each|q|per)\s?(?P<period>other\b|o\b)?\s*(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|month|morning|afternoon|evening at bedtime|bedtime|evening|night|hs\b|pm\b|am\b|d\b)'
+	pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o)?\s?(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|wk\b|month|morning(?! and)|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
 	def normalize_match(self, match):
 		frequency = 1
 		period = 2 if match.group('period') else 1
@@ -151,7 +151,7 @@ def normalize_match(self, match):
 # Monday, Tuesday, Wednesday, and Friday
 # dayOfWeek = a
 class FrequencySpecificDayOfWeek(FrequencyParser):
-	pattern = r'(?:every|on|q)\s+(?P<day_of_week>(?:(?:\s*(?:and|&|\+|,)\s*)*(?:' + RE_DAYS_OF_WEEK + '))+)'
+	pattern = r'(?:every|on|q)\s?(?P<day_of_week>(?:(?:\s?(?:and|&|\+|,|\s)\s?)?(?:' + RE_DAYS_OF_WEEK + '))+)'
 	def normalize_match(self, match):
 		# TODO: normalize days of week to be comma or pipe delimited - tuesday and thursday -> tuesday|thursday or tuesday,thursday
 		day_of_week = match.group('day_of_week')
@@ -166,7 +166,7 @@ def normalize_match(self, match):
 # morning | evening | afternoon
 # frequency = 1, when = a
 class FrequencyInTheX(FrequencyParser):
-	pattern = r'in the\s*(morning|evening|afternoon)'
+	pattern = r'in the\s?(morning|evening|afternoon)'
 	def normalize_match(self, match):
 		frequency = 1
 		period = 1
@@ -197,7 +197,7 @@ def normalize_match(self, match):
 # one time only
 # count = 1
 class FrequencyOneTime(FrequencyParser):
-	pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once$|once in imaging|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
+	pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once in imaging|once|before transfusion|(?:one|1) hour prior to (?:dental )?appointment|at (?:the )?(?:first|1st) (?:onset|sign) of symptoms)'  # 'once in imaging' must precede 'once' (alternation is ordered); 'onset|sign' was mistyped as 'onset:sign'
 	def normalize_match(self, match):
 		count = 1
 		frequency_text_start, frequency_text_end = match.span()
@@ -208,12 +208,11 @@ def normalize_match(self, match):
 
 
 class FrequencyAsDirected(FrequencyParser):
-	pattern = r'as directed(?: on package)?|ad lib|as instructed|see admin instructions|see notes'
+	pattern = r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions|follow package directions|see notes|sliding scale|per package instructions'
 	def normalize_match(self, match):
-		# TODO: how to capture just text?
-		# text = 'as directed'
 		frequency_text_start, frequency_text_end = match.span()
-		frequency_text = match.group(0)
+		# frequency_text = match.group(0)
+		frequency_text = 'as directed'
 		frequency_readable = frequency_text
 		# TODO: normalize text numbers to integer numbers - maybe make separate normalize_period_unit function that also hits the text_to_int function?
 		return self.generate_match({'frequency_text_start': frequency_text_start, 'frequency_text_end': frequency_text_end, 'frequency_text': frequency_text, 'frequency_readable': frequency_readable})
@@ -236,5 +235,5 @@ def normalize_match(self, match):
 	FrequencyAtBedtime(),
 	FrequencyOneTime(),
 	# NOTE: removing this parser for DRX implementation - may consider adding back
-	# FrequencyAsDirected(),
+	FrequencyAsDirected(),
 ]
@@ -5,7 +5,7 @@
 # TODO: replace wildcard indication search with INDICATIONS list from normalize.py
 class IndicationParser(Parser):
     parser_type = 'indication'
-    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|if needed|prn for|prf|prn)(?:\s?(?P<indication>.{,250}))?'
+    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?'
     match_keys = ['as_needed', 'indication', 'indication_text_start', 'indication_text_end', 'indication_text', 'indication_readable']
     def normalize_match(self, match):
         as_needed = 1
@@ -30,7 +30,7 @@ def get_readable(self, as_needed=None, indication=None):
         return readable
 
 class ChronicIndicationParser(IndicationParser):
-    pattern = r'(?!as needed|if needed|prn|prf) for (?P<indication>.{,250})(?!' + RE_RANGE + r')'
+    pattern = r'(?!as needed |if needed |prn |prf |prnf )(?:for|indications) (?P<indication>.{,250})(?!' + RE_RANGE + r')'
     def normalize_match(self, match):
         indication_text = match.group('indication')
         indication = (get_indication(indication_text) if indication_text != None else indication_text)
 
@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(ROUTES, match.group('route'))
@@ -32,7 +32,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(INHALATION_ROUTES, match.group('route'))
@@ -51,7 +51,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             topical_route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(TOPICAL_ROUTES, match.group('route'))
@@ -113,8 +113,9 @@ def parse(self, sig):
         self.matches = matches
         return matches
 
+
 class InferredOralRouteParser(RouteParser):
-    pattern = r'\b(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)\b'
+    pattern = r'(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)'
     def normalize_pattern(self):
         return re.compile(self.pattern, flags = re.I)
     def normalize_match(self, match):
@@ -124,11 +125,48 @@ def normalize_match(self, match):
         route_readable = self.get_readable(route=route)
         return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
 
+
+# infers inhalation route for things like 'puffs' in the absence of other more specific routes (nasal phrasings are excluded by the lookaheads)
+class InferredInhalationRouteParser(RouteParser):
+    pattern = r'puff(?:s|(?!s))(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)'  # (?:s|(?!s)) forbids backtracking to bare 'puff', which previously let 'puffs in each nostril' slip past the lookaheads
+    def normalize_pattern(self):
+        return re.compile(self.pattern, flags = re.I)
+    def normalize_match(self, match):
+        route = 'into the lungs'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
+class MiscellaneousRouteParser(RouteParser):
+    # matches any MISCELLANEOUS_ROUTES name or listed pattern and reports the route as 'miscellaneous'
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # join the listed patterns plus the name itself with a | character, using a new list
+            # rather than p.append(n) so the shared MISCELLANEOUS_ROUTES lists are never mutated
+            dose_patterns.append(r'|'.join([*p, n]))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        route = 'miscellaneous'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
+# NOTE: moved InhalationRouteParser above RouteParser here so that "2 PUFFS BY MOUTH DAILY" resolved to "into the lungs" instead of "by mouth"...
+#       however, left it in different order above for class inheritance
 parsers = [
-    RouteParser(),
     InhalationRouteParser(),
+    RouteParser(),
     TopicalRouteParser(),
-    InferredOralRouteParser()
+    # InferredOralRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    # InferredInhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    MiscellaneousRouteParser(),
 ]
 
 #print(RouteParser().parse('take one by mouth daily'))