
Commit 7f0e953: Phase 3 work
Parent: a1463a4

7 files changed: +144 additions, -84 deletions

parsers/dose.py

Lines changed: 11 additions & 4 deletions

@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the dose_patterns array
             dose_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')(?!\s?\/\s?act)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         # alternatively, if negation text is found before the dose, don't generate a match
@@ -111,15 +111,22 @@ def normalize_match(self, match):
 
 class EachDoseUnitParser(DoseParser):
     def normalize_pattern(self):
-        dose_patterns = r'|'.join(ROUTES['miscellaneous'])
-        pattern = re.compile(r'(' + dose_patterns + r')', flags = re.I)
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
+        dose = 1
         dose_unit = 'each'
         dose_text_start, dose_text_end = match.span()
         dose_text = match[0]
         dose_readable = self.get_readable(dose_unit=dose_unit)
-        return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
+        return self.generate_match({'dose': dose, 'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
 
 
 parsers = [
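
A quick standalone check of the new (?!\s?\/\s?act) lookahead. This is a simplified sketch: a small literal unit list stands in for the generated dose_patterns alternation and a bare \d+ stands in for RE_RANGE, neither of which is shown in this diff. It illustrates why a strength like "90 mcg/actuation" is no longer read as the administered dose (EachDoseUnitParser also now reports dose = 1 alongside dose_unit = 'each').

import re

# Simplified stand-in for the generated dose pattern; 'puff|mcg|mg' replaces the
# real dose_patterns list and \d+ replaces RE_RANGE (both are assumptions for illustration).
pattern = re.compile(r'(?P<dose>\d+)\s?(?P<dose_unit>puff(?:s)?|mcg|mg)(?!\s?\/\s?act)', re.I)

print(pattern.search('inhale 2 puffs (90 mcg/actuation) twice daily').groupdict())
# {'dose': '2', 'dose_unit': 'puffs'}
print(pattern.search('albuterol 90 mcg/actuation inhaler'))
# None -- '90 mcg' is skipped because '/act' follows the unit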

parsers/frequency.py

Lines changed: 8 additions & 9 deletions

@@ -72,7 +72,7 @@ def normalize_match(self, match):
 # frequency = a (1 if once, 2 if twice), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # NOTE: 'daily' won't match this pattern because it requires specific times *per* day
 class FrequencyXTimesPerDay(FrequencyParser):
-    pattern = r'(?P<frequency>' + RE_RANGE + r'\s*(?:time(?:s)?|x|nights|days)|once|twice)\s*(?:per|a|each|every|\/)\s*(?P<period_unit>day|week|month|year|d\b|w\b|mon|m\b|yr)'
+    pattern = r'(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x|nights|days)|once|twice)\s?(?:per|a|each|every|\/)\s?(?P<period_unit>day|week|wk\b|month|year|d\b|w\b|mon|m\b|yr)'
     def normalize_match(self, match):
         frequency = frequency_max = match.group('frequency')
         if (frequency):
@@ -95,7 +95,7 @@ def normalize_match(self, match):
 # frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 class FrequencyXTimesDaily(FrequencyParser):
-    pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
+    pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|d\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
     def normalize_match(self, match):
         frequency = frequency_max = match.group('frequency')
         if (frequency):
@@ -135,7 +135,7 @@ def normalize_match(self, match):
 # frequency = 1, period = 1 (or 2 if a is not null), periodUnit = b (normalize to d, wk, mo), [when = b (normalize to MORN, AFT, EVE, etc]
 # NOTE: moved below FrequencyDaily because "per day" was taking precedence in max daily dose text
 class FrequencyEveryDay(FrequencyParser):
-    pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o\b)?\s*(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|month|morning|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
+    pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o)?\s?(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|wk\b|month|morning(?! and)|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
     def normalize_match(self, match):
         frequency = 1
         period = 2 if match.group('period') else 1
@@ -166,7 +166,7 @@ def normalize_match(self, match):
 # morning | evening | afternoon
 # frequency = 1, when = a
 class FrequencyInTheX(FrequencyParser):
-    pattern = r'in the\s*(morning|evening|afternoon)'
+    pattern = r'in the\s?(morning|evening|afternoon)'
     def normalize_match(self, match):
         frequency = 1
         period = 1
@@ -197,7 +197,7 @@ def normalize_match(self, match):
 # one time only
 # count = 1
 class FrequencyOneTime(FrequencyParser):
-    pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once|once in imaging|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
+    pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once|once in imaging|before transfusion|(?:one|1) hour prior to (?:dental )?appointment|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
     def normalize_match(self, match):
         count = 1
         frequency_text_start, frequency_text_end = match.span()
@@ -208,12 +208,11 @@ def normalize_match(self, match):
 
 
 class FrequencyAsDirected(FrequencyParser):
-    pattern = r'as directed(?: on package)?|ad lib|as instructed|see admin instructions|see notes'
+    pattern = r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions|follow package directions|see notes|sliding scale|per package instructions'
     def normalize_match(self, match):
-        # TODO: how to capture just text?
-        # text = 'as directed'
         frequency_text_start, frequency_text_end = match.span()
-        frequency_text = match.group(0)
+        # frequency_text = match.group(0)
+        frequency_text = 'as directed'
         frequency_readable = frequency_text
         # TODO: normalize text numbers to integer numbers - maybe make separate normalize_period_unit function that also hits the text_to_int function?
         return self.generate_match({'frequency_text_start': frequency_text_start, 'frequency_text_end': frequency_text_end, 'frequency_text': frequency_text, 'frequency_readable': frequency_readable})
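
The widened FrequencyAsDirected alternation can be exercised on its own; with the new normalize_match, every variant is reported with the canonical text 'as directed'. The pattern below is copied from this diff; the test sigs are made-up examples.

import re

# FrequencyAsDirected alternation from this commit, compiled standalone.
pattern = re.compile(
    r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions'
    r'|follow package directions|see notes|sliding scale|per package instructions',
    re.I,
)

for sig in ['use as dir', 'inject per sliding scale', 'apply as directed on package']:
    print(sig, '->', 'as directed' if pattern.search(sig) else None)
# each sig above prints 'as directed'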

parsers/indication.py

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 # TODO: replace wildcard indication search with INDICATIONS list from normalize.py
 class IndicationParser(Parser):
     parser_type = 'indication'
-    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn)(?:\s?(?P<indication>.{,250}))?'
+    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?'
     match_keys = ['as_needed', 'indication', 'indication_text_start', 'indication_text_end', 'indication_text', 'indication_readable']
     def normalize_match(self, match):
         as_needed = 1
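
The added 'at (?:the )?(?:first|1st) sign of' trigger marks the sig as as-needed and still hands the trailing text to the wildcard indication capture. The pattern below is copied from this diff; the example sig is made up.

import re

# IndicationParser pattern from this commit, compiled standalone.
pattern = re.compile(
    r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed'
    r'|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?',
    re.I,
)

m = pattern.search('take 1 tablet at the first sign of migraine')
print(m.group('as_needed'), '|', m.group('indication'))
# at the first sign of | migraine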

parsers/route.py

Lines changed: 42 additions & 6 deletions

@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(ROUTES, match.group('route'))
@@ -32,7 +32,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(INHALATION_ROUTES, match.group('route'))
@@ -51,7 +51,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             topical_route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(TOPICAL_ROUTES, match.group('route'))
@@ -113,8 +113,9 @@ def parse(self, sig):
         self.matches = matches
         return matches
 
+
 class InferredOralRouteParser(RouteParser):
-    pattern = r'\b(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)\b'
+    pattern = r'(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)'
     def normalize_pattern(self):
         return re.compile(self.pattern, flags = re.I)
     def normalize_match(self, match):
@@ -124,13 +125,48 @@ def normalize_match(self, match):
         route_readable = self.get_readable(route=route)
         return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
 
+
+# infers inhalation route for things like 'puffs' in the absence of other more specific routes
+class InferredInhalationRouteParser(RouteParser):
+    pattern = r'puff(?:s)?(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)'
+    def normalize_pattern(self):
+        return re.compile(self.pattern, flags = re.I)
+    def normalize_match(self, match):
+        route = 'into the lungs'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
+class MiscellaneousRouteParser(RouteParser):
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        route = 'miscellaneous'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
 # NOTE: moved InhalationRouteParser above RouteParser here so that "2 PUFFS BY MOUTH DAILY" resolved to "into the lungs" instead of "by mouth"...
 # however, left it in different order above for class inheritance
 parsers = [
-    # InhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    InhalationRouteParser(),
     RouteParser(),
     TopicalRouteParser(),
-    # InferredOralRouteParser()
+    # InferredOralRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    # InferredInhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    MiscellaneousRouteParser(),
 ]
 
 #print(RouteParser().parse('take one by mouth daily'))
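
The new InferredInhalationRouteParser pattern can also be checked in isolation: a bare 'puff(s)' is inferred as an inhaled route, while the negative lookaheads leave nasal phrasings for other parsers. The pattern is copied from this diff (the parser itself ships commented out in the parsers list); the example sigs are made up.

import re

# InferredInhalationRouteParser pattern from this commit, compiled standalone.
pattern = re.compile(
    r'puff(?:s)?(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)',
    re.I,
)

print(bool(pattern.search('2 puffs twice daily')))           # True  -> inferred 'into the lungs'
print(bool(pattern.search('1 puff in each nostril daily')))  # False -> not inferred as inhalation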
