coderxio
diff --git a/‎parsers/dose.py‎
Lines changed: 21 additions & 19 deletions b/‎parsers/dose.py‎
Lines changed: 21 additions & 19 deletions
diff --git a/‎parsers/max.py‎
Lines changed: 105 additions & 0 deletions b/‎parsers/max.py‎
Lines changed: 105 additions & 0 deletions
diff --git a/‎parsers/services/normalize.py‎
Lines changed: 27 additions & 3 deletions b/‎parsers/services/normalize.py‎
Lines changed: 27 additions & 3 deletions
diff --git a/‎parsers/sig.py‎
Lines changed: 76 additions & 25 deletions b/‎parsers/sig.py‎
Lines changed: 76 additions & 25 deletions
diff --git a/‎sig/migrations/0011_auto_20220718_1626.py‎
Lines changed: 33 additions & 0 deletions b/‎sig/migrations/0011_auto_20220718_1626.py‎
Lines changed: 33 additions & 0 deletions
@@ -42,24 +42,6 @@ def get_readable(self, dose=None, dose_max=None, dose_unit=None):
         readable = readable.strip()
         return readable
 
-class DoseUnitOnlyParser(DoseParser):
-    def normalize_pattern(self):
-        dose_patterns = []
-        for n, p in DOSE_UNITS.items():
-            # add the name of the pattern to the list of matched patterns
-            p.append(n)
-            # and join them with a | character
-            # and add them to the dose_patterns array
-            dose_patterns.append(r'|'.join(p))        
-        pattern = re.compile(r'\b(?P<dose_unit>' + r'|'.join(dose_patterns) + r')\b', flags = re.I)
-        return pattern
-    def normalize_match(self, match):
-        dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
-        dose_text_start, dose_text_end = match.span()
-        dose_text = match[0]
-        dose_readable = self.get_readable(dose_unit=dose_unit)
-        return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
-
 class DoseOnlyParser(DoseParser):
     def normalize_pattern(self):
         method_patterns = []
@@ -86,10 +68,30 @@ def normalize_match(self, match):
         dose_readable = self.get_readable(dose=dose, dose_max=dose_max)
         return self.generate_match({'dose': dose, 'dose_max': dose_max, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
 
+# NOTE: DoseUnitOnlyParser was moved BELOW DoseOnlyParser because we prefer matching "2" over "tablets" in a sig like "2 po qid max dose 6 tabs"
+class DoseUnitOnlyParser(DoseParser):
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in DOSE_UNITS.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))        
+        pattern = re.compile(r'\b(?P<dose_unit>' + r'|'.join(dose_patterns) + r')\b', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
+        dose_text_start, dose_text_end = match.span()
+        dose_text = match[0]
+        dose_readable = self.get_readable(dose_unit=dose_unit)
+        return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
+
+
+# Dose parsers exported by this module. SigParser reads match_keys from parsers[0].
+# DoseOnlyParser is listed before DoseUnitOnlyParser so that a bare number is
+# preferred over a bare unit (presumably earlier entries win -- confirm in Parser).
 parsers = [
     DoseParser(),
-    DoseUnitOnlyParser(),
     DoseOnlyParser(),
+    DoseUnitOnlyParser(),
 ]
 
 #print(DoseParser().parse('take one capsule prn nausea for 5 days'))
 
@@ -0,0 +1,105 @@
+from .classes.parser import *
+
+class MaxParser(Parser):
+    parser_type = 'max'
+    match_keys = ['max_numerator_value', 'max_numerator_unit', 'max_denominator_value', 'max_denominator_unit', 'max_text_start', 'max_text_end', 'max_text', 'max_readable']
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in DOSE_UNITS.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        strength_patterns = []
+        for n, p in STRENGTH_UNITS.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the strength_patterns array
+            strength_patterns.append(r'|'.join(p))        
+        pattern = re.compile(r'(?:max(?: dose)?|do not take more than|no more than|nmt|do not exceed)\s?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + '|' + r'|'.join(strength_patterns) + r')?\s?(?:per|\/|in(?: a)?)\s?(?P<period>' + RE_RANGE + r')?\s?(?P<period_unit>day|hours|hour|hrs|hr|\bh\b|week|month|year|d\b|w\b|mon|m\b|yr)', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        dose_range = split_range(match.group('dose'))
+        max_numerator_value = dose_range[1] or dose_range[0]
+        # need to check for normalizaion for both dose units and strength units
+        # will return text of match if no normalization found
+        max_numerator_unit = get_normalized(STRENGTH_UNITS, get_normalized(DOSE_UNITS, match.group('dose_unit'))) if match.group('dose_unit') else None
+        max_denominator_value = None
+        if match.group('period'):
+            period_range = split_range(match.group('period')) 
+            max_denominator_value = period_range[1] or period_range[0]
+        max_denominator_unit = get_normalized(PERIOD_UNIT, match.group('period_unit')) if match.group('period_unit') else None
+        # set denominator value to 1 in the example of "max 3 tablets per day" -> "max 3 tablets per 1 day"
+        if max_denominator_unit and not max_denominator_value:
+            max_denominator_value = 1
+        max_text_start, max_text_end = match.span()
+        max_text = match[0]
+        max_readable = self.get_readable(
+            max_numerator_value=max_numerator_value, 
+            max_numerator_unit=max_numerator_unit, 
+            max_denominator_value=max_denominator_value,
+            max_denominator_unit=max_denominator_unit)
+        return self.generate_match({'max_numerator_value': max_numerator_value, 'max_numerator_unit': max_numerator_unit, 'max_denominator_value': max_denominator_value, 'max_denominator_unit': max_denominator_unit, 'max_text_start': max_text_start, 'max_text_end': max_text_end, 'max_text': max_text, 'max_readable': max_readable})
+    def get_readable(self, max_numerator_value=None, max_numerator_unit=None, max_denominator_value=None, max_denominator_unit=None):
+        if 1 == 2:
+            return ''
+        
+        readable = ' - max'
+        readable += ' ' + str(max_numerator_value) if max_numerator_value else ''
+        plural_dose_unit = max_numerator_value and max_numerator_value > 1
+        if max_numerator_unit:
+            if plural_dose_unit:
+                max_numerator_unit += 'e' if max_numerator_unit[-1:] == 'h' else ''
+                max_numerator_unit += 's' if max_numerator_unit not in ['oz','mL','L','cm', 'mg','mcg','g','mEq'] else ''
+            readable += ' ' + max_numerator_unit
+
+        readable += ' per' if max_denominator_value or max_denominator_unit else ''
+        readable += ' ' + str(max_denominator_value) if max_denominator_value and max_denominator_value != 1 else ''
+        plural_duration = max_denominator_value and max_denominator_value > 1
+        if max_denominator_unit:
+            max_denominator_unit += 's' if plural_duration else ''
+        readable += ' ' + max_denominator_unit
+        return readable
+
+
+class MaxDailyParser(MaxParser):
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in DOSE_UNITS.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        strength_patterns = []
+        for n, p in STRENGTH_UNITS.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the strength_patterns array
+            strength_patterns.append(r'|'.join(p))        
+        pattern = re.compile(r'(?:max(?:imum)? daily (?:dose|amount)|mdd)\s?(?:=|is)?\s?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + '|' + r'|'.join(strength_patterns) + r')?', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        dose_range = split_range(match.group('dose'))
+        max_numerator_value = dose_range[1] or dose_range[0]
+        # need to check for normalizaion for both dose units and strength units
+        # will return text of match if no normalization found
+        max_numerator_unit = get_normalized(STRENGTH_UNITS, get_normalized(DOSE_UNITS, match.group('dose_unit'))) if match.group('dose_unit') else None
+        max_denominator_value = 1
+        max_denominator_unit = get_normalized(PERIOD_UNIT, 'day')
+        max_text_start, max_text_end = match.span()
+        max_text = match[0]
+        max_readable = self.get_readable(
+            max_numerator_value=max_numerator_value, 
+            max_numerator_unit=max_numerator_unit, 
+            max_denominator_value=max_denominator_value,
+            max_denominator_unit=max_denominator_unit)
+        return self.generate_match({'max_numerator_value': max_numerator_value, 'max_numerator_unit': max_numerator_unit, 'max_denominator_value': max_denominator_value, 'max_denominator_unit': max_denominator_unit, 'max_text_start': max_text_start, 'max_text_end': max_text_end, 'max_text': max_text, 'max_readable': max_readable})
+
+# Max-dose parsers exported by this module; SigParser registers this list under
+# its 'max' key and reads match_keys from the first entry (parsers[0]).
+parsers = [
+    MaxParser(),
+    MaxDailyParser(),
+]
@@ -377,10 +377,7 @@
   'mcg': [r'(?:microgram(?:s)?|mcgs)\b'],
   'g': [r'(?:gm|gms|gram(?:s)?)\b'],
   'international unit': [r'i\.u\.\b', r'iu\b', 'international units', r'int\'l unit',  'intl unit'],
-  'unit': [r'units'],
   'mEq': [r'milliequivalent(?:s)?'],
-  'teaspoon': [r'\btsp\b', 'teaspoons', 'teaspoonsful', 'teaspoonful', 'teaspoonfuls'],
-  'tablespoon': [r'\btbsp\b', 'tablespoon', 'tablespoonsful', 'tablespoonful', 'tablespoonfuls'],
 }
 
 DOSE_STRENGTH_NEGATION = [
@@ -400,6 +397,30 @@
 ]
 RE_DOSE_STRENGTH_NEGATION = r'|'.join(DOSE_STRENGTH_NEGATION)
 
+# Dose units for which SigParser.get_max_dose_per_day does not compute a value:
+# it returns None when either the sig's dose unit or the max-dose unit is in this list.
+# NOTE(review): 'g' and 'mL' are commented out, so they are currently NOT excluded -- confirm that is intended.
+EXCLUDED_MDD_DOSE_UNITS = [
+  'mg',
+  'mcg',
+  # 'g',
+  # 'mL',
+  'L',
+  'international unit',
+  'unit',
+  'mEq',
+  'teaspoon',
+  'tablespoon',
+  'puff',
+  'application',
+  'spray',
+  'drop',
+  'syringe',
+  'vial',
+  'packet',
+  'pen',
+  'oz',
+  'injection',
+  'cm',
+]
+
 DOSE_UNITS = {
   # to match ahead of one-letter dose form (L)
   'lozenge': [r'\bloz\b'],
@@ -411,6 +432,9 @@
   'L': [r'(?:\bliter)'],
   'oz': ['ounce'],
   'cm': ['centimeter', r'cm\b', r'cms\b'],
+  'unit': [r'units'],
+  'teaspoon': [r'\btsp\b', 'teaspoons', 'teaspoonsful', 'teaspoonful', 'teaspoonfuls'],
+  'tablespoon': [r'\btbsp\b', 'tablespoon', 'tablespoonsful', 'tablespoonful', 'tablespoonfuls'],
   # tablet
   # TODO: add all synonyms to exclusion for tablet
   # ERROR: make sure "tablespoon" does not match on "tab" -- use a negative lookahead
 
@@ -1,5 +1,5 @@
 from .classes.parser import *
-from . import method, dose, strength, route, frequency, when, duration, indication, additional_info
+from . import method, dose, strength, route, frequency, when, duration, indication, max, additional_info
 import csv
 
 # TODO: need to move all this to the main app and re-purpose the sig.py parser
@@ -19,11 +19,12 @@ class SigParser(Parser):
         'when': when.parsers,
         'duration': duration.parsers,
         'indication': indication.parsers,
+        'max': max.parsers,
         'additional_info': additional_info.parsers,
     }
     # TODO: make this match_keys assignment more elegant
-    #match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + additional_info.parsers[0].match_keys
-    match_keys = ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + additional_info.parsers[0].match_keys
+    #match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
+    match_keys = ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
     parser_type = 'sig'
 
     def get_normalized_sig_text(self, sig_text):
@@ -38,27 +39,85 @@ def get_normalized_sig_text(self, sig_text):
         sig_text = ' '.join(sig_text.split())
         return sig_text
 
-    def get_readable(self, method=None, dose=None, strength=None, route=None, frequency=None, when=None, duration=None, indication=None, additional_info=None):
-        method = method if method else ''
-        dose = dose if dose else ''
-        strength = strength if strength else ''
-        route = route if route else ''
-        frequency = frequency if frequency else ''
-        when = when if when else ''
-        duration = duration if duration else ''
-        indication = indication if indication else ''
-        additional_info = additional_info if additional_info else ''
+    def get_readable(self, match_dict):
+        method = match_dict['method_readable'] if match_dict['method_readable'] else ''
+        dose = match_dict['dose_readable'] if match_dict['dose_readable'] else ''
+        strength = match_dict['strength_readable'] if match_dict['strength_readable'] else ''
+        route = match_dict['route_readable'] if match_dict['route_readable'] else ''
+        frequency = match_dict['frequency_readable'] if match_dict['frequency_readable'] else ''
+        when = match_dict['when_readable'] if match_dict['when_readable'] else ''
+        duration = match_dict['duration_readable'] if match_dict['duration_readable'] else ''
+        indication = match_dict['indication_readable'] if match_dict['indication_readable'] else ''
+        max = match_dict['max_readable'] if match_dict['max_readable'] else ''
+        additional_info = match_dict['additional_info_readable'] if match_dict['additional_info_readable'] else ''
 
         if dose != '' and strength != '':
             strength = '(' + strength + ')'
-        sig_elements = [method, dose, strength, route, frequency, when, duration, indication, additional_info]
+        sig_elements = [method, dose, strength, route, frequency, when, duration, indication, max, additional_info]
         # join sig elements with spaces
         readable = ' '.join(sig_elements)
         # remove duplicate spaces, and in doing so, also trim whitespaces from around sig
         # this accounts for empty sig elements
         readable = ' '.join(readable.split())
         return readable
 
+    def get_period_per_day(self, period, period_unit):
+        if not period:
+            return None
+
+        if period_unit == 'hour':
+            return 24 / period
+        elif period_unit == 'day':
+            return 1 / period
+        elif period_unit == 'week':
+            return 1 / (7 * period)
+        elif period_unit == 'month':
+            return 1 / (30 * period)
+        else:
+            return None
+        
+    def get_max_dose_per_day(self, match_dict):
+        # calculate max per day from sig instructions
+        frequency = match_dict['frequency_max'] or match_dict['frequency']
+        period = match_dict['period']
+        period_unit = get_normalized(PERIOD_UNIT, match_dict['period_unit']) if match_dict['period_unit'] else match_dict['period_unit']
+        # period_per_day can be null if period_unit doesn't match hour / day / week / month
+        period_per_day = self.get_period_per_day(period, period_unit)
+
+        dose = match_dict['dose_max'] or match_dict['dose']
+        dose_unit = match_dict['dose_unit']
+
+        max_dose_per_day_sig = None
+        if frequency and period_per_day and dose:
+            max_dose_per_day_sig = frequency * period_per_day * dose
+
+        # calculate max per day from max dose (i.e. "max daily dose = 3" or "no more than 2 per week")
+        frequency_max = 1
+        period_max = match_dict['max_denominator_value']
+        period_unit_max = match_dict['max_denominator_unit']
+        # can be null if period_unit doesn't match
+        period_per_day_max = self.get_period_per_day(period_max, period_unit_max)
+        
+        dose_max = match_dict['max_numerator_value']
+        dose_unit_max = match_dict['max_numerator_unit']
+
+        max_dose_per_day_max = None
+        if frequency_max and period_per_day_max and dose_max:
+            max_dose_per_day_max = frequency_max * period_per_day_max * dose_max
+        
+        max_dose_per_day = None
+        # if we are dealing with a complex dose unit, don't return a max_dose_per_day
+        if dose_unit in EXCLUDED_MDD_DOSE_UNITS or dose_unit_max in EXCLUDED_MDD_DOSE_UNITS:
+            return max_dose_per_day
+        # if (at least one max dose is not null) and (the dose units match or one of the dose units is null)
+        if (max_dose_per_day_sig or max_dose_per_day_max) and (dose_unit == dose_unit_max or not dose_unit or not dose_unit_max):
+            # originally wrote this to choose the lowest dose per day
+            # max_dose_per_day = min(d for d in [max_dose_per_day_sig, max_dose_per_day_max] if d is not None)
+            # however, requirements changed to always prefer max over sig
+            max_dose_per_day = max_dose_per_day_max or max_dose_per_day_sig
+
+        return max_dose_per_day
+
     def parse(self, sig_text):
         match_dict = dict(self.match_dict)
         #match_dict['original_sig_text'] = sig_text
@@ -82,17 +141,9 @@ def parse(self, sig_text):
                 for k, v in match.items():
                     match_dict[k] = v
             #elif len(matches) == 0:
-        match_dict['sig_readable'] = self.get_readable(
-            method=match_dict['method_readable'],
-            dose=match_dict['dose_readable'],
-            strength=match_dict['strength_readable'],
-            route=match_dict['route_readable'],
-            frequency=match_dict['frequency_readable'],
-            when=match_dict['when_readable'],
-            duration=match_dict['duration_readable'],
-            indication=match_dict['indication_readable'],
-            additional_info=match_dict['additional_info_readable'],
-        )
+        match_dict['sig_readable'] = self.get_readable(match_dict)
+        match_dict ['max_dose_per_day'] = self.get_max_dose_per_day(match_dict)
+
         # calculate admin instructions based on leftover pieces of sig
         # would need to calculate overlap in each of the match_dicts
         # in doing so, maybe also return a map of the parsed parts of the sig for use in frontend highlighting
 
@@ -0,0 +1,33 @@
+# Generated by Django 3.2.9 on 2022-07-18 16:26
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add four nullable ``max_dose_*`` CharField columns to the ``sigparsed`` model."""
+
+    # runs after the previous migration of the ``sig`` app
+    dependencies = [
+        ('sig', '0010_auto_20220707_0254'),
+    ]
+
+    # NOTE(review): both the *_value and *_unit columns are CharField(max_length=100, null=True);
+    # the parser's match keys are named max_numerator_* / max_denominator_* (no "dose_" infix) --
+    # confirm the mapping where parsed results are saved.
+    operations = [
+        migrations.AddField(
+            model_name='sigparsed',
+            name='max_dose_denominator_unit',
+            field=models.CharField(max_length=100, null=True),
+        ),
+        migrations.AddField(
+            model_name='sigparsed',
+            name='max_dose_denominator_value',
+            field=models.CharField(max_length=100, null=True),
+        ),
+        migrations.AddField(
+            model_name='sigparsed',
+            name='max_dose_numerator_unit',
+            field=models.CharField(max_length=100, null=True),
+        ),
+        migrations.AddField(
+            model_name='sigparsed',
+            name='max_dose_numerator_value',
+            field=models.CharField(max_length=100, null=True),
+        ),
+    ]