Fix Kanban board issues

jrlegrand · jrlegrand · commit 60678651a47d · 2022-09-09T04:28:54.000Z
diff --git a/parsers/duration.py b/parsers/duration.py
@@ -20,7 +20,7 @@ def get_readable(self, duration=None, duration_max=None, duration_unit=None):
 
 # for x [more] days
 class DurationParserForXDays(DurationParser):
-    pattern = r'(?:for|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
+    pattern = r'(?:for|\bf|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b|w\b)'  # plural "s" is optional so singular units ("for 1 day") still match
     def normalize_match(self, match):
         duration_range = split_range(match.group('duration'))
         duration_text_start, duration_text_end = match.span()
@@ -33,7 +33,7 @@ def normalize_match(self, match):
 
 # up to x days
 class DurationParserUpToXDays(DurationParser):
-    pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
+    pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b)'  # plural "s" is optional so singular units ("up to 1 week") still match
     def normalize_match(self, match):
         duration_range = split_range(match.group('duration'))
         duration_text_start, duration_text_end = match.span()
diff --git a/parsers/indication.py b/parsers/indication.py
@@ -5,7 +5,7 @@
 # TODO: replace wildcard indication search with INDICATIONS list from normalize.py
 class IndicationParser(Parser):
     parser_type = 'indication'
-    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|if needed|prn for|prf|prn)(?:\s?(?P<indication>.{,250}))?'
+    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn)(?:\s?(?P<indication>.{,250}))?'
     match_keys = ['as_needed', 'indication', 'indication_text_start', 'indication_text_end', 'indication_text', 'indication_readable']
     def normalize_match(self, match):
         as_needed = 1
@@ -30,7 +30,7 @@ def get_readable(self, as_needed=None, indication=None):
         return readable
 
 class ChronicIndicationParser(IndicationParser):
-    pattern = r'(?!as needed|if needed|prn|prf) for (?P<indication>.{,250})(?!' + RE_RANGE + r')'
+    pattern = r'(?!as needed |if needed |prn |prf |prnf )(?:for|indications) (?P<indication>.{,250})(?!' + RE_RANGE + r')'
     def normalize_match(self, match):
         indication_text = match.group('indication')
         indication = (get_indication(indication_text) if indication_text != None else indication_text)
diff --git a/parsers/services/normalize.py b/parsers/services/normalize.py
@@ -66,7 +66,7 @@
 # NOTE: periodUnit 'day' should include pretty much all of 'when' array
 # for FHIR conversion: https://www.hl7.org/fhir/valueset-units-of-time.html
 PERIOD_UNIT = {
-  'day': [ 'daily', 'dialy', 'nightly', 'days', 'day', r'\bd\b', 'morning', 'morn', 'am', 'afternoon', 'aft', 'pm', 'evening at bedtime', 'bedtime', 'evening', 'eve', 'night', 'hs' ],
+  'day': [ 'daily', 'dialy', 'nightly', 'days', 'day', r'\bd\b', 'morning', 'morn', 'am', 'afternoon', 'aft', 'pm', r'evening at bed(?:\s)?time', r'bed(?:\s)?time', 'evening', 'eve', 'night', 'hs' ],
   'week': [ 'weekly', 'weeks', 'week', 'wks', 'wk', r'\bw\b' ],
   'month': [ 'monthly', 'months', 'month', 'mon', 'mo' ],
   'hour': [ 'hourly', 'hours', 'hour', 'hrs', 'hr', r'\bh\b' ],
@@ -90,10 +90,10 @@
 WHEN = {
   'in the morning': [ r'(?:in the|every|each)\s?(?:morn(?:ing)?|a m\b|am)', r'a m\b', r'\bam\b', r'\bqam\b', r'q am\b' ],
   'in the afternoon': [ r'(?:in the|every|each|at)\s?(?:aft(?:ernoon)?|p m\b|pm)', r'\bqpm\b', 'q afternoon' ],
-  'in the evening at bedtime': [r'(?:in the|every)\s?evening at bedtime'],
-  'in the evening': [ r'(?:in the|every|each)\s?eve(?:ning)?(?! at bedtime)' ],
-  'at night': [ r'(?:in the|at|every|each)\s?night(?! at bedtime)', r'nightly(?! at bedtime)' ],
-  'at bedtime': [ r'(?!eve(?:ning) )(?:in the|at|every|every night at|nightly at|each)\s?bedtime', r'\bqhs\b', r'q hs\b', 'bedtime', r'\bhs\b' ],
+  'in the evening at bedtime': [r'(?:in the|every)\s?evening at bed(?:\s)?time'],
+  'in the evening': [ r'(?:in the|every|each)\s?eve(?:ning)?(?! at bed(?:\s)?time)' ],
+  'at night': [ r'(?:in the|at|every|each)\s?night(?! at bed(?:\s)?time)', r'nightly(?! at bed(?:\s)?time)' ],
+  'at bedtime': [ r'(?!eve(?:ning) )(?:in the|at|every|before|every night at|nightly at|each)\s?bed(?:\s)?time', r'\bqhs\b', r'q hs\b', r'bed(?:\s)?time', r'\bhs\b' ],
   'with meal': [ r'(?:with|each|every|at)?\s?meal(?:s)?', r'c c\b', r'\bcc\b' ],
   'with breakfast': [ r'(?:with|each|every|at)? breakfast' ],
   'with lunch': [ r'(?:with|each|every|at)?\s?lunch', r'\bcd\b', r'c d\b' ],
@@ -123,7 +123,7 @@
   'use': [],
   'push': [],
   'give': [],
-  'take': [r'\btk(?:\b|\d)', r'^t(?:\b|\d)'],
+  'take': [r'\btk(?:\b|\d)', r'^t(?:\b|\d)', 'taking'],
   'swallow': [],
   'instill': [],
   'chew': [],
@@ -181,13 +181,13 @@
 	'by mouth': ['by oral route', 'oral', r'orally(?! disintegrating)', r'po\b', r'p o\b', r'oral\b'],
   'in left ear': [r'(?:in to |into |in |to |per )?(?:the )?left ear', r'\ba\.s\.\b'],
   'in right ear': [r'(?:in to |into |in |to |per )?(?:the )?right ear', r'\ba\.d\.\b'],
-  'in each ear': [r'(?:in to |into |in |to |per )?(?:both ears|each ear|(?!affected )ears)', r'\ba\.u\.\b', r'\bau\b'],
+  'in each ear': [r'(?:in to |into |in |to |per )?(?:bilateral ears|both ears|each ear|(?!affected )ears)', r'\ba\.u\.\b', r'\bau\b'],
   'in affected ear': [r'(?:in to |into |in |to |per )?(?:the )?affected ear\b'],
   'in ear(s)': ['by ear', 'otically', 'otic', r'(?:in to |into |in |to |per )?(?:the )?(?!affected )ear\b'],
   'in left nostril': [r'(?:in to |into |in |to |per )?(?:the )?left (?:nose|nostril|nare)'],
   'in right nostril': [r'(?:in to |into |in |to |per )?(?:the )?right (?:nose|nostril|nare)'],
   'in each nostril': [r'(?:in to |into |in |to |per )?(?:both nostrils|each nostril|(?<!affected )nostrils|each nare)', r'\bien\b'],
-  'in nostril(s)': ['by nose', 'nasally', r'nasal(?! spray)', 'intranasal', r'(?:in to |into |in |to |per )?(?:the )?(?!each|left|right|both)(?:affected)?(?:nose|nostrils|nostril|nare)\b'],
+  'in nostril(s)': [r'\bvn\b', 'by nose', 'nasally', r'nasal(?! spray)', 'intranasal', r'(?:in to |into |in |to |per )?(?:the )?(?!each|left|right|both)(?:affected)?(?:nose|nostrils|nostril|nare)\b'],
   'in left eye': [r'(?:in to |into |in |to |per )?(?:the )?left eye', r'\bo\.s\.\b', r'\bos\b'],
   'in right eye': [r'(?:in to |into |in |to |per )?(?:the )?right eye', r'\bo\.d\.\b', r'\bod\b'],
   'in each eye': [r'(?:in to |into |in |to |per )?(?:both eyes|each eye|(?!affected )eyes)', r'\bo\.u\.\b', r'\bou\b'],
@@ -198,7 +198,7 @@
   'under the tongue': ['sublingually', 'sublingual', r'under (?:the )?tongue', r'sub(?: |-)?lingual(?:ly)?', r'\bs\.l\.\b', r'\bsl\b'],
   'under the skin': ['subcutaneously', 'subcutaneous', r'(?:into|in|under) (?:the )?skin', r'sub(?: |-)*cutaneous(?:ly)?', r'subq\b', r'sub\.q\.', r'sc\b', r'subcu\b', r's\.c\.', r'sq\b', r's\.q\.', 's/q'],
   'rectally': ['rectal', r'p\.r\.\b', r'pr\b', r'in(?:to)* (?:the )?(?:butt|anus|rectum)'],
-  'into the muscle': ['intramuscularly', r'i\.m\.\b', r'\bim\b', 'intramuscular', r'in(?:to)?(?: the)? muscle' ],
+  'into the muscle': ['intramuscularly', 'intramuscularrly', r'i\.m\.\b', r'\bim\b', 'intramuscular', r'in(?:to)?(?: the)? muscle' ],  # misspelling 'intramuscularrly' kept ahead of its unanchored prefix 'intramuscular' (longest-first, as in the other synonym lists)
   'intravenously': [r'i\.v\.', r'\biv\b', 'intravenous'],
   'cutaneously': [r'\bcutaneous'],
   'to the skin': ['transdermally', 'transdermal', 'patch', 'patches'],
@@ -347,13 +347,15 @@
 """
 
 TOPICAL_ROUTES = {
-  'topically': [r'topical\b', 'application', 'apply', 'patch'],
+  'topically': [r'topical\b', r'\btop\b', 'application', 'apply', 'patch'],
   'affected areas': [r'involved (?:areas|sites)'],
   'affected area': [r'\baa\b', r'involved (?:area|site)\b'],
   'back': [],
   'scalp': [],
   'torso': [],
   'arms': [],
+  'legs': [],
+  'abdomen': [],
   'arm': [],
   'eyelids': ['eye lids'],
   'eyelid': [r'eye lid\b'],
@@ -366,6 +368,8 @@
   'buttocks': [r'butt\b'],
   'blood blister': [],
   'face': [],
+  'chest': [],
+  'shoulders': [],
 }
 
 INHALATION_ROUTES = {
@@ -441,7 +445,7 @@
   # tablet
   # TODO: add all synonyms to exclusion for tablet
   # ERROR: make sure "tablespoon" does not match on "tab" -- use a negative lookahead
-  'tablet': [r'(?<!film-coated)(?<!effervescentgastro-resistant)(?<!orodispersible)(?<!prolonged-release)(?<!vaginal)(?<!effervescent vaginal)(?<!modified-release)(?<!chewable)(?<!sublingual)(?<!buccalmuco-adhesive buccal)(?<!soluble)(?<!dispersible)(?<!delayed-release particles)(?<!oral)(?<!inhalation vapor)(?<!implantation)(?<!extended-release film coated)(?<!ultramicronized)(?<!extended-release)(?<!extended-release enteric coated)(?<!delayed-release)(?<!coated particles)(?<!sustained-release buccal)(?<!multilayer)\s*tab(?:let)?(?:s)?', r't\b', r'ts\b'],
+  'tablet': [r'(?<!film-coated)(?<!effervescentgastro-resistant)(?<!orodispersible)(?<!prolonged-release)(?<!vaginal)(?<!effervescent vaginal)(?<!modified-release)(?<!chewable)(?<!sublingual)(?<!buccalmuco-adhesive buccal)(?<!soluble)(?<!dispersible)(?<!delayed-release particles)(?<!oral)(?<!inhalation vapor)(?<!implantation)(?<!extended-release film coated)(?<!ultramicronized)(?<!extended-release)(?<!extended-release enteric coated)(?<!delayed-release)(?<!coated particles)(?<!sustained-release buccal)(?<!multilayer)\s*tab(?:let)?(?:s)?', r'tb\b', r't\b', r'ts\b'],
   'film-coated tablet': [r'(?:film-coated|film coated) tab(?:let)?(?:s)?'],
   'effervescent tablet': [r'effervescent tab(?:let)?(?:s)?'],
   'gastro-resistant tablet': [r'(?:gastro-resistant|gastro resistant) tab(?:let)?(?:s)?'],
@@ -482,7 +486,6 @@
   'prolonged-release capsule': [r'(?:prolonged-release|prolonged release) cap(?:sule)?(?:s)?\b'],
   'coated pellets capsule': [r'coated pellets cap(?:sule)?(?:s)?\b'],
   'delayed-release capsule': [r'(?:delayed-release|delayed release|d.r.|dr) cap(?:sule)?(?:s)?\b'],
-  'oral capsule': [r'oral cap(?:sule)?(?:s)?\b'],
   'extended-release film coated capsule': [r'(?:extended-release|extended release|e.r.|er) (?:film coated|film-coated) cap(?:sule)?(?:s)?\b'],
   'extended-release coated capsule': [r'(?:extended-release|extended release|e.r.|er) coated cap(?:sule)?(?:s)?\b'],
   'extended-release capsule': [r'(?:extended-release|extended release|e.r.|er) cap(?:sule)?(?:s)?\b'],
@@ -540,6 +543,7 @@
   'cutaneous aerosol': [],
   'metered dose aerosol': [],
   # gum
+  'gummie': [],
   'gum': [],
   'oral gum': [],
   'medicated chewing-gum': [r'medicated chewing gum'],
@@ -630,7 +634,7 @@
   'pen': [],
   'applicatorful': ['applicatorsful', 'applicator'],
   # NOTE: have a separate parser for generic application keywords (i.e. 'apply')
-  'application': [r'app\b', r'applic\b'],
+  'application': [ r'applic\b', r'appl\b', r'app\b'],
   'capful': [],
   'injection': [],
   'packet': ['pkt'],
@@ -645,6 +649,8 @@
   'swab': [],
   'squirt': [],
   'pump': [],
+  'troche': [],
+  'cartridge': ['cartridges'],
 }
 
 PAIN_SEVERITIES = {
@@ -669,6 +675,8 @@
   'eye': [],
   'muscle/joint': [],
   'muscle': [],
+  'nerve': [],
+  'neuropathic': [],
   'joint': [],
   'throat': [],
   'back': [],
@@ -719,9 +727,12 @@
   'bladder irritation': [],
   'bladder spasm': [],
   'bleeding': [],
+  'blood glucose monitoring': [],
   'blood sugar less than': [],
+  'blood pressure': [],
   'bradycardia': [],
   'burning with urination': [],
+  'cholesterol': ['choleserol'],
   'chest pain': [],
   'cold sore': ['cold sores'],
   'cold': [],
@@ -735,6 +746,7 @@
   'demand feeding': [],
   'depression': [],
   'diarrhea': [],
+  'diabetes': ['type 2 diabetes'],
   'difficulty breathing': [],
   'discomfort': [],
   'dizziness': [],
@@ -763,16 +775,23 @@
   'hallucination': [],
   'headache': [],
   'heartburn': [],
+  'heart rate': ['heart rates', r'\bhr\b'],
   'hiccups': [],
+  'heart failure': [],
+  'heart': [],
   'hemorrhoids': [],
+  'hair': [],
   'high blood pressure': [],
   'high blood sugar': [],
+  'high cholesterol': ['hyperlipidemia'],
   'hives': [],
   'hypoglycemia': [],
+  'hypothyroidism': ['underactive thyroid'],
   'incontinence': [],
   'indigestion': [],
   'infection': [],
   'inflammation': [],
+  'inflammatory bowel disease': [r'\bibd\b'],
   'insomnia': [],
   'intercourse': [],
   'irritation': [],
@@ -782,22 +801,28 @@
   'low blood sugar': [],
   'low heart rate': [],
   'migraine': [],
+  'mood': [],
   'mucositis': [],
   'movement disorder': [],
   'muscle spasm': [],
   'nasal congestion': [],
   'nasal dryness': [],
   'nausea': [],
   'numbness': [],
+  'opioid dependence': [],
   'opioid reversal': [],
   'outbreak': [],
+  'overdose': [],
   'palpitations': [],
+  'panic attack': [],
   'perianal irritation': [],
   'pharyngitis': [],
+  'prostate': [],
   'rash': [],
   'reflux': [],
   'respiratory distress': [],
   'respiratory depression': [],
+  'restlessness': [],
   'rhinitis': [],
   'rigors': [],
   'seasonal allergies': [],
@@ -822,14 +847,18 @@
   'soreness': [],
   'spasm': [],
   'stomatitis': [],
+  'stroke': [],
   'swelling': [],
   'teething': [],
   'thrush': [],
   'thyroid': [],
+  'tick bite': [],
   'tremor': ['tremors'],
+  'ulcerative colitis': [],
   'unable to take po': [],
   'urge to smoke': [],
   'urinary burning': [],
+  'urinary tract infection': [r'\buti(?:s)?\b'],  # trailing \b keeps the abbreviation from matching inside words such as "utilize"
   'urinary tract irritation': [],
   'urinary tract symptoms': [],
   'vertigo': [],
diff --git a/parsers/sig.py b/parsers/sig.py
@@ -24,7 +24,7 @@ class SigParser(Parser):
     }
     # TODO: make this match_keys assignment more elegant
     #match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
-    match_keys = ['original_sig_text', 'sig_text', 'sig_readable', 'max_dose_per_day'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
+    match_keys = ['sig_text', 'sig_readable', 'max_dose_per_day'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
     parser_type = 'sig'
 
     def get_normalized_sig_text(self, sig_text):
@@ -120,7 +120,7 @@ def get_max_dose_per_day(self, match_dict):
 
     def parse(self, sig_text):
         match_dict = dict(self.match_dict)
-        match_dict['original_sig_text'] = sig_text
+        #match_dict['original_sig_text'] = sig_text
         sig_text = self.get_normalized_sig_text(sig_text)
         match_dict['sig_text'] = sig_text
         for parser_type, parsers in self.parsers.items():
@@ -267,7 +267,7 @@ def print_progress_bar (iteration, total, prefix = 'progress:', suffix = 'comple
         print()
 
 #print(SigParser().infer(ndc='68788640709'))
-parsed_sigs = SigParser().parse_sig_csv()
+#parsed_sigs = SigParser().parse_sig_csv()
 #parsed_sigs = SigParser().parse_validate_sig_csv()
 #print(parsed_sigs)
 
diff --git a/parsers/when.py b/parsers/when.py
@@ -6,7 +6,7 @@
 class WhenParser(Parser):
 	parser_type = 'when'
 	match_keys = ['when', 'when_text_start', 'when_text_end', 'when_text', 'when_readable']
-	pattern = r'(?P<when_relation>with|\bc\.|\bc|before|\ba|\ba\.|after|\bp|\bp\.|in the|at|every|each|\bq|\bq.|night)(?: each| every)?(?P<when_time>\s?(?:c\b|c\.\b|meal(?:s)? and at bedtime|meal(?:s)?|c\.m\.\b|cm\b|breakfast|c\.d\.\b|cd\b|lunch|c\.v\.\b|cv\b|dinner|morning before breakfast|morning|morn|a\.m\.\b|am\b|evening at bedtime|bedtime|evening|eve|aft(?:ernoon)?|p\.m\.\b|pm\b|night at bedtime|night|hs\b|h\.s\.\b|ly))'
+	pattern = r'(?P<when_relation>with|\bc\.|\bc|before|\ba|\ba\.|after|\bp|\bp\.|in the|at|every|each|\bq|\bq.|night)(?: each| every)?(?P<when_time>\s?(?:c\b|c\.\b|meal(?:s)? and at bed(?:\s)?time|meal(?:s)?|c\.m\.\b|cm\b|breakfast|c\.d\.\b|cd\b|lunch|c\.v\.\b|cv\b|dinner|morning before breakfast|morning|morn|a\.m\.\b|am\b|evening at bed(?:\s)?time|bed(?:\s)?time|evening|eve|aft(?:ernoon)?|p\.m\.\b|pm\b|night at bed(?:\s)?time|night|hs\b|h\.s\.\b|ly))'
 	def normalize_match(self, match):
 		# TODO: normalize before to 'a' and after to 'p', etc
 		# TODO: normalize meals to 'm', etc