Skip to content

Commit a1463a4

Browse files
committed
Initial fixes for phase 3
1 parent 6067865 commit a1463a4

File tree

4 files changed

+20
-13
lines changed

4 files changed

+20
-13
lines changed

parsers/frequency.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def normalize_match(self, match):
151151
# Monday, Tuesday, Wednesday, and Friday
152152
# dayOfWeek = a
153153
class FrequencySpecificDayOfWeek(FrequencyParser):
154-
pattern = r'(?:every|on|q)\s+(?P<day_of_week>(?:(?:\s*(?:and|&|\+|,)\s*)*(?:' + RE_DAYS_OF_WEEK + '))+)'
154+
pattern = r'(?:every|on|q)\s?(?P<day_of_week>(?:(?:\s?(?:and|&|\+|,|\s)\s?)?(?:' + RE_DAYS_OF_WEEK + '))+)'
155155
def normalize_match(self, match):
156156
# TODO: normalize days of week to be comma- or pipe-delimited, e.g. "tuesday and thursday" -> "tuesday|thursday" or "tuesday,thursday"
157157
day_of_week = match.group('day_of_week')

parsers/route.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def normalize_pattern(self):
5151
# and join them with a | character
5252
# and add them to the route_patterns array
5353
topical_route_patterns.append(r'|'.join(p))
54-
pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')\b', flags = re.I)
54+
pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)\b', flags = re.I)
5555
return pattern
5656
def normalize_match(self, match):
5757
route = get_normalized(TOPICAL_ROUTES, match.group('route'))
@@ -127,10 +127,10 @@ def normalize_match(self, match):
127127
# NOTE: moved InhalationRouteParser above RouteParser here so that "2 PUFFS BY MOUTH DAILY" resolves to "into the lungs" instead of "by mouth"...
128128
# however, left it in different order above for class inheritance
129129
parsers = [
130-
InhalationRouteParser(),
130+
# InhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
131131
RouteParser(),
132132
TopicalRouteParser(),
133-
InferredOralRouteParser()
133+
# InferredOralRouteParser()
134134
]
135135

136136
#print(RouteParser().parse('take one by mouth daily'))

parsers/services/normalize.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,18 @@
8787
}
8888

8989
#(?:with|\bc\.|before|\ba|\ba\.|after|\bp|\bp\.|in the|at|every)
90+
# NOTE: attempting to exclude UMS by not matching a morning phrase that is followed by " and" (e.g. "morning and")
9091
WHEN = {
91-
'in the morning': [ r'(?:in the|every|each)\s?(?:morn(?:ing)?|a m\b|am)', r'a m\b', r'\bam\b', r'\bqam\b', r'q am\b' ],
92+
'in the morning': [ r'(?:in the|every|each)\s?(?:morning|morn(?!ing)|a m\b|am)(?! and)', r'a m\b', r'\bam\b', r'\bqam\b', r'q am\b' ],
9293
'in the afternoon': [ r'(?:in the|every|each|at)\s?(?:aft(?:ernoon)?|p m\b|pm)', r'\bqpm\b', 'q afternoon' ],
9394
'in the evening at bedtime': [r'(?:in the|every)\s?evening at bed(?:\s)?time'],
9495
'in the evening': [ r'(?:in the|every|each)\s?eve(?:ning)?(?! at bed(?:\s)?time)' ],
9596
'at night': [ r'(?:in the|at|every|each)\s?night(?! at bed(?:\s)?time)', r'nightly(?! at bed(?:\s)?time)' ],
9697
'at bedtime': [ r'(?!eve(?:ning) )(?:in the|at|every|before|every night at|nightly at|each)\s?bed(?:\s)?time', r'\bqhs\b', r'q hs\b', r'bed(?:\s)?time', r'\bhs\b' ],
9798
'with meal': [ r'(?:with|each|every|at)?\s?meal(?:s)?', r'c c\b', r'\bcc\b' ],
98-
'with breakfast': [ r'(?:with|each|every|at)? breakfast' ],
99+
'with breakfast and lunch': [],
100+
'with breakfast and dinner': [],
101+
'with breakfast': [ r'(?:with|each|every|at)? breakfast(?! and lunch| and dinner)' ],
99102
'with lunch': [ r'(?:with|each|every|at)?\s?lunch', r'\bcd\b', r'c d\b' ],
100103
'with dinner': [ r'(?:with|each|every|at)?\s?dinner', r'\bcv\b', r'c v\b' ],
101104
'before meal': [ r'before meal(?:s)?', r'\bac\b', r'a c\b' ],
@@ -196,7 +199,7 @@
196199
'vaginally': ['vaginal', r'(?:in to|into|in|per)(?: the)? vagina', r'p\.v\.', r'pv\b'],
197200
'into the uterus': ['intrauterine', 'uterus'],
198201
'under the tongue': ['sublingually', 'sublingual', r'under (?:the )?tongue', r'sub(?: |-)?lingual(?:ly)?', r'\bs\.l\.\b', r'\bsl\b'],
199-
'under the skin': ['subcutaneously', 'subcutaneous', r'(?:into|in|under) (?:the )?skin', r'sub(?: |-)*cutaneous(?:ly)?', r'subq\b', r'sub\.q\.', r'sc\b', r'subcu\b', r's\.c\.', r'sq\b', r's\.q\.', 's/q'],
202+
'under the skin': ['subcutaneously', 'subcutaneous', r'(?<!massage )(?:into|in|under) (?:the )?skin', r'sub(?: |-)*cutaneous(?:ly)?', r'subq\b', r'sub\.q\.', r'sc\b', r'subcu\b', r's\.c\.', r'sq\b', r's\.q\.', 's/q'],
200203
'rectally': ['rectal', r'p\.r\.\b', r'pr\b', r'in(?:to)* (?:the )?(?:butt|anus|rectum)'],
201204
'into the muscle': ['intramuscularly', r'i\.m\.\b', r'\bim\b', 'intramuscular', r'in(?:to)?(?: the)? muscle', 'intramuscularrly'],
202205
'intravenously': [r'i\.v\.', r'\biv\b', 'intravenous'],
@@ -219,6 +222,8 @@
219222
'swish and swallow': [],
220223
'miscellaneous': ['misc', 'device', 'meter', 'needle', 'pen needle', 'strip', r'(?:test )?strip(?:s)', r'test(?:ing)?', r'check(?:ing|s)?', 'monitor'],
221224
'subdermal': [],
225+
'to the mouth or throat': [],
226+
'scalp': ['scalp area'],
222227
}
223228

224229
"""
@@ -350,6 +355,7 @@
350355
'topically': [r'topical\b', r'\btop\b', 'application', 'apply', 'patch'],
351356
'affected areas': [r'involved (?:areas|sites)'],
352357
'affected area': [r'\baa\b', r'involved (?:area|site)\b'],
358+
'affected and surrounding areas': [],
353359
'back': [],
354360
'scalp': [],
355361
'torso': [],
@@ -377,11 +383,13 @@
377383
}
378384

379385
# TODO: add a lot more here (mL, mcg, g, etc)
386+
# NOTE: moved 'unit' here from the dose-unit mapping so it is treated as a strength unit - need to do more testing
380387
STRENGTH_UNITS = {
381388
'mg': [r'(?:milligram(?:s)?|mgs)\b'],
382389
'mcg': [r'(?:microgram(?:s)?|mcgs)\b'],
383390
'g': [r'(?:gm|gms|gram(?:s)?)\b'],
384391
'international unit': [r'i\.u\.\b', r'iu\b', 'international units', r'int\'l unit(?:s)?', r'intl unit(?:s)?'],
392+
'unit': [r'units', r'un\b', r'u\b'],
385393
'mEq': [r'milliequivalent(?:s)?'],
386394
}
387395

@@ -439,7 +447,6 @@
439447
'oz': ['ounce'],
440448
'cm': ['centimeter', r'cm\b', r'cms\b'],
441449
'inch': [],
442-
'unit': [r'units', r'un\b', r'u\b'],
443450
'teaspoon': [r'tsp\b', 'teaspoons', 'teaspoonsful', 'teaspoonful', 'teaspoonfuls'],
444451
'tablespoon': [r'tbsp\b', 'tablespoon', 'tablespoonsful', 'tablespoonful', 'tablespoonfuls'],
445452
# tablet

parsers/sig.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class SigParser(Parser):
2424
}
2525
# TODO: make this match_keys assignment more elegant
2626
#match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
27-
match_keys = ['sig_text', 'sig_readable', 'max_dose_per_day'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
27+
match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable', 'max_dose_per_day'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
2828
parser_type = 'sig'
2929

3030
def get_normalized_sig_text(self, sig_text):
@@ -85,7 +85,7 @@ def get_max_dose_per_day(self, match_dict):
8585
period_per_day = self.get_period_per_day(period, period_unit)
8686

8787
dose = match_dict['dose_max'] or match_dict['dose']
88-
dose_unit = match_dict['dose_unit']
88+
dose_unit = match_dict['dose_unit'] # NOTE: moved units to strength unit instead of dose unit - eventually need to update this part to include units
8989

9090
max_dose_per_day_sig = None
9191
if frequency and period_per_day and dose:
@@ -120,7 +120,7 @@ def get_max_dose_per_day(self, match_dict):
120120

121121
def parse(self, sig_text):
122122
match_dict = dict(self.match_dict)
123-
#match_dict['original_sig_text'] = sig_text
123+
match_dict['original_sig_text'] = sig_text
124124
sig_text = self.get_normalized_sig_text(sig_text)
125125
match_dict['sig_text'] = sig_text
126126
for parser_type, parsers in self.parsers.items():
@@ -163,7 +163,7 @@ def infer(self, match_dict, ndc=None, rxcui=None):
163163
# parse a csv
164164
def parse_sig_csv(self):
165165
file_path='parsers/csv/'
166-
file_name='vumc_sigs_phase_2'
166+
file_name='vumc_phase_2_incorrect'
167167
csv_columns = self.match_keys
168168
# create an empty list to collect the data
169169
parsed_sigs = []
@@ -267,7 +267,7 @@ def print_progress_bar (iteration, total, prefix = 'progress:', suffix = 'comple
267267
print()
268268

269269
#print(SigParser().infer(ndc='68788640709'))
270-
#parsed_sigs = SigParser().parse_sig_csv()
270+
parsed_sigs = SigParser().parse_sig_csv()
271271
#parsed_sigs = SigParser().parse_validate_sig_csv()
272272
#print(parsed_sigs)
273273

0 commit comments

Comments
 (0)