Skip to content

Commit afa0267

Browse files
authored
Merge pull request jrlegrand#78 from jrlegrand/vumc_phase_3
Vumc phase 3
2 parents 6e921d5 + 33f4cb7 commit afa0267

File tree

9 files changed

+259
-114
lines changed

9 files changed

+259
-114
lines changed

parsers/dose.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def normalize_pattern(self):
1111
# and join them with a | character
1212
# and add them to the dose_patterns array
1313
dose_patterns.append(r'|'.join(p))
14-
pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')', flags = re.I)
14+
pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')(?!\s?\/\s?act)', flags = re.I)
1515
return pattern
1616
def normalize_match(self, match):
1717
# alternatively, if negation text is found before the dose, don't generate a match
@@ -21,6 +21,16 @@ def normalize_match(self, match):
2121
dose_range = split_range(match.group('dose'))
2222
dose, dose_max = dose_range
2323
dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
24+
# convert teaspoon and tablespoon to mL
25+
if (dose_unit in ['teaspoon', 'tablespoon']):
26+
multipliers = {
27+
'teaspoon': 5,
28+
'tablespoon': 15
29+
}
30+
multiplier = multipliers[dose_unit]
31+
dose = dose * multiplier if dose else dose
32+
dose_max = dose_max * multiplier if dose_max else dose_max
33+
dose_unit = 'mL'
2434
dose_text_start, dose_text_end = match.span()
2535
dose_text = match[0]
2636
dose_readable = self.get_readable(dose=dose, dose_max=dose_max, dose_unit=dose_unit)
@@ -58,7 +68,7 @@ def normalize_pattern(self):
5868
# and join them with a | character
5969
# and add them to the dose_patterns array
6070
strength_unit_patterns.append(r'|'.join(p))
61-
pattern = re.compile(r'^(?:' + r'|'.join(method_patterns) + r')?\s?(?P<dose>' + RE_RANGE + r')(?!\d)(?!\s?(?:' + r'|'.join(strength_unit_patterns) + r'))', flags = re.I)
71+
pattern = re.compile(r'^(?:' + r'|'.join(method_patterns) + r')?\s?(?P<dose>' + RE_RANGE + r')(?!(?:\s)?\d)(?!(?:\s)?(?:times|x))(?!\s?(?:' + r'|'.join(strength_unit_patterns) + r'))', flags = re.I)
6272
return pattern
6373
def normalize_match(self, match):
6474
dose_range = split_range(match.group('dose'))
@@ -87,11 +97,44 @@ def normalize_match(self, match):
8797
dose_readable = self.get_readable(dose_unit=dose_unit)
8898
return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
8999

100+
class ApplyDoseUnitParser(DoseParser):
    """Dose parser that reports one 'application' when the text contains 'apply'."""

    def normalize_pattern(self):
        """Return the compiled, case-insensitive pattern for the word 'apply'."""
        return re.compile(r'apply', flags = re.I)

    def normalize_match(self, match):
        """Build the generated match for an 'apply' hit.

        The dose is fixed at 1 and the unit at 'application'; the readable
        text is produced from the unit alone.
        """
        unit = 'application'
        result = {
            'dose': 1,
            'dose_unit': unit,
            'dose_text_start': match.start(),
            'dose_text_end': match.end(),
            'dose_text': match.group(0),
            'dose_readable': self.get_readable(dose_unit=unit),
        }
        return self.generate_match(result)
111+
112+
class EachDoseUnitParser(DoseParser):
    """Dose parser that reports a dose of 1 'each' for MISCELLANEOUS_ROUTES text."""

    def normalize_pattern(self):
        """Return one case-insensitive alternation of every name and synonym.

        MISCELLANEOUS_ROUTES is iterated as name -> list of pattern strings.
        Each entry contributes its patterns followed by its own name.
        """
        # Build a new list per entry rather than appending to the list stored
        # in MISCELLANEOUS_ROUTES: appending in place grew the shared lists by
        # one duplicate name every time any parser compiled its pattern.
        # NOTE(review): confirm no other code relies on the name having been
        # appended to MISCELLANEOUS_ROUTES as a side effect.
        dose_patterns = [
            r'|'.join([*synonyms, name])
            for name, synonyms in MISCELLANEOUS_ROUTES.items()
        ]
        return re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)

    def normalize_match(self, match):
        """Build the generated match: dose 1, unit 'each', span and text of the hit."""
        dose = 1
        dose_unit = 'each'
        dose_text_start, dose_text_end = match.span()
        dose_text = match[0]
        # The readable text is produced from the unit alone.
        dose_readable = self.get_readable(dose_unit=dose_unit)
        return self.generate_match({'dose': dose, 'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
130+
90131

91132
# Dose parser instances, created in the order the classes are listed.
parsers = [
    parser_class()
    for parser_class in (
        DoseParser,
        DoseOnlyParser,
        DoseUnitOnlyParser,
        ApplyDoseUnitParser,
        EachDoseUnitParser,
    )
]
96139

97140
#print(DoseParser().parse('take one capsule prn nausea for 5 days'))

parsers/duration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def get_readable(self, duration=None, duration_max=None, duration_unit=None):
2020

2121
# for x [more] days
2222
class DurationParserForXDays(DurationParser):
23-
pattern = r'(?:for|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
23+
# Matches "for|f|x <range> [more] <unit>".
# The plural "s" is optional on every unit so that singular text such as
# "for 1 day" or "x 1 week" still matches; a mandatory "s" rejected it.
pattern = r'(?:for|\bf|x)\s*(?P<duration>' + RE_RANGE + r')\s*(?:more)?\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b|w\b)'
2424
def normalize_match(self, match):
2525
duration_range = split_range(match.group('duration'))
2626
duration_text_start, duration_text_end = match.span()
@@ -33,7 +33,7 @@ def normalize_match(self, match):
3333

3434
# up to x days
3535
class DurationParserUpToXDays(DurationParser):
36-
pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year|month|week|day|yr\b|mon\b|wk\b|d\b)'
36+
# Matches "[for ]up to <range> <unit>".
# The plural "s" is optional on every unit so that singular text such as
# "up to 1 week" still matches; a mandatory "s" rejected it.
pattern = r'(?:for )?up to (?P<duration>' + RE_RANGE + r')\s?(?P<duration_unit>year(?:s)?|month(?:s)?|week(?:s)?|day(?:s)?|yr(?:s)?\b|mon(?:s)?\b|wk(?:s)?|d\b)'
3737
def normalize_match(self, match):
3838
duration_range = split_range(match.group('duration'))
3939
duration_text_start, duration_text_end = match.span()

parsers/frequency.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def normalize_match(self, match):
7272
# frequency = a (1 if once, 2 if twice), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
7373
# NOTE: 'daily' won't match this pattern because it requires specific times *per* day
7474
class FrequencyXTimesPerDay(FrequencyParser):
75-
pattern = r'(?P<frequency>' + RE_RANGE + r'\s*(?:time(?:s)?|x|nights|days)|once|twice)\s*(?:per|a|each|every|\/)\s*(?P<period_unit>day|week|month|year|d\b|w\b|mon|m\b|yr)'
75+
pattern = r'(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x|nights|days)|once|twice)\s?(?:per|a|each|every|\/)\s?(?P<period_unit>day|week|wk\b|month|year|d\b|w\b|mon|m\b|yr)'
7676
def normalize_match(self, match):
7777
frequency = frequency_max = match.group('frequency')
7878
if (frequency):
@@ -95,7 +95,7 @@ def normalize_match(self, match):
9595
# frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
9696
# frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
9797
class FrequencyXTimesDaily(FrequencyParser):
98-
pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
98+
pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|d\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
9999
def normalize_match(self, match):
100100
frequency = frequency_max = match.group('frequency')
101101
if (frequency):
@@ -112,11 +112,11 @@ def normalize_match(self, match):
112112

113113

114114
# daily | nightly | weekly | monthly | yearly
115-
# NOTE: this is where 'daily' matches
115+
# NOTE: this is where 'daily' matches
116116
# frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
117117
# frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
118118
class FrequencyDaily(FrequencyParser):
119-
pattern = r'(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
119+
pattern = r'(?P<period_unit>\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
120120
def normalize_match(self, match):
121121
frequency = 1
122122
period = 1
@@ -129,13 +129,13 @@ def normalize_match(self, match):
129129

130130

131131
# every | each | per | q
132-
# [other]
132+
# [other]
133133
# day | week | month | morning | afternoon | evening | night | hs
134134
# TODO: combine with the qpm/qhs/qday/qdaily group above (not sure if this still applies)
135135
# frequency = 1, period = 1 (or 2 if a is not null), periodUnit = b (normalize to d, wk, mo), [when = b (normalize to MORN, AFT, EVE, etc]
136136
# NOTE: moved below FrequencyDaily because "per day" was taking precedence in max daily dose text
137137
class FrequencyEveryDay(FrequencyParser):
138-
pattern = r'(?:every|each|q|per)\s?(?P<period>other\b|o\b)?\s*(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|month|morning|afternoon|evening at bedtime|bedtime|evening|night|hs\b|pm\b|am\b|d\b)'
138+
pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o)?\s?(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|wk\b|month|morning(?! and)|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
139139
def normalize_match(self, match):
140140
frequency = 1
141141
period = 2 if match.group('period') else 1
@@ -151,7 +151,7 @@ def normalize_match(self, match):
151151
# Monday, Tuesday, Wednesday, and Friday
152152
# dayOfWeek = a
153153
class FrequencySpecificDayOfWeek(FrequencyParser):
154-
pattern = r'(?:every|on|q)\s+(?P<day_of_week>(?:(?:\s*(?:and|&|\+|,)\s*)*(?:' + RE_DAYS_OF_WEEK + '))+)'
154+
pattern = r'(?:every|on|q)\s?(?P<day_of_week>(?:(?:\s?(?:and|&|\+|,|\s)\s?)?(?:' + RE_DAYS_OF_WEEK + '))+)'
155155
def normalize_match(self, match):
156156
# TODO: normalize days of week to be comma or pipe delimited - tuesday and thursday -> tuesday|thursday or tuesday,thursday
157157
day_of_week = match.group('day_of_week')
@@ -166,7 +166,7 @@ def normalize_match(self, match):
166166
# morning | evening | afternoon
167167
# frequency = 1, when = a
168168
class FrequencyInTheX(FrequencyParser):
169-
pattern = r'in the\s*(morning|evening|afternoon)'
169+
pattern = r'in the\s?(morning|evening|afternoon)'
170170
def normalize_match(self, match):
171171
frequency = 1
172172
period = 1
@@ -197,7 +197,7 @@ def normalize_match(self, match):
197197
# one time only
198198
# count = 1
199199
class FrequencyOneTime(FrequencyParser):
200-
pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once$|once in imaging|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
200+
# Matches text describing a single administration.
# - 'once in imaging' is listed before 'once' because alternation takes the
#   first alternative that matches; the shorter one would otherwise always win.
# - '(?:onset|sign)' uses '|' so either word matches; the previous
#   '(?:onset:sign)' only matched the literal text 'onset:sign'.
pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once in imaging|once|before transfusion|(?:one|1) hour prior to (?:dental )?appointment|at (?:the )?(?:first|1st) (?:onset|sign) of symptoms)'
201201
def normalize_match(self, match):
202202
count = 1
203203
frequency_text_start, frequency_text_end = match.span()
@@ -208,12 +208,11 @@ def normalize_match(self, match):
208208

209209

210210
class FrequencyAsDirected(FrequencyParser):
211-
pattern = r'as directed(?: on package)?|ad lib|as instructed|see admin instructions|see notes'
211+
pattern = r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions|follow package directions|see notes|sliding scale|per package instructions'
212212
def normalize_match(self, match):
213-
# TODO: how to capture just text?
214-
# text = 'as directed'
215213
frequency_text_start, frequency_text_end = match.span()
216-
frequency_text = match.group(0)
214+
# frequency_text = match.group(0)
215+
frequency_text = 'as directed'
217216
frequency_readable = frequency_text
218217
# TODO: normalize text numbers to integer numbers - maybe make separate normalize_period_unit function that also hits the text_to_int function?
219218
return self.generate_match({'frequency_text_start': frequency_text_start, 'frequency_text_end': frequency_text_end, 'frequency_text': frequency_text, 'frequency_readable': frequency_readable})
@@ -236,5 +235,5 @@ def normalize_match(self, match):
236235
FrequencyAtBedtime(),
237236
FrequencyOneTime(),
238237
# NOTE: this parser was removed for the DRX implementation and has since been re-enabled
239-
# FrequencyAsDirected(),
238+
FrequencyAsDirected(),
240239
]

parsers/indication.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# TODO: replace wildcard indication search with INDICATIONS list from normalize.py
66
class IndicationParser(Parser):
77
parser_type = 'indication'
8-
pattern = r'(?P<as_needed>as needed for|if needed for|as needed|if needed|prn for|prf|prn)(?:\s?(?P<indication>.{,250}))?'
8+
pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?'
99
match_keys = ['as_needed', 'indication', 'indication_text_start', 'indication_text_end', 'indication_text', 'indication_readable']
1010
def normalize_match(self, match):
1111
as_needed = 1
@@ -30,7 +30,7 @@ def get_readable(self, as_needed=None, indication=None):
3030
return readable
3131

3232
class ChronicIndicationParser(IndicationParser):
33-
pattern = r'(?!as needed|if needed|prn|prf) for (?P<indication>.{,250})(?!' + RE_RANGE + r')'
33+
pattern = r'(?!as needed |if needed |prn |prf |prnf )(?:for|indications) (?P<indication>.{,250})(?!' + RE_RANGE + r')'
3434
def normalize_match(self, match):
3535
indication_text = match.group('indication')
3636
indication = (get_indication(indication_text) if indication_text != None else indication_text)

parsers/route.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def normalize_pattern(self):
1111
# and join them with a | character
1212
# and add them to the route_patterns array
1313
route_patterns.append(r'|'.join(p))
14-
pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
14+
pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
1515
return pattern
1616
def normalize_match(self, match):
1717
route = get_normalized(ROUTES, match.group('route'))
@@ -32,7 +32,7 @@ def normalize_pattern(self):
3232
# and join them with a | character
3333
# and add them to the route_patterns array
3434
route_patterns.append(r'|'.join(p))
35-
pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
35+
pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
3636
return pattern
3737
def normalize_match(self, match):
3838
route = get_normalized(INHALATION_ROUTES, match.group('route'))
@@ -51,7 +51,7 @@ def normalize_pattern(self):
5151
# and join them with a | character
5252
# and add them to the route_patterns array
5353
topical_route_patterns.append(r'|'.join(p))
54-
pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')\b', flags = re.I)
54+
pattern = re.compile(r'(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)', flags = re.I)
5555
return pattern
5656
def normalize_match(self, match):
5757
route = get_normalized(TOPICAL_ROUTES, match.group('route'))
@@ -113,8 +113,9 @@ def parse(self, sig):
113113
self.matches = matches
114114
return matches
115115

116+
116117
class InferredOralRouteParser(RouteParser):
117-
pattern = r'\b(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)\b'
118+
pattern = r'(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)'
118119
def normalize_pattern(self):
119120
return re.compile(self.pattern, flags = re.I)
120121
def normalize_match(self, match):
@@ -124,11 +125,48 @@ def normalize_match(self, match):
124125
route_readable = self.get_readable(route=route)
125126
return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
126127

128+
129+
# infers inhalation route for things like 'puffs' in the absence of other more specific routes
130+
class InferredInhalationRouteParser(RouteParser):
    """Infer the route 'into the lungs' from the word 'puff' or 'puffs'.

    Text followed by a nostril or nose phrase (' in each nostril',
    ' in the nose', ' each nostril', ' in nostril') is excluded.
    """

    # 'puff(?:s|(?!s))' consumes the plural 's' whenever it is present and
    # cannot fall back to the bare 'puff'. With an optional '(?:s)?' the engine
    # backtracked to 'puff' in 'puffs in each nostril'; the lookaheads then saw
    # 's in each nostril' and passed, so nasal text was matched anyway.
    pattern = r'puff(?:s|(?!s))(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)'

    def normalize_pattern(self):
        """Return the class pattern compiled case-insensitively."""
        return re.compile(self.pattern, flags = re.I)

    def normalize_match(self, match):
        """Build the generated match with the fixed route 'into the lungs'."""
        route = 'into the lungs'
        route_text_start, route_text_end = match.span()
        route_text = match[0]
        route_readable = self.get_readable(route=route)
        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
140+
141+
142+
class MiscellaneousRouteParser(RouteParser):
    """Route parser that reports the route 'miscellaneous' for MISCELLANEOUS_ROUTES text."""

    def normalize_pattern(self):
        """Return one case-insensitive alternation of every name and synonym.

        MISCELLANEOUS_ROUTES is iterated as name -> list of pattern strings.
        Each entry contributes its patterns followed by its own name.
        """
        # Build a new list per entry rather than appending to the list stored
        # in MISCELLANEOUS_ROUTES: appending in place grew the shared lists by
        # one duplicate name every time any parser compiled its pattern.
        # NOTE(review): confirm no other code relies on the name having been
        # appended to MISCELLANEOUS_ROUTES as a side effect.
        route_patterns = [
            r'|'.join([*synonyms, name])
            for name, synonyms in MISCELLANEOUS_ROUTES.items()
        ]
        return re.compile(r'(' + r'|'.join(route_patterns) + r')', flags = re.I)

    def normalize_match(self, match):
        """Build the generated match: route 'miscellaneous', span and text of the hit."""
        route = 'miscellaneous'
        route_text_start, route_text_end = match.span()
        route_text = match[0]
        route_readable = self.get_readable(route=route)
        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
159+
160+
161+
# NOTE: moved InhalationRouteParser above RouteParser here so that "2 PUFFS BY MOUTH DAILY" resolved to "into the lungs" instead of "by mouth"...
162+
# however, left it in different order above for class inheritance
127163
parsers = [
128-
RouteParser(),
129164
InhalationRouteParser(),
165+
RouteParser(),
130166
TopicalRouteParser(),
131-
InferredOralRouteParser()
167+
# InferredOralRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
168+
# InferredInhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
169+
MiscellaneousRouteParser(),
132170
]
133171

134172
#print(RouteParser().parse('take one by mouth daily'))

0 commit comments

Comments
 (0)