
Commit 7f0e953: Phase 3 work
Parent: a1463a4

7 files changed: +144 additions, -84 deletions

parsers/dose.py

Lines changed: 11 additions & 4 deletions

@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the dose_patterns array
             dose_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        pattern = re.compile(r'(?:(?P<dose_negation>' + RE_DOSE_STRENGTH_NEGATION + r')\s?)?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + r')(?!\s?\/\s?act)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         # alternatively, if negation text is found before the dose, don't generate a match
@@ -111,15 +111,22 @@ def normalize_match(self, match):
 
 class EachDoseUnitParser(DoseParser):
     def normalize_pattern(self):
-        dose_patterns = r'|'.join(ROUTES['miscellaneous'])
-        pattern = re.compile(r'(' + dose_patterns + r')', flags = re.I)
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
+        dose = 1
         dose_unit = 'each'
         dose_text_start, dose_text_end = match.span()
         dose_text = match[0]
         dose_readable = self.get_readable(dose_unit=dose_unit)
-        return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
+        return self.generate_match({'dose': dose, 'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
 
 
 parsers = [
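
A quick standalone check of the new (?!\s?\/\s?act) lookahead. This is a simplified sketch: a small literal unit list stands in for the generated dose_patterns alternation and a bare \d+ stands in for RE_RANGE, neither of which is shown in this diff. It illustrates why a strength like "90 mcg/actuation" is no longer read as the administered dose (EachDoseUnitParser also now reports dose = 1 alongside dose_unit = 'each').

import re

# Simplified stand-in for the generated dose pattern; 'puff|mcg|mg' replaces the
# real dose_patterns list and \d+ replaces RE_RANGE (both are assumptions for illustration).
pattern = re.compile(r'(?P<dose>\d+)\s?(?P<dose_unit>puff(?:s)?|mcg|mg)(?!\s?\/\s?act)', re.I)

print(pattern.search('inhale 2 puffs (90 mcg/actuation) twice daily').groupdict())
# {'dose': '2', 'dose_unit': 'puffs'}
print(pattern.search('albuterol 90 mcg/actuation inhaler'))
# None -- '90 mcg' is skipped because '/act' follows the unit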

parsers/frequency.py

Lines changed: 8 additions & 9 deletions

@@ -72,7 +72,7 @@ def normalize_match(self, match):
 # frequency = a (1 if once, 2 if twice), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # NOTE: 'daily' won't match this pattern because it requires specific times *per* day
 class FrequencyXTimesPerDay(FrequencyParser):
-    pattern = r'(?P<frequency>' + RE_RANGE + r'\s*(?:time(?:s)?|x|nights|days)|once|twice)\s*(?:per|a|each|every|\/)\s*(?P<period_unit>day|week|month|year|d\b|w\b|mon|m\b|yr)'
+    pattern = r'(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x|nights|days)|once|twice)\s?(?:per|a|each|every|\/)\s?(?P<period_unit>day|week|wk\b|month|year|d\b|w\b|mon|m\b|yr)'
     def normalize_match(self, match):
         frequency = frequency_max = match.group('frequency')
         if (frequency):
@@ -95,7 +95,7 @@ def normalize_match(self, match):
 # frequency = a[0], frequencyMax = a[1], period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 # frequency = a (1 if once, 2 if twice, 1 if null), period = 1, periodUnit = b (normalize to d, wk, mo, yr)
 class FrequencyXTimesDaily(FrequencyParser):
-    pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|\bd\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
+    pattern = r'(?:(?P<frequency>' + RE_RANGE + r'\s?(?:time(?:s)?|x)|once|twice)(?: \ba\b| per)?\s?)(?P<period_unit>day|d\b|daily|dialy|weekly|monthly|yearly|\bhs\b)'
     def normalize_match(self, match):
         frequency = frequency_max = match.group('frequency')
         if (frequency):
@@ -135,7 +135,7 @@ def normalize_match(self, match):
 # frequency = 1, period = 1 (or 2 if a is not null), periodUnit = b (normalize to d, wk, mo), [when = b (normalize to MORN, AFT, EVE, etc]
 # NOTE: moved below FrequencyDaily because "per day" was taking precedence in max daily dose text
 class FrequencyEveryDay(FrequencyParser):
-    pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o\b)?\s*(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|month|morning|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
+    pattern = r'(?:every|each|q|per|\ba)\s?(?P<period>other\b|o)?\s?(?:day (?:in the|at)\s?)?(?P<period_unit>hour|day|week|wk\b|month|morning(?! and)|afternoon|evening at bedtime|bedtime|evening|night|hs|pm|am|d\b)'
     def normalize_match(self, match):
         frequency = 1
         period = 2 if match.group('period') else 1
@@ -166,7 +166,7 @@ def normalize_match(self, match):
 # morning | evening | afternoon
 # frequency = 1, when = a
 class FrequencyInTheX(FrequencyParser):
-    pattern = r'in the\s*(morning|evening|afternoon)'
+    pattern = r'in the\s?(morning|evening|afternoon)'
     def normalize_match(self, match):
         frequency = 1
         period = 1
@@ -197,7 +197,7 @@ def normalize_match(self, match):
 # one time only
 # count = 1
 class FrequencyOneTime(FrequencyParser):
-    pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once|once in imaging|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
+    pattern = r'(?:x\s?1\b(?!day| day|d\b| d\b|week| week|w\b| w\b|month| month|mon|m\b| m\b| mon\b)|(?:1|one) time(?: only)?(?! daily| per)|for (?:1|one) dose|once|once in imaging|before transfusion|(?:one|1) hour prior to (?:dental )?appointment|at (?:the )?(?:first|1st) (?:onset:sign) of symptoms)'
     def normalize_match(self, match):
         count = 1
         frequency_text_start, frequency_text_end = match.span()
@@ -208,12 +208,11 @@ def normalize_match(self, match):
 
 
 class FrequencyAsDirected(FrequencyParser):
-    pattern = r'as directed(?: on package)?|ad lib|as instructed|see admin instructions|see notes'
+    pattern = r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions|follow package directions|see notes|sliding scale|per package instructions'
     def normalize_match(self, match):
-        # TODO: how to capture just text?
-        # text = 'as directed'
         frequency_text_start, frequency_text_end = match.span()
-        frequency_text = match.group(0)
+        # frequency_text = match.group(0)
+        frequency_text = 'as directed'
         frequency_readable = frequency_text
         # TODO: normalize text numbers to integer numbers - maybe make separate normalize_period_unit function that also hits the text_to_int function?
         return self.generate_match({'frequency_text_start': frequency_text_start, 'frequency_text_end': frequency_text_end, 'frequency_text': frequency_text, 'frequency_readable': frequency_readable})
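
The widened FrequencyAsDirected alternation can be exercised on its own; with the new normalize_match, every variant is reported with the canonical text 'as directed'. The pattern below is copied from this diff; the test sigs are made-up examples.

import re

# FrequencyAsDirected alternation from this commit, compiled standalone.
pattern = re.compile(
    r'as directed(?: on package)?|ad lib|as dir\b|as instructed|see admin instructions'
    r'|follow package directions|see notes|sliding scale|per package instructions',
    re.I,
)

for sig in ['use as dir', 'inject per sliding scale', 'apply as directed on package']:
    print(sig, '->', 'as directed' if pattern.search(sig) else None)
# each sig above prints 'as directed'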

parsers/indication.py

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 # TODO: replace wildcard indication search with INDICATIONS list from normalize.py
 class IndicationParser(Parser):
     parser_type = 'indication'
-    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn)(?:\s?(?P<indication>.{,250}))?'
+    pattern = r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?'
     match_keys = ['as_needed', 'indication', 'indication_text_start', 'indication_text_end', 'indication_text', 'indication_readable']
     def normalize_match(self, match):
         as_needed = 1
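
The added 'at (?:the )?(?:first|1st) sign of' trigger marks the sig as as-needed and still hands the trailing text to the wildcard indication capture. The pattern below is copied from this diff; the example sig is made up.

import re

# IndicationParser pattern from this commit, compiled standalone.
pattern = re.compile(
    r'(?P<as_needed>as needed for|if needed for|as needed|to prevent|if needed'
    r'|prn for|prnf|prf|prn|at (?:the )?(?:first|1st) sign of)(?:\s?(?P<indication>.{,250}))?',
    re.I,
)

m = pattern.search('take 1 tablet at the first sign of migraine')
print(m.group('as_needed'), '|', m.group('indication'))
# at the first sign of | migraine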

parsers/route.py

Lines changed: 42 additions & 6 deletions

@@ -11,7 +11,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(ROUTES, match.group('route'))
@@ -32,7 +32,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(route_patterns) + r')\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(route_patterns) + r')', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(INHALATION_ROUTES, match.group('route'))
@@ -51,7 +51,7 @@ def normalize_pattern(self):
             # and join them with a | character
             # and add them to the route_patterns array
             topical_route_patterns.append(r'|'.join(p))
-        pattern = re.compile(r'\b(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)\b', flags = re.I)
+        pattern = re.compile(r'(?P<route>' + r'|'.join(topical_route_patterns) + r')(?!\s?pain)', flags = re.I)
         return pattern
     def normalize_match(self, match):
         route = get_normalized(TOPICAL_ROUTES, match.group('route'))
@@ -113,8 +113,9 @@ def parse(self, sig):
         self.matches = matches
         return matches
 
+
 class InferredOralRouteParser(RouteParser):
-    pattern = r'\b(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)\b'
+    pattern = r'(?P<route>(?!vaginal|sublingual)tab(?:let)?(?:s)?(?!.*(?:sublingual(?:ly)?|into|per|on the|between the|under|by sublingual route|by buccal route))|cap(?:sule)?(?:s)?|chew(?:able)?|\dpo|capful|pill)'
     def normalize_pattern(self):
         return re.compile(self.pattern, flags = re.I)
     def normalize_match(self, match):
@@ -124,13 +125,48 @@ def normalize_match(self, match):
         route_readable = self.get_readable(route=route)
         return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
 
+
+# infers inhalation route for things like 'puffs' in the absence of other more specific routes
+class InferredInhalationRouteParser(RouteParser):
+    pattern = r'puff(?:s)?(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)'
+    def normalize_pattern(self):
+        return re.compile(self.pattern, flags = re.I)
+    def normalize_match(self, match):
+        route = 'into the lungs'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
+class MiscellaneousRouteParser(RouteParser):
+    def normalize_pattern(self):
+        dose_patterns = []
+        for n, p in MISCELLANEOUS_ROUTES.items():
+            # add the name of the pattern to the list of matched patterns
+            p.append(n)
+            # and join them with a | character
+            # and add them to the dose_patterns array
+            dose_patterns.append(r'|'.join(p))
+        pattern = re.compile(r'(' + r'|'.join(dose_patterns) + r')', flags = re.I)
+        return pattern
+    def normalize_match(self, match):
+        route = 'miscellaneous'
+        route_text_start, route_text_end = match.span()
+        route_text = match[0]
+        route_readable = self.get_readable(route=route)
+        return self.generate_match({'route': route, 'route_text_start': route_text_start, 'route_text_end': route_text_end, 'route_text': route_text, 'route_readable': route_readable})
+
+
 # NOTE: moved InhalationRouteParser above RouteParser here so that "2 PUFFS BY MOUTH DAILY" resolved to "into the lungs" instead of "by mouth"...
 # however, left it in different order above for class inheritance
 parsers = [
-    # InhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    InhalationRouteParser(),
     RouteParser(),
     TopicalRouteParser(),
-    # InferredOralRouteParser()
+    # InferredOralRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    # InferredInhalationRouteParser(), # turned off for VUMC - TODO: need to create customer "settings"
+    MiscellaneousRouteParser(),
 ]
 
 #print(RouteParser().parse('take one by mouth daily'))
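
The new InferredInhalationRouteParser pattern can also be checked in isolation: a bare 'puff(s)' is inferred as an inhaled route, while the negative lookaheads leave nasal phrasings for other parsers. The pattern is copied from this diff (the parser itself ships commented out in the parsers list); the example sigs are made up.

import re

# InferredInhalationRouteParser pattern from this commit, compiled standalone.
pattern = re.compile(
    r'puff(?:s)?(?! in each nostril)(?! in the nose)(?! each nostril)(?! in nostril)',
    re.I,
)

print(bool(pattern.search('2 puffs twice daily')))           # True  -> inferred 'into the lungs'
print(bool(pattern.search('1 puff in each nostril daily')))  # False -> not inferred as inhalation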
