Skip to content

Commit 85a41e2

Browse files
authored
Merge branch 'master' into structure_additional_info
2 parents 2ef2dc5 + 196dc8d commit 85a41e2

12 files changed

+427
-51
lines changed

parsers/dose.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,6 @@ def get_readable(self, dose=None, dose_max=None, dose_unit=None):
4242
readable = readable.strip()
4343
return readable
4444

45-
class DoseUnitOnlyParser(DoseParser):
46-
def normalize_pattern(self):
47-
dose_patterns = []
48-
for n, p in DOSE_UNITS.items():
49-
# add the name of the pattern to the list of matched patterns
50-
p.append(n)
51-
# and join them with a | character
52-
# and add them to the dose_patterns array
53-
dose_patterns.append(r'|'.join(p))
54-
pattern = re.compile(r'\b(?P<dose_unit>' + r'|'.join(dose_patterns) + r')\b', flags = re.I)
55-
return pattern
56-
def normalize_match(self, match):
57-
dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
58-
dose_text_start, dose_text_end = match.span()
59-
dose_text = match[0]
60-
dose_readable = self.get_readable(dose_unit=dose_unit)
61-
return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
62-
6345
class DoseOnlyParser(DoseParser):
6446
def normalize_pattern(self):
6547
method_patterns = []
@@ -86,10 +68,30 @@ def normalize_match(self, match):
8668
dose_readable = self.get_readable(dose=dose, dose_max=dose_max)
8769
return self.generate_match({'dose': dose, 'dose_max': dose_max, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
8870

71+
# NOTE: the dose-unit-only parser was moved BELOW the dose-only parser because we prefer matching "2" over "tablets" in "2 po qid max dose 6 tabs"
72+
class DoseUnitOnlyParser(DoseParser):
73+
def normalize_pattern(self):
74+
dose_patterns = []
75+
for n, p in DOSE_UNITS.items():
76+
# add the name of the pattern to the list of matched patterns
77+
p.append(n)
78+
# and join them with a | character
79+
# and add them to the dose_patterns array
80+
dose_patterns.append(r'|'.join(p))
81+
pattern = re.compile(r'\b(?P<dose_unit>' + r'|'.join(dose_patterns) + r')\b', flags = re.I)
82+
return pattern
83+
def normalize_match(self, match):
84+
dose_unit = get_normalized(DOSE_UNITS, match.group('dose_unit'))
85+
dose_text_start, dose_text_end = match.span()
86+
dose_text = match[0]
87+
dose_readable = self.get_readable(dose_unit=dose_unit)
88+
return self.generate_match({'dose_unit': dose_unit, 'dose_text_start': dose_text_start, 'dose_text_end': dose_text_end, 'dose_text': dose_text, 'dose_readable': dose_readable})
89+
90+
8991
parsers = [
9092
DoseParser(),
91-
DoseUnitOnlyParser(),
9293
DoseOnlyParser(),
94+
DoseUnitOnlyParser(),
9395
]
9496

9597
#print(DoseParser().parse('take one capsule prn nausea for 5 days'))

parsers/max.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from .classes.parser import *
2+
3+
class MaxParser(Parser):
4+
parser_type = 'max'
5+
match_keys = ['max_numerator_value', 'max_numerator_unit', 'max_denominator_value', 'max_denominator_unit', 'max_text_start', 'max_text_end', 'max_text', 'max_readable']
6+
def normalize_pattern(self):
7+
dose_patterns = []
8+
for n, p in DOSE_UNITS.items():
9+
# add the name of the pattern to the list of matched patterns
10+
p.append(n)
11+
# and join them with a | character
12+
# and add them to the dose_patterns array
13+
dose_patterns.append(r'|'.join(p))
14+
strength_patterns = []
15+
for n, p in STRENGTH_UNITS.items():
16+
# add the name of the pattern to the list of matched patterns
17+
p.append(n)
18+
# and join them with a | character
19+
# and add them to the strength_patterns array
20+
strength_patterns.append(r'|'.join(p))
21+
pattern = re.compile(r'(?:max(?: dose)?|do not take more than|no more than|nmt|do not exceed)\s?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + '|' + r'|'.join(strength_patterns) + r')?\s?(?:per|\/|in(?: a)?)\s?(?P<period>' + RE_RANGE + r')?\s?(?P<period_unit>day|hours|hour|hrs|hr|\bh\b|week|month|year|d\b|w\b|mon|m\b|yr)', flags = re.I)
22+
return pattern
23+
def normalize_match(self, match):
24+
dose_range = split_range(match.group('dose'))
25+
max_numerator_value = dose_range[1] or dose_range[0]
26+
# need to check for normalizaion for both dose units and strength units
27+
# will return text of match if no normalization found
28+
max_numerator_unit = get_normalized(STRENGTH_UNITS, get_normalized(DOSE_UNITS, match.group('dose_unit'))) if match.group('dose_unit') else None
29+
max_denominator_value = None
30+
if match.group('period'):
31+
period_range = split_range(match.group('period'))
32+
max_denominator_value = period_range[1] or period_range[0]
33+
max_denominator_unit = get_normalized(PERIOD_UNIT, match.group('period_unit')) if match.group('period_unit') else None
34+
# default the denominator value to 1 when only a unit is given, e.g. "max 3 tablets per day" -> "max 3 tablets per 1 day"
35+
if max_denominator_unit and not max_denominator_value:
36+
max_denominator_value = 1
37+
max_text_start, max_text_end = match.span()
38+
max_text = match[0]
39+
max_readable = self.get_readable(
40+
max_numerator_value=max_numerator_value,
41+
max_numerator_unit=max_numerator_unit,
42+
max_denominator_value=max_denominator_value,
43+
max_denominator_unit=max_denominator_unit)
44+
return self.generate_match({'max_numerator_value': max_numerator_value, 'max_numerator_unit': max_numerator_unit, 'max_denominator_value': max_denominator_value, 'max_denominator_unit': max_denominator_unit, 'max_text_start': max_text_start, 'max_text_end': max_text_end, 'max_text': max_text, 'max_readable': max_readable})
45+
def get_readable(self, max_numerator_value=None, max_numerator_unit=None, max_denominator_value=None, max_denominator_unit=None):
46+
if 1 == 2:
47+
return ''
48+
49+
readable = ' - max'
50+
readable += ' ' + str(max_numerator_value) if max_numerator_value else ''
51+
plural_dose_unit = max_numerator_value and max_numerator_value > 1
52+
if max_numerator_unit:
53+
if plural_dose_unit:
54+
max_numerator_unit += 'e' if max_numerator_unit[-1:] == 'h' else ''
55+
max_numerator_unit += 's' if max_numerator_unit not in ['oz','mL','L','cm', 'mg','mcg','g','mEq'] else ''
56+
readable += ' ' + max_numerator_unit
57+
58+
readable += ' per' if max_denominator_value or max_denominator_unit else ''
59+
readable += ' ' + str(max_denominator_value) if max_denominator_value and max_denominator_value != 1 else ''
60+
plural_duration = max_denominator_value and max_denominator_value > 1
61+
if max_denominator_unit:
62+
max_denominator_unit += 's' if plural_duration else ''
63+
readable += ' ' + max_denominator_unit
64+
return readable
65+
66+
67+
class MaxDailyParser(MaxParser):
68+
def normalize_pattern(self):
69+
dose_patterns = []
70+
for n, p in DOSE_UNITS.items():
71+
# add the name of the pattern to the list of matched patterns
72+
p.append(n)
73+
# and join them with a | character
74+
# and add them to the dose_patterns array
75+
dose_patterns.append(r'|'.join(p))
76+
strength_patterns = []
77+
for n, p in STRENGTH_UNITS.items():
78+
# add the name of the pattern to the list of matched patterns
79+
p.append(n)
80+
# and join them with a | character
81+
# and add them to the strength_patterns array
82+
strength_patterns.append(r'|'.join(p))
83+
pattern = re.compile(r'(?:max(?:imum)? daily (?:dose|amount)|mdd)\s?(?:=|is)?\s?(?P<dose>' + RE_RANGE + r')\s?(?P<dose_unit>' + r'|'.join(dose_patterns) + '|' + r'|'.join(strength_patterns) + r')?', flags = re.I)
84+
return pattern
85+
def normalize_match(self, match):
86+
dose_range = split_range(match.group('dose'))
87+
max_numerator_value = dose_range[1] or dose_range[0]
88+
# need to check for normalizaion for both dose units and strength units
89+
# will return text of match if no normalization found
90+
max_numerator_unit = get_normalized(STRENGTH_UNITS, get_normalized(DOSE_UNITS, match.group('dose_unit'))) if match.group('dose_unit') else None
91+
max_denominator_value = 1
92+
max_denominator_unit = get_normalized(PERIOD_UNIT, 'day')
93+
max_text_start, max_text_end = match.span()
94+
max_text = match[0]
95+
max_readable = self.get_readable(
96+
max_numerator_value=max_numerator_value,
97+
max_numerator_unit=max_numerator_unit,
98+
max_denominator_value=max_denominator_value,
99+
max_denominator_unit=max_denominator_unit)
100+
return self.generate_match({'max_numerator_value': max_numerator_value, 'max_numerator_unit': max_numerator_unit, 'max_denominator_value': max_denominator_value, 'max_denominator_unit': max_denominator_unit, 'max_text_start': max_text_start, 'max_text_end': max_text_end, 'max_text': max_text, 'max_readable': max_readable})
101+
102+
parsers = [
103+
MaxParser(),
104+
MaxDailyParser(),
105+
]

parsers/services/normalize.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -377,10 +377,7 @@
377377
'mcg': [r'(?:microgram(?:s)?|mcgs)\b'],
378378
'g': [r'(?:gm|gms|gram(?:s)?)\b'],
379379
'international unit': [r'i\.u\.\b', r'iu\b', 'international units', r'int\'l unit', 'intl unit'],
380-
'unit': [r'units'],
381380
'mEq': [r'milliequivalent(?:s)?'],
382-
'teaspoon': [r'\btsp\b', 'teaspoons', 'teaspoonsful', 'teaspoonful', 'teaspoonfuls'],
383-
'tablespoon': [r'\btbsp\b', 'tablespoon', 'tablespoonsful', 'tablespoonful', 'tablespoonfuls'],
384381
}
385382

386383
DOSE_STRENGTH_NEGATION = [
@@ -400,6 +397,30 @@
400397
]
401398
RE_DOSE_STRENGTH_NEGATION = r'|'.join(DOSE_STRENGTH_NEGATION)
402399

400+
EXCLUDED_MDD_DOSE_UNITS = [
401+
'mg',
402+
'mcg',
403+
# 'g',
404+
# 'mL',
405+
'L',
406+
'international unit',
407+
'unit',
408+
'mEq',
409+
'teaspoon',
410+
'tablespoon',
411+
'puff',
412+
'application',
413+
'spray',
414+
'drop',
415+
'syringe',
416+
'vial',
417+
'packet',
418+
'pen',
419+
'oz',
420+
'injection',
421+
'cm',
422+
]
423+
403424
DOSE_UNITS = {
404425
# to match ahead of one-letter dose form (L)
405426
'lozenge': [r'\bloz\b'],
@@ -411,6 +432,9 @@
411432
'L': [r'(?:\bliter)'],
412433
'oz': ['ounce'],
413434
'cm': ['centimeter', r'cm\b', r'cms\b'],
435+
'unit': [r'units'],
436+
'teaspoon': [r'\btsp\b', 'teaspoons', 'teaspoonsful', 'teaspoonful', 'teaspoonfuls'],
437+
'tablespoon': [r'\btbsp\b', 'tablespoon', 'tablespoonsful', 'tablespoonful', 'tablespoonfuls'],
414438
# tablet
415439
# TODO: add all synonyms to exclusion for tablet
416440
# ERROR: make sure "tablespoon" does not match on "tab" -- use a negative lookahead

parsers/sig.py

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .classes.parser import *
2-
from . import method, dose, strength, route, frequency, when, duration, indication, additional_info
2+
from . import method, dose, strength, route, frequency, when, duration, indication, max, additional_info
33
import csv
44

55
# TODO: need to move all this to the main app and re-purpose the sig.py parser
@@ -19,11 +19,12 @@ class SigParser(Parser):
1919
'when': when.parsers,
2020
'duration': duration.parsers,
2121
'indication': indication.parsers,
22+
'max': max.parsers,
2223
'additional_info': additional_info.parsers,
2324
}
2425
# TODO: make this match_keys assignment more elegant
25-
#match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + additional_info.parsers[0].match_keys
26-
match_keys = ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + additional_info.parsers[0].match_keys
26+
#match_keys = ['original_sig_text'] + ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
27+
match_keys = ['sig_text', 'sig_readable'] + method.parsers[0].match_keys + dose.parsers[0].match_keys + strength.parsers[0].match_keys + route.parsers[0].match_keys + frequency.parsers[0].match_keys + when.parsers[0].match_keys + duration.parsers[0].match_keys + indication.parsers[0].match_keys + max.parsers[0].match_keys + additional_info.parsers[0].match_keys
2728
parser_type = 'sig'
2829

2930
def get_normalized_sig_text(self, sig_text):
@@ -38,27 +39,85 @@ def get_normalized_sig_text(self, sig_text):
3839
sig_text = ' '.join(sig_text.split())
3940
return sig_text
4041

41-
def get_readable(self, method=None, dose=None, strength=None, route=None, frequency=None, when=None, duration=None, indication=None, additional_info=None):
42-
method = method if method else ''
43-
dose = dose if dose else ''
44-
strength = strength if strength else ''
45-
route = route if route else ''
46-
frequency = frequency if frequency else ''
47-
when = when if when else ''
48-
duration = duration if duration else ''
49-
indication = indication if indication else ''
50-
additional_info = additional_info if additional_info else ''
42+
def get_readable(self, match_dict):
43+
method = match_dict['method_readable'] if match_dict['method_readable'] else ''
44+
dose = match_dict['dose_readable'] if match_dict['dose_readable'] else ''
45+
strength = match_dict['strength_readable'] if match_dict['strength_readable'] else ''
46+
route = match_dict['route_readable'] if match_dict['route_readable'] else ''
47+
frequency = match_dict['frequency_readable'] if match_dict['frequency_readable'] else ''
48+
when = match_dict['when_readable'] if match_dict['when_readable'] else ''
49+
duration = match_dict['duration_readable'] if match_dict['duration_readable'] else ''
50+
indication = match_dict['indication_readable'] if match_dict['indication_readable'] else ''
51+
max = match_dict['max_readable'] if match_dict['max_readable'] else ''
52+
additional_info = match_dict['additional_info_readable'] if match_dict['additional_info_readable'] else ''
5153

5254
if dose != '' and strength != '':
5355
strength = '(' + strength + ')'
54-
sig_elements = [method, dose, strength, route, frequency, when, duration, indication, additional_info]
56+
sig_elements = [method, dose, strength, route, frequency, when, duration, indication, max, additional_info]
5557
# join sig elements with spaces
5658
readable = ' '.join(sig_elements)
5759
# remove duplicate spaces, and in doing so, also trim whitespaces from around sig
5860
# this accounts for empty sig elements
5961
readable = ' '.join(readable.split())
6062
return readable
6163

64+
def get_period_per_day(self, period, period_unit):
65+
if not period:
66+
return None
67+
68+
if period_unit == 'hour':
69+
return 24 / period
70+
elif period_unit == 'day':
71+
return 1 / period
72+
elif period_unit == 'week':
73+
return 1 / (7 * period)
74+
elif period_unit == 'month':
75+
return 1 / (30 * period)
76+
else:
77+
return None
78+
79+
def get_max_dose_per_day(self, match_dict):
80+
# calculate max per day from sig instructions
81+
frequency = match_dict['frequency_max'] or match_dict['frequency']
82+
period = match_dict['period']
83+
period_unit = get_normalized(PERIOD_UNIT, match_dict['period_unit']) if match_dict['period_unit'] else match_dict['period_unit']
84+
# period_per_day can be None if period is missing or period_unit doesn't match hour / day / week / month
85+
period_per_day = self.get_period_per_day(period, period_unit)
86+
87+
dose = match_dict['dose_max'] or match_dict['dose']
88+
dose_unit = match_dict['dose_unit']
89+
90+
max_dose_per_day_sig = None
91+
if frequency and period_per_day and dose:
92+
max_dose_per_day_sig = frequency * period_per_day * dose
93+
94+
# calculate max per day from max dose (i.e. "max daily dose = 3" or "no more than 2 per week")
95+
frequency_max = 1
96+
period_max = match_dict['max_denominator_value']
97+
period_unit_max = match_dict['max_denominator_unit']
98+
# period_per_day_max can be None if period_max is missing or period_unit_max doesn't match hour / day / week / month
99+
period_per_day_max = self.get_period_per_day(period_max, period_unit_max)
100+
101+
dose_max = match_dict['max_numerator_value']
102+
dose_unit_max = match_dict['max_numerator_unit']
103+
104+
max_dose_per_day_max = None
105+
if frequency_max and period_per_day_max and dose_max:
106+
max_dose_per_day_max = frequency_max * period_per_day_max * dose_max
107+
108+
max_dose_per_day = None
109+
# if we are dealing with a complex dose unit, don't return a max_dose_per_day
110+
if dose_unit in EXCLUDED_MDD_DOSE_UNITS or dose_unit_max in EXCLUDED_MDD_DOSE_UNITS:
111+
return max_dose_per_day
112+
# if (at least one max dose is not null) and (the dose units match or one of the dose units is null)
113+
if (max_dose_per_day_sig or max_dose_per_day_max) and (dose_unit == dose_unit_max or not dose_unit or not dose_unit_max):
114+
# originally wrote this to choose the lowest dose per day
115+
# max_dose_per_day = min(d for d in [max_dose_per_day_sig, max_dose_per_day_max] if d is not None)
116+
# however, requirements changed to always prefer max over sig
117+
max_dose_per_day = max_dose_per_day_max or max_dose_per_day_sig
118+
119+
return max_dose_per_day
120+
62121
def parse(self, sig_text):
63122
match_dict = dict(self.match_dict)
64123
#match_dict['original_sig_text'] = sig_text
@@ -82,17 +141,9 @@ def parse(self, sig_text):
82141
for k, v in match.items():
83142
match_dict[k] = v
84143
#elif len(matches) == 0:
85-
match_dict['sig_readable'] = self.get_readable(
86-
method=match_dict['method_readable'],
87-
dose=match_dict['dose_readable'],
88-
strength=match_dict['strength_readable'],
89-
route=match_dict['route_readable'],
90-
frequency=match_dict['frequency_readable'],
91-
when=match_dict['when_readable'],
92-
duration=match_dict['duration_readable'],
93-
indication=match_dict['indication_readable'],
94-
additional_info=match_dict['additional_info_readable'],
95-
)
144+
match_dict['sig_readable'] = self.get_readable(match_dict)
145+
match_dict ['max_dose_per_day'] = self.get_max_dose_per_day(match_dict)
146+
96147
# calculate admin instructions based on leftover pieces of sig
97148
# would need to calculate overlap in each of the match_dicts
98149
# in doing so, maybe also return a map of the parsed parts of the sig for use in frontend highlighting
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Generated by Django 3.2.9 on 2022-07-18 16:26
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('sig', '0010_auto_20220707_0254'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='sigparsed',
15+
name='max_dose_denominator_unit',
16+
field=models.CharField(max_length=100, null=True),
17+
),
18+
migrations.AddField(
19+
model_name='sigparsed',
20+
name='max_dose_denominator_value',
21+
field=models.CharField(max_length=100, null=True),
22+
),
23+
migrations.AddField(
24+
model_name='sigparsed',
25+
name='max_dose_numerator_unit',
26+
field=models.CharField(max_length=100, null=True),
27+
),
28+
migrations.AddField(
29+
model_name='sigparsed',
30+
name='max_dose_numerator_value',
31+
field=models.CharField(max_length=100, null=True),
32+
),
33+
]

0 commit comments

Comments
 (0)