From ecd4421503d8a5e2e25cc4f00d1a7aa6be830eb6 Mon Sep 17 00:00:00 2001 From: jasmeetpahwa Date: Wed, 1 May 2019 19:48:04 +0530 Subject: [PATCH 1/2] CP-5043:- added support for complete token match in partial_ratio --- fuzzywuzzy/fuzz.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index 27f80c9f..519218cc 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -27,6 +27,18 @@ def ratio(s1, s2): m = SequenceMatcher(None, s1, s2) return utils.intr(100 * m.ratio()) +def get_token_start_index(longer, index): + for i in range(index, -1, -1): + if ' ' == longer[i]: + return (i+1) + return 0 + +def get_token_end_index(longer, index): + len_ = len(longer) + for i in range(index, len_): + if ' ' == longer[i]: + return (i-1) + return len_ - 1 @utils.check_for_none @utils.check_for_equivalence @@ -55,8 +67,10 @@ def partial_ratio(s1, s2): scores = [] for block in blocks: long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0 + long_start = get_token_start_index(longer, long_start) long_end = long_start + len(shorter) - long_substr = longer[long_start:long_end] + long_end = get_token_end_index(longer, long_end) + long_substr = longer[long_start:long_end+1] m2 = SequenceMatcher(None, shorter, long_substr) r = m2.ratio() From 969fc75a63917d86b9338f212e9ffbdbb0fa9f5a Mon Sep 17 00:00:00 2001 From: jasmeetpahwa Date: Thu, 2 May 2019 12:27:32 +0530 Subject: [PATCH 2/2] CP-5043:- test changes --- fuzzywuzzy/fuzz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index 519218cc..1f79d98f 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -63,7 +63,7 @@ def partial_ratio(s1, s2): # the best partial match will block align with at least one of those blocks # e.g. shorter = "abcd", longer = XXXbcdeEEE # block = (1,3,3) - # best score === ratio("abcd", "Xbcd") + # best score === ratio("abcd", "Xbcd") scores = [] for block in blocks: long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0