From eab59d2f7e02b31be0bf76981a45d6f2e5ba362b Mon Sep 17 00:00:00 2001
From: ligi
Date: Thu, 23 Nov 2017 23:33:22 +0800
Subject: [PATCH] fix RatcliffObershelpSimilarity at ("abab","ababab") case

---
 FuzzyString/RatcliffObershelpSimilarity.cs | 53 +++++++++++++++++++++-
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/FuzzyString/RatcliffObershelpSimilarity.cs b/FuzzyString/RatcliffObershelpSimilarity.cs
index 386340c..2453c4d 100644
--- a/FuzzyString/RatcliffObershelpSimilarity.cs
+++ b/FuzzyString/RatcliffObershelpSimilarity.cs
@@ -10,7 +10,56 @@ public static partial class ComparisonMetrics
     {
         public static double RatcliffObershelpSimilarity(this string source, string target)
         {
-            return (2 * Convert.ToDouble(source.Intersect(target).Count())) / (Convert.ToDouble(source.Length + target.Length));
+            var matchs = GetMatchQueue(source, target);
+            return 2.0 * matchs.Select(match => match.Length).Sum() / (source.Length + target.Length);
+        }
+
+        private static List<string> GetMatchQueue(string source, string target)
+        {
+            List<string> list = new List<string>();
+            var match = FrontMaxMatch(source, target);
+            if (match.Length > 0)
+            {
+                var frontSource = source.Substring(0, source.IndexOf(match));
+                var frontTarget = target.Substring(0, target.IndexOf(match));
+                var frontQueue = GetMatchQueue(frontSource, frontTarget);
+
+                var endSource = source.Substring(source.IndexOf(match) + match.Length);
+                var endTarget = target.Substring(target.IndexOf(match) + match.Length);
+                var endQueue = GetMatchQueue(endSource, endTarget);
+
+                list.Add(match);
+                list.AddRange(frontQueue);
+                list.AddRange(endQueue);
+                return list;
+            }
+            else
+            {
+                return list;
+            }
+        }
+
+        private static string FrontMaxMatch(string a, string b)
+        {
+            var index = 0;
+            var length = 0;
+
+            for (int i = 0; i < a.Length; i++)
+            {
+                Enumerable.Range(1, a.Length - i)
+                          .ToList()
+                          .ForEach
+                          (len =>
+                          {
+                              if (len > length && b.Contains(a.Substring(i, len)))
+                              {
+                                  index = i;
+                                  length = len;
+                              }
+                          });
+            }
+
+            return a.Substring(index, length);
         }
     }
-}
+}
\ No newline at end of file