From 8c68d1cd65f139b144fcc3b66b5a0ed665a25a52 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Wed, 18 Apr 2018 17:38:17 +0300 Subject: [PATCH 01/13] if else turned into switch --- FuzzyString/ApproximatelyEquals.cs | 59 ++++++------------------------ 1 file changed, 12 insertions(+), 47 deletions(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index 5565afd..96cdc75 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -7,7 +7,7 @@ namespace FuzzyString { public static partial class ComparisonMetrics - { + { public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options) { List comparisonResults = new List(); @@ -89,53 +89,18 @@ public static bool ApproximatelyEquals(this string source, string target, Fuzzy return false; } - if (tolerance == FuzzyStringComparisonTolerance.Strong) - { - if (comparisonResults.Average() < 0.25) - { - return true; - } - else - { + switch (tolerance) + { + case FuzzyStringComparisonTolerance.Strong: + return comparisonResults.Average() < 0.25; + case FuzzyStringComparisonTolerance.Normal: + return comparisonResults.Average() < 0.5; + case FuzzyStringComparisonTolerance.Weak: + return comparisonResults.Average() < 0.75; + case FuzzyStringComparisonTolerance.Manual: + return comparisonResults.Average() > 0.6; + default: return false; - } - } - else if (tolerance == FuzzyStringComparisonTolerance.Normal) - { - if (comparisonResults.Average() < 0.5) - { - return true; - } - else - { - return false; - } - } - else if (tolerance == FuzzyStringComparisonTolerance.Weak) - { - if (comparisonResults.Average() < 0.75) - { - return true; - } - else - { - return false; - } - } - else if (tolerance == FuzzyStringComparisonTolerance.Manual) - { - if (comparisonResults.Average() > 0.6) - { - return true; - } - else - { - return false; - } - } - else - { - return false; } } } From 520b93b4259bccf54f2945693b40df602201d2d0 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Wed, 18 Apr 2018 20:38:43 +0300 Subject: [PATCH 02/13] Code style changes --- FuzzyString/ApproximatelyEquals.cs | 18 ++++++++++++------ FuzzyString/HammingDistance.cs | 15 +++++++-------- FuzzyString/JaccardDistance.cs | 4 ++-- FuzzyString/JaroDistance.cs | 20 +++++++++----------- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index 96cdc75..e3f76bb 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -8,7 +8,8 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options) + public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, + params FuzzyStringComparisonOptions[] options) { List comparisonResults = new List(); @@ -49,21 +50,26 @@ public static bool ApproximatelyEquals(this string source, string target, Fuzzy // Min: LevenshteinDistanceLowerBounds Max: LevenshteinDistanceUpperBounds if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance)) { - comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / Convert.ToDouble((Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target)))); + comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / + Convert.ToDouble(Math.Max(source.Length, target.Length) - + source.LevenshteinDistanceLowerBounds(target))); } else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance)) { - comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target))); + comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / + Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target))); } if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence)) { - comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubsequence(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length)))); + comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubsequence(target).Length / + Convert.ToDouble(Math.Min(source.Length, target.Length)))); } if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring)) { - comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubstring(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length)))); + comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubstring(target).Length / + Convert.ToDouble(Math.Min(source.Length, target.Length)))); } // Min: 0 Max: 1 @@ -104,4 +110,4 @@ public static bool ApproximatelyEquals(this string source, string target, Fuzzy } } } -} +} \ No newline at end of file diff --git a/FuzzyString/HammingDistance.cs b/FuzzyString/HammingDistance.cs index 277164d..f24b3ad 100644 --- a/FuzzyString/HammingDistance.cs +++ b/FuzzyString/HammingDistance.cs @@ -12,18 +12,17 @@ public static int HammingDistance(this string source, string target) { int distance = 0; - if (source.Length == target.Length) + if (source.Length != target.Length) return 99999; + + for (int i = 0; i < source.Length; i++) { - for (int i = 0; i < source.Length; i++) + if (!source[i].Equals(target[i])) { - if (!source[i].Equals(target[i])) - { - distance++; - } + distance++; } - return distance; } - else { return 99999; } + + return distance; } } } diff --git a/FuzzyString/JaccardDistance.cs b/FuzzyString/JaccardDistance.cs index 7221e89..ec4ecda 100644 --- a/FuzzyString/JaccardDistance.cs +++ b/FuzzyString/JaccardDistance.cs @@ -15,7 +15,7 @@ public static double JaccardDistance(this string source, string target) public static double JaccardIndex(this string source, string target) { - return (Convert.ToDouble(source.Intersect(target).Count())) / (Convert.ToDouble(source.Union(target).Count())); + return Convert.ToDouble(source.Intersect(target).Count()) / Convert.ToDouble(source.Union(target).Count()); } } -} +} \ No newline at end of file diff --git a/FuzzyString/JaroDistance.cs b/FuzzyString/JaroDistance.cs index 10002f6..d60c613 100644 --- a/FuzzyString/JaroDistance.cs +++ b/FuzzyString/JaroDistance.cs @@ -13,17 +13,15 @@ public static double JaroDistance(this string source, string target) int m = source.Intersect(target).Count(); if (m == 0) { return 0; } - else - { - string sourceTargetIntersetAsString = ""; - string targetSourceIntersetAsString = ""; - IEnumerable sourceIntersectTarget = source.Intersect(target); - IEnumerable targetIntersectSource = target.Intersect(source); - foreach (char character in sourceIntersectTarget) { sourceTargetIntersetAsString += character; } - foreach (char character in targetIntersectSource) { targetSourceIntersetAsString += character; } - double t = sourceTargetIntersetAsString.LevenshteinDistance(targetSourceIntersetAsString) / 2; - return ((m / source.Length) + (m / target.Length) + ((m - t) / m)) / 3; - } + + string sourceTargetIntersetAsString = ""; + string targetSourceIntersetAsString = ""; + IEnumerable sourceIntersectTarget = source.Intersect(target); + IEnumerable targetIntersectSource = target.Intersect(source); + foreach (char character in sourceIntersectTarget) { sourceTargetIntersetAsString += character; } + foreach (char character in targetIntersectSource) { targetSourceIntersetAsString += character; } + double t = sourceTargetIntersetAsString.LevenshteinDistance(targetSourceIntersetAsString) / 2; + return ((m / source.Length) + (m / target.Length) + ((m - t) / m)) / 3; } } } From b48fe35f49fbeca538f248ded833d9ffd9ffa4da Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Wed, 18 Apr 2018 22:07:52 +0300 Subject: [PATCH 03/13] use span From 341a725bc9f9168fd0cc9b4e310f9de724880ce6 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:20:35 +0300 Subject: [PATCH 04/13] Dropped unused namespaces --- FuzzyString/ApproximatelyEquals.cs | 2 - FuzzyString/FuzzyStringComparisonOptions.cs | 4 - FuzzyString/FuzzyStringComparisonTolerance.cs | 8 +- FuzzyString/HammingDistance.cs | 8 +- FuzzyString/JaccardDistance.cs | 3 - FuzzyString/JaroDistance.cs | 6 +- FuzzyString/JaroWinklerDistance.cs | 4 - FuzzyString/LevenshteinDistance.cs | 4 - FuzzyString/LongestCommonSubsequence.cs | 4 - FuzzyString/LongestCommonSubstring.cs | 3 - FuzzyString/Operations.cs | 85 ------------------- FuzzyString/OverlapCoefficient.cs | 3 - FuzzyString/RatcliffObershelpSimilarity.cs | 3 - FuzzyString/SorensenDiceDistance.cs | 3 - FuzzyString/TanimotoCoefficient.cs | 6 +- 15 files changed, 4 insertions(+), 142 deletions(-) delete mode 100644 FuzzyString/Operations.cs diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index e3f76bb..ecbc683 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -1,8 +1,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/FuzzyStringComparisonOptions.cs b/FuzzyString/FuzzyStringComparisonOptions.cs index b79dee5..3ffe1f7 100644 --- a/FuzzyString/FuzzyStringComparisonOptions.cs +++ b/FuzzyString/FuzzyStringComparisonOptions.cs @@ -1,8 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/FuzzyStringComparisonTolerance.cs b/FuzzyString/FuzzyStringComparisonTolerance.cs index ddbfeb1..30923ed 100644 --- a/FuzzyString/FuzzyStringComparisonTolerance.cs +++ b/FuzzyString/FuzzyStringComparisonTolerance.cs @@ -1,10 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace FuzzyString +namespace FuzzyString { public enum FuzzyStringComparisonTolerance { diff --git a/FuzzyString/HammingDistance.cs b/FuzzyString/HammingDistance.cs index f24b3ad..883ef04 100644 --- a/FuzzyString/HammingDistance.cs +++ b/FuzzyString/HammingDistance.cs @@ -1,10 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace FuzzyString +namespace FuzzyString { public static partial class ComparisonMetrics { diff --git a/FuzzyString/JaccardDistance.cs b/FuzzyString/JaccardDistance.cs index ec4ecda..3e5b518 100644 --- a/FuzzyString/JaccardDistance.cs +++ b/FuzzyString/JaccardDistance.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/JaroDistance.cs b/FuzzyString/JaroDistance.cs index d60c613..9156e0e 100644 --- a/FuzzyString/JaroDistance.cs +++ b/FuzzyString/JaroDistance.cs @@ -1,8 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Linq; namespace FuzzyString { diff --git a/FuzzyString/JaroWinklerDistance.cs b/FuzzyString/JaroWinklerDistance.cs index 310f5a5..eaa8a45 100644 --- a/FuzzyString/JaroWinklerDistance.cs +++ b/FuzzyString/JaroWinklerDistance.cs @@ -1,8 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/LevenshteinDistance.cs b/FuzzyString/LevenshteinDistance.cs index e016466..9321950 100644 --- a/FuzzyString/LevenshteinDistance.cs +++ b/FuzzyString/LevenshteinDistance.cs @@ -1,8 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/LongestCommonSubsequence.cs b/FuzzyString/LongestCommonSubsequence.cs index de159bc..fe25c05 100644 --- a/FuzzyString/LongestCommonSubsequence.cs +++ b/FuzzyString/LongestCommonSubsequence.cs @@ -1,8 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/LongestCommonSubstring.cs b/FuzzyString/LongestCommonSubstring.cs index 6871a8b..c9d3726 100644 --- a/FuzzyString/LongestCommonSubstring.cs +++ b/FuzzyString/LongestCommonSubstring.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; -using System.Linq; using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/Operations.cs b/FuzzyString/Operations.cs deleted file mode 100644 index a192068..0000000 --- a/FuzzyString/Operations.cs +++ /dev/null @@ -1,85 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace FuzzyString -{ - public static partial class Operations - { - public static string Capitalize(this string source) - { - return source.ToUpper(); - } - - public static string[] SplitIntoIndividualElements(string source) - { - string[] stringCollection = new string[source.Length]; - - for (int i = 0; i < stringCollection.Length; i++) - { - stringCollection[i] = source[i].ToString(); - } - - return stringCollection; - } - - public static string MergeIndividualElementsIntoString(IEnumerable source) - { - string returnString = ""; - - for (int i = 0; i < source.Count(); i++) - { - returnString += source.ElementAt(i); - } - return returnString; - } - - public static List ListPrefixes(this string source) - { - List prefixes = new List(); - - for (int i = 0; i < source.Length; i++) - { - prefixes.Add(source.Substring(0, i)); - } - - return prefixes; - } - - public static List ListBiGrams(this string source) - { - return ListNGrams(source, 2); - } - - public static List ListTriGrams(this string source) - { - return ListNGrams(source, 3); - } - - public static List ListNGrams(this string source, int n) - { - List nGrams = new List(); - - if (n > source.Length) - { - return null; - } - else if (n == source.Length) - { - nGrams.Add(source); - return nGrams; - } - else - { - for (int i = 0; i < source.Length - n; i++) - { - nGrams.Add(source.Substring(i, n)); - } - - return nGrams; - } - } - } -} diff --git a/FuzzyString/OverlapCoefficient.cs b/FuzzyString/OverlapCoefficient.cs index 3c33fa0..734dc82 100644 --- a/FuzzyString/OverlapCoefficient.cs +++ b/FuzzyString/OverlapCoefficient.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/RatcliffObershelpSimilarity.cs b/FuzzyString/RatcliffObershelpSimilarity.cs index 386340c..4660a48 100644 --- a/FuzzyString/RatcliffObershelpSimilarity.cs +++ b/FuzzyString/RatcliffObershelpSimilarity.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/SorensenDiceDistance.cs b/FuzzyString/SorensenDiceDistance.cs index 9713eed..492ed4f 100644 --- a/FuzzyString/SorensenDiceDistance.cs +++ b/FuzzyString/SorensenDiceDistance.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace FuzzyString { diff --git a/FuzzyString/TanimotoCoefficient.cs b/FuzzyString/TanimotoCoefficient.cs index 2cb8019..b6c6888 100644 --- a/FuzzyString/TanimotoCoefficient.cs +++ b/FuzzyString/TanimotoCoefficient.cs @@ -1,8 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Linq; namespace FuzzyString { From ab8120eafb0fc234f44a55a502c963c322b55277 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:23:31 +0300 Subject: [PATCH 05/13] Use Flags attribute for Options (major change) --- FuzzyString/ApproximatelyEquals.cs | 26 ++++++++++---------- FuzzyString/FuzzyStringComparisonOptions.cs | 27 +++++++++++---------- FuzzyStringConsole/Program.cs | 20 +++++++-------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index ecbc683..aab9379 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -7,18 +7,18 @@ namespace FuzzyString public static partial class ComparisonMetrics { public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, - params FuzzyStringComparisonOptions[] options) + FuzzyStringComparisonOptions options) { List comparisonResults = new List(); - if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive)) + if (!options.HasFlag(FuzzyStringComparisonOptions.CaseSensitive)) { source = source.Capitalize(); target = target.Capitalize(); } // Min: 0 Max: source.Length = target.Length - if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance)) + if (tolerance.HasFlag(FuzzyStringComparisonOptions.UseHammingDistance)) { if (source.Length == target.Length) { @@ -27,63 +27,63 @@ public static bool ApproximatelyEquals(this string source, string target, FuzzyS } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseJaccardDistance)) { comparisonResults.Add(source.JaccardDistance(target)); } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseJaroDistance)) { comparisonResults.Add(source.JaroDistance(target)); } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseJaroWinklerDistance)) { comparisonResults.Add(source.JaroWinklerDistance(target)); } // Min: 0 Max: LevenshteinDistanceUpperBounds - LevenshteinDistanceLowerBounds // Min: LevenshteinDistanceLowerBounds Max: LevenshteinDistanceUpperBounds - if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance)) { comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / Convert.ToDouble(Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target))); } - else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance)) + else if (options.HasFlag(FuzzyStringComparisonOptions.UseLevenshteinDistance)) { comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target))); } - if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubsequence)) { comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubsequence(target).Length / Convert.ToDouble(Math.Min(source.Length, target.Length)))); } - if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubstring)) { comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubstring(target).Length / Convert.ToDouble(Math.Min(source.Length, target.Length)))); } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseSorensenDiceDistance)) { comparisonResults.Add(source.SorensenDiceDistance(target)); } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseOverlapCoefficient)) { comparisonResults.Add(1 - source.OverlapCoefficient(target)); } // Min: 0 Max: 1 - if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity)) { comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target)); } diff --git a/FuzzyString/FuzzyStringComparisonOptions.cs b/FuzzyString/FuzzyStringComparisonOptions.cs index 3ffe1f7..9947f23 100644 --- a/FuzzyString/FuzzyStringComparisonOptions.cs +++ b/FuzzyString/FuzzyStringComparisonOptions.cs @@ -2,32 +2,33 @@ namespace FuzzyString { + [Flags] public enum FuzzyStringComparisonOptions { - UseHammingDistance, + CaseSensitive = 0x0001, - UseJaccardDistance, + UseHammingDistance = 0x0002, - UseJaroDistance, + UseJaccardDistance = 0x0004, - UseJaroWinklerDistance, + UseJaroDistance = 0x0008, - UseLevenshteinDistance, + UseJaroWinklerDistance = 0x0010, - UseLongestCommonSubsequence, + UseLevenshteinDistance = 0x0020, - UseLongestCommonSubstring, + UseLongestCommonSubsequence = 0x0040, - UseNormalizedLevenshteinDistance, + UseLongestCommonSubstring = 0x0080, - UseOverlapCoefficient, + UseNormalizedLevenshteinDistance = 0x0100, - UseRatcliffObershelpSimilarity, + UseOverlapCoefficient = 0x0200, - UseSorensenDiceDistance, + UseRatcliffObershelpSimilarity = 0x0400, - UseTanimotoCoefficient, + UseSorensenDiceDistance = 0x0800, - CaseSensitive + UseTanimotoCoefficient = 0x1000, } } diff --git a/FuzzyStringConsole/Program.cs b/FuzzyStringConsole/Program.cs index 503fba9..ad37901 100644 --- a/FuzzyStringConsole/Program.cs +++ b/FuzzyStringConsole/Program.cs @@ -10,20 +10,20 @@ namespace FuzzyStringConsole class Program { static void Main(string[] args) - { + { string kevin = "kevin"; string kevyn = "kevyn"; - List options = new List(); - options.Add(FuzzyStringComparisonOptions.UseJaccardDistance); - options.Add(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance); - options.Add(FuzzyStringComparisonOptions.UseOverlapCoefficient); - options.Add(FuzzyStringComparisonOptions.UseLongestCommonSubsequence); - options.Add(FuzzyStringComparisonOptions.CaseSensitive); + var options = FuzzyStringComparisonOptions.UseJaccardDistance + | FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance + | FuzzyStringComparisonOptions.UseOverlapCoefficient + | FuzzyStringComparisonOptions.UseLongestCommonSubsequence + | FuzzyStringComparisonOptions.UseLevenshteinDistance + | FuzzyStringComparisonOptions.CaseSensitive; - Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Weak, options.ToArray())); - Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Normal, options.ToArray())); - Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Strong, options.ToArray())); + Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Weak, options)); + Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Normal, options)); + Console.WriteLine(kevin.ApproximatelyEquals(kevyn, FuzzyStringComparisonTolerance.Strong, options)); Console.ReadLine(); } From 8d7e3da04d952163274de70ba90d11f97389a06d Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:24:37 +0300 Subject: [PATCH 06/13] lowercase string --- FuzzyString/ApproximatelyEquals.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index aab9379..f17157b 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -13,8 +13,8 @@ public static bool ApproximatelyEquals(this string source, string target, FuzzyS if (!options.HasFlag(FuzzyStringComparisonOptions.CaseSensitive)) { - source = source.Capitalize(); - target = target.Capitalize(); + source = source.ToLower(); + target = target.ToLower(); } // Min: 0 Max: source.Length = target.Length From 70ebff9f3796d507547a53ff19c828b7c8b889cf Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:31:45 +0300 Subject: [PATCH 07/13] Use float instead of double (no need in such precision) --- FuzzyString/ApproximatelyEquals.cs | 16 ++++++------- FuzzyString/JaccardDistance.cs | 6 ++--- FuzzyString/JaroDistance.cs | 15 +++++-------- FuzzyString/JaroWinklerDistance.cs | 26 +++++++++++++--------- FuzzyString/LevenshteinDistance.cs | 2 +- FuzzyString/OverlapCoefficient.cs | 4 ++-- FuzzyString/RatcliffObershelpSimilarity.cs | 4 ++-- FuzzyString/SorensenDiceDistance.cs | 6 ++--- FuzzyString/TanimotoCoefficient.cs | 8 +++---- 9 files changed, 44 insertions(+), 43 deletions(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index f17157b..ac80309 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -9,7 +9,7 @@ public static partial class ComparisonMetrics public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, FuzzyStringComparisonOptions options) { - List comparisonResults = new List(); + var comparisonResults = new List(); if (!options.HasFlag(FuzzyStringComparisonOptions.CaseSensitive)) { @@ -48,26 +48,26 @@ public static bool ApproximatelyEquals(this string source, string target, FuzzyS // Min: LevenshteinDistanceLowerBounds Max: LevenshteinDistanceUpperBounds if (options.HasFlag(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance)) { - comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / - Convert.ToDouble(Math.Max(source.Length, target.Length) - + comparisonResults.Add(Convert.ToSingle(source.NormalizedLevenshteinDistance(target)) / + Convert.ToSingle(Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target))); } else if (options.HasFlag(FuzzyStringComparisonOptions.UseLevenshteinDistance)) { - comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / - Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target))); + comparisonResults.Add(Convert.ToSingle(source.LevenshteinDistance(target)) / + Convert.ToSingle(source.LevenshteinDistanceUpperBounds(target))); } if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubsequence)) { - comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubsequence(target).Length / + comparisonResults.Add(1 - Convert.ToSingle(source.LongestCommonSubsequence(target).Length / Convert.ToDouble(Math.Min(source.Length, target.Length)))); } if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubstring)) { - comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubstring(target).Length / - Convert.ToDouble(Math.Min(source.Length, target.Length)))); + comparisonResults.Add(1 - Convert.ToSingle(source.LongestCommonSubstring(target).Length / + Convert.ToSingle(Math.Min(source.Length, target.Length)))); } // Min: 0 Max: 1 diff --git a/FuzzyString/JaccardDistance.cs b/FuzzyString/JaccardDistance.cs index 3e5b518..f6fb1f0 100644 --- a/FuzzyString/JaccardDistance.cs +++ b/FuzzyString/JaccardDistance.cs @@ -5,14 +5,14 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double JaccardDistance(this string source, string target) + public static float JaccardDistance(this string source, string target) { return 1 - source.JaccardIndex(target); } - public static double JaccardIndex(this string source, string target) + public static float JaccardIndex(this string source, string target) { - return Convert.ToDouble(source.Intersect(target).Count()) / Convert.ToDouble(source.Union(target).Count()); + return Convert.ToSingle(source.Intersect(target).Count()) / Convert.ToSingle(source.Union(target).Count()); } } } \ No newline at end of file diff --git a/FuzzyString/JaroDistance.cs b/FuzzyString/JaroDistance.cs index 9156e0e..ed14af4 100644 --- a/FuzzyString/JaroDistance.cs +++ b/FuzzyString/JaroDistance.cs @@ -4,20 +4,17 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double JaroDistance(this string source, string target) + public static float JaroDistance(this string source, string target) { int m = source.Intersect(target).Count(); if (m == 0) { return 0; } - string sourceTargetIntersetAsString = ""; - string targetSourceIntersetAsString = ""; - IEnumerable sourceIntersectTarget = source.Intersect(target); - IEnumerable targetIntersectSource = target.Intersect(source); - foreach (char character in sourceIntersectTarget) { sourceTargetIntersetAsString += character; } - foreach (char character in targetIntersectSource) { targetSourceIntersetAsString += character; } - double t = sourceTargetIntersetAsString.LevenshteinDistance(targetSourceIntersetAsString) / 2; - return ((m / source.Length) + (m / target.Length) + ((m - t) / m)) / 3; + string sourceTargetIntersetAsString = string.Concat(source.Intersect(target)); + string targetSourceIntersetAsString = string.Concat(target.Intersect(source)); + + float t = sourceTargetIntersetAsString.LevenshteinDistance(targetSourceIntersetAsString) / 2; + return (m / source.Length + m / target.Length + (m - t) / m) / 3; } } } diff --git a/FuzzyString/JaroWinklerDistance.cs b/FuzzyString/JaroWinklerDistance.cs index eaa8a45..eee4a61 100644 --- a/FuzzyString/JaroWinklerDistance.cs +++ b/FuzzyString/JaroWinklerDistance.cs @@ -4,33 +4,37 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double JaroWinklerDistance(this string source, string target) + public static float JaroWinklerDistance(this string source, string target) { - double jaroDistance = source.JaroDistance(target); - double commonPrefixLength = CommonPrefixLength(source, target); + float jaroDistance = source.JaroDistance(target); + float commonPrefixLength = CommonPrefixLength(source, target); - return jaroDistance + (commonPrefixLength * 0.1 * (1 - jaroDistance)); + return jaroDistance + commonPrefixLength * 0.1f * (1 - jaroDistance); } - public static double JaroWinklerDistanceWithPrefixScale(string source, string target, double p) + public static float JaroWinklerDistanceWithPrefixScale(string source, string target, float p) { - double prefixScale = 0.1; + float prefixScale = 0.1f; - if (p > 0.25) { prefixScale = 0.25; } // The maximu value for distance to not exceed 1 + if (p > 0.25) { prefixScale = 0.25f; } // The maximu value for distance to not exceed 1 else if (p < 0) { prefixScale = 0; } // The Jaro Distance else { prefixScale = p; } - double jaroDistance = source.JaroDistance(target); - double commonPrefixLength = CommonPrefixLength(source, target); + float jaroDistance = source.JaroDistance(target); + float commonPrefixLength = CommonPrefixLength(source, target); return jaroDistance + (commonPrefixLength * prefixScale * (1 - jaroDistance)); } - private static double CommonPrefixLength(string source, string target) + public static float CommonPrefixLength(string source, string target) { int maximumPrefixLength = 4; int commonPrefixLength = 0; - if (source.Length <= 4 || target.Length <= 4) { maximumPrefixLength = Math.Min(source.Length, target.Length); } + + if (source.Length <= 4 || target.Length <= 4) + { + maximumPrefixLength = Math.Min(source.Length, target.Length); + } for (int i = 0; i < maximumPrefixLength; i++) { diff --git a/FuzzyString/LevenshteinDistance.cs b/FuzzyString/LevenshteinDistance.cs index 9321950..f0407cd 100644 --- a/FuzzyString/LevenshteinDistance.cs +++ b/FuzzyString/LevenshteinDistance.cs @@ -41,7 +41,7 @@ public static int LevenshteinDistance(this string source, string target) /// /// /// The Levenshtein distance, normalized so that the lower bound is always zero, rather than the difference in length between the two strings - public static double NormalizedLevenshteinDistance(this string source, string target) + public static float NormalizedLevenshteinDistance(this string source, string target) { int unnormalizedLevenshteinDistance = source.LevenshteinDistance(target); diff --git a/FuzzyString/OverlapCoefficient.cs b/FuzzyString/OverlapCoefficient.cs index 734dc82..4cf40bd 100644 --- a/FuzzyString/OverlapCoefficient.cs +++ b/FuzzyString/OverlapCoefficient.cs @@ -5,9 +5,9 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double OverlapCoefficient(this string source, string target) + public static float OverlapCoefficient(this string source, string target) { - return (Convert.ToDouble(source.Intersect(target).Count())) / Convert.ToDouble(Math.Min(source.Length, target.Length)); + return Convert.ToSingle(source.Intersect(target).Count()) / Convert.ToSingle(Math.Min(source.Length, target.Length)); } } } diff --git a/FuzzyString/RatcliffObershelpSimilarity.cs b/FuzzyString/RatcliffObershelpSimilarity.cs index 4660a48..c421c7e 100644 --- a/FuzzyString/RatcliffObershelpSimilarity.cs +++ b/FuzzyString/RatcliffObershelpSimilarity.cs @@ -5,9 +5,9 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double RatcliffObershelpSimilarity(this string source, string target) + public static float RatcliffObershelpSimilarity(this string source, string target) { - return (2 * Convert.ToDouble(source.Intersect(target).Count())) / (Convert.ToDouble(source.Length + target.Length)); + return 2 * Convert.ToSingle(source.Intersect(target).Count()) / Convert.ToSingle(source.Length + target.Length); } } } diff --git a/FuzzyString/SorensenDiceDistance.cs b/FuzzyString/SorensenDiceDistance.cs index 492ed4f..1064150 100644 --- a/FuzzyString/SorensenDiceDistance.cs +++ b/FuzzyString/SorensenDiceDistance.cs @@ -5,14 +5,14 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double SorensenDiceDistance(this string source, string target) + public static float SorensenDiceDistance(this string source, string target) { return 1 - source.SorensenDiceIndex(target); } - public static double SorensenDiceIndex(this string source, string target) + public static float SorensenDiceIndex(this string source, string target) { - return (2 * Convert.ToDouble(source.Intersect(target).Count())) / (Convert.ToDouble(source.Length + target.Length)); + return 2 * Convert.ToSingle(source.Intersect(target).Count()) / Convert.ToSingle(source.Length + target.Length); } } } diff --git a/FuzzyString/TanimotoCoefficient.cs b/FuzzyString/TanimotoCoefficient.cs index b6c6888..c5a156c 100644 --- a/FuzzyString/TanimotoCoefficient.cs +++ b/FuzzyString/TanimotoCoefficient.cs @@ -4,11 +4,11 @@ namespace FuzzyString { public static partial class ComparisonMetrics { - public static double TanimotoCoefficient(this string source, string target) + public static float TanimotoCoefficient(this string source, string target) { - double Na = source.Length; - double Nb = target.Length; - double Nc = source.Intersect(target).Count(); + float Na = source.Length; + float Nb = target.Length; + float Nc = source.Intersect(target).Count(); return Nc / (Na + Nb - Nc); } From 459fcffb314a3f03008d875d23a4974f93dfc6ad Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:34:37 +0300 Subject: [PATCH 08/13] simplify --- FuzzyString/LevenshteinDistance.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/FuzzyString/LevenshteinDistance.cs b/FuzzyString/LevenshteinDistance.cs index f0407cd..e549d62 100644 --- a/FuzzyString/LevenshteinDistance.cs +++ b/FuzzyString/LevenshteinDistance.cs @@ -3,7 +3,7 @@ namespace FuzzyString { public static partial class ComparisonMetrics - { + { /// /// Calculate the minimum number of single-character edits needed to change the source into the target, /// allowing insertions, deletions, and substitutions. @@ -26,9 +26,9 @@ public static int LevenshteinDistance(this string source, string target) var sourceInitial = source.Substring(0, source.Length - 1); var targetInitial = target.Substring(0, target.Length - 1); - return Math.Min(Math.Min(LevenshteinDistance(sourceInitial, target) + 1, - LevenshteinDistance(source, targetInitial)) + 1, - LevenshteinDistance(sourceInitial, targetInitial) + distance); + + var min = Math.Min(LevenshteinDistance(sourceInitial, target) + 1, LevenshteinDistance(source, targetInitial)); + return Math.Min(min + 1, LevenshteinDistance(sourceInitial, targetInitial) + distance); } /// @@ -60,8 +60,8 @@ public static int LevenshteinDistanceUpperBounds(this string source, string targ if (source.Length == target.Length) { return source.HammingDistance(target); } // Otherwise, the upper bound is the length of the longer string. - else if (source.Length > target.Length) { return source.Length; } - else if (target.Length > source.Length) { return target.Length; } + if (source.Length > target.Length) { return source.Length; } + if (target.Length > source.Length) { return target.Length; } return 9999; } From 1379118ca3663c46320900c8ed32410d05b219bb Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:43:45 +0300 Subject: [PATCH 09/13] Added BacktrackTable struct for LCS --- FuzzyString/LongestCommonSubsequence.cs | 76 +++++++++++++++---------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/FuzzyString/LongestCommonSubsequence.cs b/FuzzyString/LongestCommonSubsequence.cs index fe25c05..fcb65ee 100644 --- a/FuzzyString/LongestCommonSubsequence.cs +++ b/FuzzyString/LongestCommonSubsequence.cs @@ -6,56 +6,70 @@ public static partial class ComparisonMetrics { public static string LongestCommonSubsequence(this string source, string target) { - int[,] C = LongestCommonSubsequenceLengthTable(source, target); - - return Backtrack(C, source, target, source.Length, target.Length); + return BacktrackTable.Create(source, target).Backtrack(); } - private static int[,] LongestCommonSubsequenceLengthTable(string source, string target) + private struct BacktrackTable { - int[,] C = new int[source.Length + 1, target.Length + 1]; + private readonly int[,] C; + private readonly string source; + private readonly string target; - for (int i = 0; i < source.Length + 1; i++) { C[i, 0] = 0; } - for (int j = 0; j < target.Length + 1; j++) { C[0, j] = 0; } + private BacktrackTable(int[,] C, string source, string target) + { + this.C = C; + this.source = source; + this.target = target; + } - for (int i = 1; i < source.Length + 1; i++) + public static BacktrackTable Create(string source, string target) { - for (int j = 1; j < target.Length + 1; j++) + int[,] C = new int[source.Length + 1, target.Length + 1]; + + for (int i = 0; i < source.Length + 1; i++) { C[i, 0] = 0; } + for (int j = 0; j < target.Length + 1; j++) { C[0, j] = 0; } + + for (int i = 1; i < source.Length + 1; i++) { - if (source[i - 1].Equals(target[j - 1])) + for (int j = 1; j < target.Length + 1; j++) { - C[i, j] = C[i - 1, j - 1] + 1; - } - else - { - C[i, j] = Math.Max(C[i, j - 1], C[i - 1, j]); + if (source[i - 1].Equals(target[j - 1])) + { + C[i, j] = C[i - 1, j - 1] + 1; + } + else + { + C[i, j] = Math.Max(C[i, j - 1], C[i - 1, j]); + } } } - } - return C; - } - - private static string Backtrack(int[,] C, string source, string target, int i, int j) - { - if (i == 0 || j == 0) - { - return ""; + return new BacktrackTable(C, source, target); } - else if (source[i - 1].Equals(target[j - 1])) + + public string Backtrack() { - return Backtrack(C, source, target, i - 1, j - 1) + source[i - 1]; + return Backtrack(source.Length, target.Length); } - else + + private string Backtrack( int i, int j) { - if (C[i, j - 1] > C[i - 1, j]) + if (i == 0 || j == 0) + { + return ""; + } + + if (source[i - 1].Equals(target[j - 1])) { - return Backtrack(C, source, target, i, j - 1); + return Backtrack(i - 1, j - 1) + source[i - 1]; } - else + + if (C[i, j - 1] > C[i - 1, j]) { - return Backtrack(C, source, target, i - 1, j); + return Backtrack(i, j - 1); } + + return Backtrack(i - 1, j); } } } From adea883216500c3b8bc6e82e5d025452b1c185c1 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 10:46:58 +0300 Subject: [PATCH 10/13] Simplify code & drop braces in few places --- FuzzyString/JaroWinklerDistance.cs | 4 +-- FuzzyString/LevenshteinDistance.cs | 5 +-- FuzzyString/LongestCommonSubsequence.cs | 16 ++++----- FuzzyString/LongestCommonSubstring.cs | 45 ++++++++++++------------- 4 files changed, 32 insertions(+), 38 deletions(-) diff --git a/FuzzyString/JaroWinklerDistance.cs b/FuzzyString/JaroWinklerDistance.cs index eee4a61..e058897 100644 --- a/FuzzyString/JaroWinklerDistance.cs +++ b/FuzzyString/JaroWinklerDistance.cs @@ -14,7 +14,7 @@ public static float JaroWinklerDistance(this string source, string target) public static float JaroWinklerDistanceWithPrefixScale(string source, string target, float p) { - float prefixScale = 0.1f; + float prefixScale; if (p > 0.25) { prefixScale = 0.25f; } // The maximu value for distance to not exceed 1 else if (p < 0) { prefixScale = 0; } // The Jaro Distance @@ -23,7 +23,7 @@ public static float JaroWinklerDistanceWithPrefixScale(string source, string tar float jaroDistance = source.JaroDistance(target); float commonPrefixLength = CommonPrefixLength(source, target); - return jaroDistance + (commonPrefixLength * prefixScale * (1 - jaroDistance)); + return jaroDistance + commonPrefixLength * prefixScale * (1 - jaroDistance); } public static float CommonPrefixLength(string source, string target) diff --git a/FuzzyString/LevenshteinDistance.cs b/FuzzyString/LevenshteinDistance.cs index e549d62..19fcdea 100644 --- a/FuzzyString/LevenshteinDistance.cs +++ b/FuzzyString/LevenshteinDistance.cs @@ -19,10 +19,7 @@ public static int LevenshteinDistance(this string source, string target) if (source.Length == 0) { return target.Length; } if (target.Length == 0) { return source.Length; } - int distance = 0; - - if (source[source.Length - 1] == target[target.Length - 1]) { distance = 0; } - else { distance = 1; } + var distance = source[source.Length - 1] == target[target.Length - 1] ? 0 : 1; var sourceInitial = source.Substring(0, source.Length - 1); var targetInitial = target.Substring(0, target.Length - 1); diff --git a/FuzzyString/LongestCommonSubsequence.cs b/FuzzyString/LongestCommonSubsequence.cs index fcb65ee..5b03324 100644 --- a/FuzzyString/LongestCommonSubsequence.cs +++ b/FuzzyString/LongestCommonSubsequence.cs @@ -30,17 +30,15 @@ public static BacktrackTable Create(string source, string target) for (int j = 0; j < target.Length + 1; j++) { C[0, j] = 0; } for (int i = 1; i < source.Length + 1; i++) + for (int j = 1; j < target.Length + 1; j++) { - for (int j = 1; j < target.Length + 1; j++) + if (source[i - 1].Equals(target[j - 1])) { - if (source[i - 1].Equals(target[j - 1])) - { - C[i, j] = C[i - 1, j - 1] + 1; - } - else - { - C[i, j] = Math.Max(C[i, j - 1], C[i - 1, j]); - } + C[i, j] = C[i - 1, j - 1] + 1; + } + else + { + C[i, j] = Math.Max(C[i, j - 1], C[i - 1, j]); } } diff --git a/FuzzyString/LongestCommonSubstring.cs b/FuzzyString/LongestCommonSubstring.cs index c9d3726..a9c7efd 100644 --- a/FuzzyString/LongestCommonSubstring.cs +++ b/FuzzyString/LongestCommonSubstring.cs @@ -15,34 +15,33 @@ public static string LongestCommonSubstring(this string source, string target) StringBuilder stringBuilder = new StringBuilder(); for (int i = 0; i < source.Length; i++) + for (int j = 0; j < target.Length; j++) { - for (int j = 0; j < target.Length; j++) + if (source[i] != target[j]) { - if (source[i] != target[j]) - { - L[i, j] = 0; - } + L[i, j] = 0; + } + else + { + if ((i == 0) || (j == 0)) + L[i, j] = 1; else - { - if ((i == 0) || (j == 0)) - L[i, j] = 1; - else - L[i, j] = 1 + L[i - 1, j - 1]; + L[i, j] = 1 + L[i - 1, j - 1]; - if (L[i, j] > maximumLength) + if (L[i, j] > maximumLength) + { + maximumLength = L[i, j]; + int thisSubsBegin = i - L[i, j] + 1; + if (lastSubsBegin == thisSubsBegin) + { + //if the current LCS is the same as the last time this block ran + stringBuilder.Append(source[i]); + } + else //this block resets the string builder if a different LCS is found { - maximumLength = L[i, j]; - int thisSubsBegin = i - L[i, j] + 1; - if (lastSubsBegin == thisSubsBegin) - {//if the current LCS is the same as the last time this block ran - stringBuilder.Append(source[i]); - } - else //this block resets the string builder if a different LCS is found - { - lastSubsBegin = thisSubsBegin; - stringBuilder.Length = 0; //clear it - stringBuilder.Append(source.Substring(lastSubsBegin, (i + 1) - lastSubsBegin)); - } + lastSubsBegin = thisSubsBegin; + stringBuilder.Length = 0; //clear it + stringBuilder.Append(source.Substring(lastSubsBegin, (i + 1) - lastSubsBegin)); } } } From ccb4768d00ad48ec54aca8becc6019f42cf83831 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 11:01:50 +0300 Subject: [PATCH 11/13] fixup! Use Flags attribute for Options (major change) --- FuzzyString/ApproximatelyEquals.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FuzzyString/ApproximatelyEquals.cs b/FuzzyString/ApproximatelyEquals.cs index ac80309..8d1252f 100644 --- a/FuzzyString/ApproximatelyEquals.cs +++ b/FuzzyString/ApproximatelyEquals.cs @@ -18,7 +18,7 @@ public static bool ApproximatelyEquals(this string source, string target, FuzzyS } // Min: 0 Max: source.Length = target.Length - if (tolerance.HasFlag(FuzzyStringComparisonOptions.UseHammingDistance)) + if (options.HasFlag(FuzzyStringComparisonOptions.UseHammingDistance)) { if (source.Length == target.Length) { From 430f221ba09673e94567abc7fde1a56698db9945 Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 19 Apr 2018 11:56:24 +0300 Subject: [PATCH 12/13] Upd LevenshteinDistance according to wiki --- FuzzyString/LevenshteinDistance.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FuzzyString/LevenshteinDistance.cs b/FuzzyString/LevenshteinDistance.cs index 19fcdea..55d14ab 100644 --- a/FuzzyString/LevenshteinDistance.cs +++ b/FuzzyString/LevenshteinDistance.cs @@ -24,8 +24,8 @@ public static int LevenshteinDistance(this string source, string target) var sourceInitial = source.Substring(0, source.Length - 1); var targetInitial = target.Substring(0, target.Length - 1); - var min = Math.Min(LevenshteinDistance(sourceInitial, target) + 1, LevenshteinDistance(source, targetInitial)); - return Math.Min(min + 1, LevenshteinDistance(sourceInitial, targetInitial) + distance); + var min = Math.Min(LevenshteinDistance(sourceInitial, target) + 1, LevenshteinDistance(source, targetInitial) + 1); + return Math.Min(min, LevenshteinDistance(sourceInitial, targetInitial) + distance); } /// From e3c186440f2dc77a2accd4739190a9fab15bbcac Mon Sep 17 00:00:00 2001 From: Sergey Svirsky Date: Thu, 26 Apr 2018 12:41:32 +0300 Subject: [PATCH 13/13] Added editor config --- .editorconfig | 5 +++++ FuzzyString.sln | 1 + 2 files changed, 6 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d489a1d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +root=true + +[*.cs] +indent_size=4 +indent_style=space \ No newline at end of file diff --git a/FuzzyString.sln b/FuzzyString.sln index 10c46cf..1219469 100644 --- a/FuzzyString.sln +++ b/FuzzyString.sln @@ -12,6 +12,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore README.md = README.md + .editorconfig = .editorconfig EndProjectSection EndProject Global