Skip to content
Open
5 changes: 5 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
root=true

[*.cs]
indent_size=4
indent_style=space
1 change: 1 addition & 0 deletions FuzzyString.sln
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
.gitattributes = .gitattributes
.gitignore = .gitignore
README.md = README.md
.editorconfig = .editorconfig
EndProjectSection
EndProject
Global
Expand Down
109 changes: 39 additions & 70 deletions FuzzyString/ApproximatelyEquals.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FuzzyString
{
public static partial class ComparisonMetrics
{
public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
{
public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance,
FuzzyStringComparisonOptions options)
{
List<double> comparisonResults = new List<double>();
var comparisonResults = new List<float>();

if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
if (!options.HasFlag(FuzzyStringComparisonOptions.CaseSensitive))
{
source = source.Capitalize();
target = target.Capitalize();
source = source.ToLower();
target = target.ToLower();
}

// Hamming distance — Min: 0, Max: source.Length (only computed when source.Length == target.Length)
if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseHammingDistance))
{
if (source.Length == target.Length)
{
Expand All @@ -28,58 +27,63 @@ public static bool ApproximatelyEquals(this string source, string target, Fuzzy
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseJaccardDistance))
{
comparisonResults.Add(source.JaccardDistance(target));
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseJaroDistance))
{
comparisonResults.Add(source.JaroDistance(target));
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
{
comparisonResults.Add(source.JaroWinklerDistance(target));
}

// Min: 0 Max: LevenshteinDistanceUpperBounds - LevenshteinDistanceLowerBounds
// Min: LevenshteinDistanceLowerBounds Max: LevenshteinDistanceUpperBounds
if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
{
comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / Convert.ToDouble((Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target))));
comparisonResults.Add(Convert.ToSingle(source.NormalizedLevenshteinDistance(target)) /
Convert.ToSingle(Math.Max(source.Length, target.Length) -
source.LevenshteinDistanceLowerBounds(target)));
}
else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
else if (options.HasFlag(FuzzyStringComparisonOptions.UseLevenshteinDistance))
{
comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target)));
comparisonResults.Add(Convert.ToSingle(source.LevenshteinDistance(target)) /
Convert.ToSingle(source.LevenshteinDistanceUpperBounds(target)));
}

if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
{
comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubsequence(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
comparisonResults.Add(1 - Convert.ToSingle(source.LongestCommonSubsequence(target).Length /
Convert.ToDouble(Math.Min(source.Length, target.Length))));
}

if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
if (options.HasFlag(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
{
comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubstring(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
comparisonResults.Add(1 - Convert.ToSingle(source.LongestCommonSubstring(target).Length /
Convert.ToSingle(Math.Min(source.Length, target.Length))));
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
if (options.HasFlag(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
{
comparisonResults.Add(source.SorensenDiceDistance(target));
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
if (options.HasFlag(FuzzyStringComparisonOptions.UseOverlapCoefficient))
{
comparisonResults.Add(1 - source.OverlapCoefficient(target));
}

// Min: 0 Max: 1
if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
if (options.HasFlag(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
{
comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target));
}
Expand All @@ -89,54 +93,19 @@ public static bool ApproximatelyEquals(this string source, string target, Fuzzy
return false;
}

if (tolerance == FuzzyStringComparisonTolerance.Strong)
{
if (comparisonResults.Average() < 0.25)
{
return true;
}
else
{
switch (tolerance)
{
case FuzzyStringComparisonTolerance.Strong:
return comparisonResults.Average() < 0.25;
case FuzzyStringComparisonTolerance.Normal:
return comparisonResults.Average() < 0.5;
case FuzzyStringComparisonTolerance.Weak:
return comparisonResults.Average() < 0.75;
case FuzzyStringComparisonTolerance.Manual:
return comparisonResults.Average() > 0.6;
default:
return false;
}
}
else if (tolerance == FuzzyStringComparisonTolerance.Normal)
{
if (comparisonResults.Average() < 0.5)
{
return true;
}
else
{
return false;
}
}
else if (tolerance == FuzzyStringComparisonTolerance.Weak)
{
if (comparisonResults.Average() < 0.75)
{
return true;
}
else
{
return false;
}
}
else if (tolerance == FuzzyStringComparisonTolerance.Manual)
{
if (comparisonResults.Average() > 0.6)
{
return true;
}
else
{
return false;
}
}
else
{
return false;
}
}
}
}
}
31 changes: 14 additions & 17 deletions FuzzyString/FuzzyStringComparisonOptions.cs
Original file line number Diff line number Diff line change
@@ -1,37 +1,34 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FuzzyString
{
/// <summary>
/// Bit flags selecting which metrics <c>ApproximatelyEquals</c> evaluates and whether
/// the comparison is case sensitive. Values are distinct powers of two so they can be
/// combined with <c>|</c> and tested with <c>HasFlag</c>.
/// </summary>
[Flags]
public enum FuzzyStringComparisonOptions
{
    /// <summary>No option selected (the conventional zero value of a flags enum).</summary>
    None = 0,

    /// <summary>
    /// Compare the strings as given. When this flag is absent, both strings are
    /// lower-cased before any metric is computed.
    /// </summary>
    CaseSensitive = 1 << 0,                      // 0x0001

    /// <summary>Include the Hamming distance (only used when both strings have equal length).</summary>
    UseHammingDistance = 1 << 1,                 // 0x0002

    /// <summary>Include the Jaccard distance.</summary>
    UseJaccardDistance = 1 << 2,                 // 0x0004

    /// <summary>Include the Jaro distance.</summary>
    UseJaroDistance = 1 << 3,                    // 0x0008

    /// <summary>Include the Jaro-Winkler distance.</summary>
    UseJaroWinklerDistance = 1 << 4,             // 0x0010

    /// <summary>
    /// Include the Levenshtein distance. Ignored when
    /// <see cref="UseNormalizedLevenshteinDistance"/> is also set.
    /// </summary>
    UseLevenshteinDistance = 1 << 5,             // 0x0020

    /// <summary>Include the longest-common-subsequence metric.</summary>
    UseLongestCommonSubsequence = 1 << 6,        // 0x0040

    /// <summary>Include the longest-common-substring metric.</summary>
    UseLongestCommonSubstring = 1 << 7,          // 0x0080

    /// <summary>
    /// Include the normalized Levenshtein distance; takes precedence over
    /// <see cref="UseLevenshteinDistance"/>.
    /// </summary>
    UseNormalizedLevenshteinDistance = 1 << 8,   // 0x0100

    /// <summary>Include the overlap coefficient.</summary>
    UseOverlapCoefficient = 1 << 9,              // 0x0200

    /// <summary>Include the Ratcliff-Obershelp similarity.</summary>
    UseRatcliffObershelpSimilarity = 1 << 10,    // 0x0400

    /// <summary>Include the Sorensen-Dice distance.</summary>
    UseSorensenDiceDistance = 1 << 11,           // 0x0800

    /// <summary>Include the Tanimoto coefficient.</summary>
    UseTanimotoCoefficient = 1 << 12,            // 0x1000
}
}
8 changes: 1 addition & 7 deletions FuzzyString/FuzzyStringComparisonTolerance.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FuzzyString
namespace FuzzyString
{
public enum FuzzyStringComparisonTolerance
{
Expand Down
23 changes: 8 additions & 15 deletions FuzzyString/HammingDistance.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,22 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FuzzyString
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Sentinel returned by <see cref="HammingDistance"/> when the two strings differ in
    /// length, since the Hamming distance is only defined for equal-length inputs.
    /// </summary>
    private const int HammingLengthMismatch = 99999;

    /// <summary>
    /// Counts the positions at which <paramref name="source"/> and
    /// <paramref name="target"/> hold different characters.
    /// </summary>
    /// <param name="source">First string.</param>
    /// <param name="target">Second string.</param>
    /// <returns>
    /// The number of differing positions (0 for identical strings), or 99999 when the
    /// strings do not have the same length.
    /// </returns>
    public static int HammingDistance(this string source, string target)
    {
        // Guard first: positions cannot be paired up when the lengths differ.
        if (source.Length != target.Length)
        {
            return HammingLengthMismatch;
        }

        int distance = 0;

        for (int i = 0; i < source.Length; i++)
        {
            if (source[i] != target[i])
            {
                distance++;
            }
        }

        return distance;
    }
}
}
11 changes: 4 additions & 7 deletions FuzzyString/JaccardDistance.cs
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Jaccard distance between the character sets of the two strings:
    /// one minus <see cref="JaccardIndex"/>.
    /// </summary>
    /// <param name="source">First string.</param>
    /// <param name="target">Second string.</param>
    /// <returns>A value from 0 (same character set) to 1 (no shared characters).</returns>
    public static float JaccardDistance(this string source, string target)
    {
        return 1 - source.JaccardIndex(target);
    }

    /// <summary>
    /// Jaccard index of the two strings treated as sets of distinct characters:
    /// |intersection| / |union|.
    /// </summary>
    /// <param name="source">First string.</param>
    /// <param name="target">Second string.</param>
    /// <returns>
    /// A value from 0 (no shared characters) to 1 (same character set). Two empty
    /// strings are treated as identical sets and yield 1.
    /// </returns>
    public static float JaccardIndex(this string source, string target)
    {
        int unionCount = source.Union(target).Count();

        // Both strings empty: 0 / 0 would produce NaN, which then poisons any average
        // or threshold comparison built on this value. Two empty sets are equal.
        if (unionCount == 0)
        {
            return 1f;
        }

        int intersectionCount = source.Intersect(target).Count();

        return (float)intersectionCount / unionCount;
    }
}
}
}
25 changes: 8 additions & 17 deletions FuzzyString/JaroDistance.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,20 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Linq;

namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Jaro-style score computed from the distinct characters the two strings share.
    /// </summary>
    /// <remarks>
    /// With m = number of distinct characters common to both strings and t = half the
    /// Levenshtein distance between the two orderings of those common characters, the
    /// result is (m / |source| + m / |target| + (m - t) / m) / 3.
    /// </remarks>
    /// <param name="source">First string.</param>
    /// <param name="target">Second string.</param>
    /// <returns>0 when the strings share no characters; otherwise the score above.</returns>
    public static float JaroDistance(this string source, string target)
    {
        int m = source.Intersect(target).Count();

        // No common characters: also protects the divisions below, since m > 0
        // implies both strings are non-empty.
        if (m == 0)
        {
            return 0;
        }

        // Common characters in the order each string first presents them.
        string commonInSourceOrder = string.Concat(source.Intersect(target));
        string commonInTargetOrder = string.Concat(target.Intersect(source));

        // NOTE(review): if LevenshteinDistance returns an int this is an integer
        // division (halves round down), kept exactly as in the original — confirm intent.
        float t = commonInSourceOrder.LevenshteinDistance(commonInTargetOrder) / 2;

        // m and the lengths are ints; cast so the ratios are computed in floating point
        // instead of being truncated to 0 or 1 by integer division.
        float sourceRatio = (float)m / source.Length;
        float targetRatio = (float)m / target.Length;

        return (sourceRatio + targetRatio + (m - t) / m) / 3;
    }
}
}
Loading