/// <summary>
/// Creates a fresh string metric calculator for the requested algorithm.
/// </summary>
/// <param name="algorithm">The string similarity algorithm to instantiate.</param>
/// <returns>
/// A new <see cref="IStringMetric"/> instance matching <paramref name="algorithm"/>.
/// Any value without an explicit case below yields a new <c>Levenstein</c> instance.
/// </returns>
public static IStringMetric FromAlgorithm(SimMetricAlgorithm algorithm)
{
    switch (algorithm)
    {
        case SimMetricAlgorithm.BlockDistance:
            return new BlockDistance();

        case SimMetricAlgorithm.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation();

        case SimMetricAlgorithm.ChapmanMeanLength:
            return new ChapmanMeanLength();

        case SimMetricAlgorithm.CosineSimilarity:
            return new CosineSimilarity();

        case SimMetricAlgorithm.DiceSimilarity:
            return new DiceSimilarity();

        case SimMetricAlgorithm.EuclideanDistance:
            return new EuclideanDistance();

        case SimMetricAlgorithm.JaccardSimilarity:
            return new JaccardSimilarity();

        case SimMetricAlgorithm.Jaro:
            return new Jaro();

        case SimMetricAlgorithm.JaroWinkler:
            return new JaroWinkler();

        case SimMetricAlgorithm.MatchingCoefficient:
            return new MatchingCoefficient();

        case SimMetricAlgorithm.MongeElkan:
            return new MongeElkan();

        case SimMetricAlgorithm.NeedlemanWunch:
            return new NeedlemanWunch();

        case SimMetricAlgorithm.OverlapCoefficient:
            return new OverlapCoefficient();

        case SimMetricAlgorithm.QGramsDistance:
            return new QGramsDistance();

        case SimMetricAlgorithm.SmithWaterman:
            return new SmithWaterman();

        case SimMetricAlgorithm.SmithWatermanGotoh:
            return new SmithWatermanGotoh();

        case SimMetricAlgorithm.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine();

        // Fallback for every value not listed above.
        default:
            return new Levenstein();
    }
}
/// <summary>
/// Compares two strings for similarity.
/// </summary>
/// <param name="firstWord">The first string to compare.</param>
/// <param name="secondWord">The second string to compare.</param>
/// <param name="threshold">Any similarity score at or above this value will be considered similar.</param>
/// <param name="algorithm">The algorithm used to score the similarity of <paramref name="firstWord"/> and
/// <paramref name="secondWord"/>. Values without a dedicated metric are handled by the factory's fallback.</param>
/// <returns><c>true</c> if the similarity score is equal to or above <paramref name="threshold"/>; otherwise, <c>false</c>.</returns>
/// <remarks><![CDATA[
/// The following code demonstrates how to filter a list of strings based on the degree of similarity:
/// ```C#
/// string word = "fooler";
/// var list = new List<string>() { "fowler", "fish", "crawler" };
/// var filtered = new List<string>();
/// foreach (string item in list)
/// {
///     if (item.NearEquals(word, 0.7, SimMetricAlgorithm.Levenstein))
///     {
///         filtered.Add(item);
///     }
/// }
/// Console.WriteLine("You typed '{0}'. Did you mean: {1}", word, string.Join(", ", filtered));
/// ```
/// ]]></remarks>
public static bool NearEquals(this string firstWord, string secondWord, double threshold, SimMetricAlgorithm algorithm)
{
    IStringMetric metric = StringMetricFactory.FromAlgorithm(algorithm);
    double similarity = metric.GetSimilarity(firstWord, secondWord);

    // The score is a similarity (higher means more alike), so the words are
    // "near equal" when the score reaches the threshold, not when it falls below it.
    return similarity >= threshold;
}