/// <summary>
/// Scores the similarity of two strings with the metric selected by
/// <paramref name="simMetricType"/>.
/// </summary>
/// <param name="firstWord">First string handed to the metric.</param>
/// <param name="secondWord">Second string handed to the metric.</param>
/// <param name="simMetricType">
/// Metric to apply. Defaults to Levenstein; any value without a dedicated
/// case below is also handled by Levenstein.
/// </param>
/// <returns>Whatever the selected metric's <c>GetSimilarity</c> returns for the two strings.</returns>
public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
{
    // A fresh metric instance is created on every call; each branch builds
    // its metric and returns the score in one step.
    switch (simMetricType)
    {
        case SimMetricType.BlockDistance:
            return new BlockDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

        case SimMetricType.CosineSimilarity:
            return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.DiceSimilarity:
            return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.EuclideanDistance:
            return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaccardSimilarity:
            return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.Jaro:
            return new Jaro().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaroWinkler:
            return new JaroWinkler().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MatchingCoefficient:
            return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MongeElkan:
            return new MongeElkan().GetSimilarity(firstWord, secondWord);

        case SimMetricType.NeedlemanWunch:
            return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

        case SimMetricType.OverlapCoefficient:
            return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.QGramsDistance:
            return new QGramsDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWaterman:
            return new SmithWaterman().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotoh:
            return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanMeanLength:
            return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

        default:
            // Covers SimMetricType.Levenstein and any value not listed above.
            return new Levenstein().GetSimilarity(firstWord, secondWord);
    }
}
/// <summary>
/// Computes the similarity of two strings using the algorithm named by
/// <paramref name="type"/>. Both inputs are trimmed before comparison.
/// </summary>
/// <param name="str1">First string; <c>Trim()</c> is called on it, so it must not be null.</param>
/// <param name="str2">Second string; <c>Trim()</c> is called on it, so it must not be null.</param>
/// <param name="type">
/// One of the <c>AlgorithmTypes</c> constants. Any other value selects
/// SmithWatermanGotoh.
/// </param>
/// <returns>The score produced by the selected metric for the trimmed inputs.</returns>
public double GetSimilarity(string str1, string str2, string type)
{
    IStringMetric metric;

    switch (type)
    {
        case AlgorithmTypes.BlockDistance:
            metric = new BlockDistance();
            break;
        case AlgorithmTypes.ChapmanLengthDeviation:
            metric = new ChapmanLengthDeviation();
            break;
        case AlgorithmTypes.ChapmanMeanLength:
            metric = new ChapmanMeanLength();
            break;
        case AlgorithmTypes.CosineSimilarity:
            metric = new CosineSimilarity();
            break;
        case AlgorithmTypes.DiceSimilarity:
            metric = new DiceSimilarity();
            break;
        case AlgorithmTypes.EuclideanDistance:
            metric = new EuclideanDistance();
            break;
        case AlgorithmTypes.JaccardSimilarity:
            metric = new JaccardSimilarity();
            break;
        case AlgorithmTypes.Jaro:
            metric = new Jaro();
            break;
        case AlgorithmTypes.JaroWinkler:
            metric = new JaroWinkler();
            break;
        case AlgorithmTypes.Levenstein:
            metric = new Levenstein();
            break;
        case AlgorithmTypes.MatchingCoefficient:
            metric = new MatchingCoefficient();
            break;
        case AlgorithmTypes.MongeElkan:
            metric = new MongeElkan();
            break;
        case AlgorithmTypes.NeedlemanWunch:
            metric = new NeedlemanWunch();
            break;
        case AlgorithmTypes.OverlapCoefficient:
            metric = new OverlapCoefficient();
            break;
        case AlgorithmTypes.QGramsDistance:
            metric = new QGramsDistance();
            break;
        case AlgorithmTypes.SmithWaterman:
            metric = new SmithWaterman();
            break;
        case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
            metric = new SmithWatermanGotohWindowedAffine();
            break;
        case AlgorithmTypes.SmithWatermanGotoh:
        default:
            // The explicit SmithWatermanGotoh name and every unrecognised
            // name resolve to the same metric.
            metric = new SmithWatermanGotoh();
            break;
    }

    // The metric is built before the inputs are trimmed, then scored directly.
    return metric.GetSimilarity(str1.Trim(), str2.Trim());
}
/// <summary>
/// Builds the test fixture: calls <c>LoadData()</c> and then creates the
/// <c>OverlapCoefficient</c> instance stored in <c>_myOverlapCoefficient</c>.
/// </summary>
// [SetUp]
// NOTE(review): the attribute above is left disabled as found; setup runs
// from this constructor instead.
public OverlapCoefficientUnitTests()
{
    LoadData();

    _myOverlapCoefficient = new OverlapCoefficient();
}
/// <summary>
/// Demo entry point that exercises several similarity and distance measures.
/// Results are only stored in locals; nothing is printed. The method waits
/// for a key press before returning.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
private static void Main(string[] args)
{
    // The expected values quoted in the comments below are carried over from
    // the original author's notes; this code does not check them.

    // ------------------------------------------------------------------
    // Bag of tokens
    // ------------------------------------------------------------------
    // Recommended by the author for comparing strings of several words.
    var bagSimilarity = new BagOfWordsSimilarity();
    const string shortName = "John Smith";
    const string longName = "Mr. John Smith, Jr.";

    // Both inputs are normalized and tokenized first; expected score 1.0.
    var shortNameTokens = new Tokenizer(new NormalizedString(shortName));
    var longNameTokens = new Tokenizer(new NormalizedString(longName));
    var bagScore = bagSimilarity.GetSimilarity(shortNameTokens, longNameTokens);

    // ------------------------------------------------------------------
    // Levenshtein
    // ------------------------------------------------------------------
    const string spelledRight = "martha";
    const string spelledWrong = "marhta";

    // Levenshtein offers both a distance (IDistance) and a similarity
    // (ISimilarity), per the author's notes.
    var levenshtein = new Levenshtein();

    // Expected edit distance: 2.
    var levenshteinDistance = levenshtein.GetDistance(spelledRight, spelledWrong);

    // NormalizedString is described by the author as removing special
    // symbols and diacritics and making the comparison case-insensitive.
    // Expected score: 0.67.
    var normalizedRight = new NormalizedString(spelledRight);
    var normalizedWrong = new NormalizedString(spelledWrong);
    var levenshteinScore = levenshtein.GetSimilarity(normalizedRight, normalizedWrong);

    // ------------------------------------------------------------------
    // Damerau-Levenshtein
    // ------------------------------------------------------------------
    // Also offers both distance and similarity, per the author's notes.
    var damerau = new DamerauLevenshtein();

    // Expected edit distance: 1.
    var damerauDistance = damerau.GetDistance(spelledRight, spelledWrong);

    // Expected score: 0.83.
    var damerauScore = damerau.GetSimilarity(spelledRight, spelledWrong);

    // ------------------------------------------------------------------
    // Jaro, Jaro-Winkler
    // ------------------------------------------------------------------
    // Per the author's notes these two provide similarity only.
    var firstToken = new Token("dwayne");
    var secondToken = new Token("duane");

    // The Jaro instance is constructed but not used afterwards.
    var jaroMetric = new Jaro();
    var jaroWinklerMetric = new JaroWinkler();

    // The Jaro-Winkler score is computed and discarded.
    jaroWinklerMetric.GetSimilarity(firstToken, secondToken);

    // ------------------------------------------------------------------
    // Q-gram coefficients: Dice, Jaccard, Overlap
    // ------------------------------------------------------------------
    // NOTE(review): the original local names suggested unigram, bigram and
    // trigram variants, but all three coefficients are instantiated with
    // Bigram. Confirm which q-gram types were intended.
    var diceBigrams = new DiceCoefficient<Bigram>();
    var jaccardBigrams = new JaccardCoefficient<Bigram>();
    var overlapBigrams = new OverlapCoefficient<Bigram>();

    // Expected score: 0.5.
    var jaccardScore = jaccardBigrams.GetSimilarity(shortName, longName);

    // Expected score: 0.67.
    var diceScore = diceBigrams.GetSimilarity(shortName, longName);

    // Expected score: 1.0.
    var overlapScore = overlapBigrams.GetSimilarity(shortName, longName);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Creates the three <c>OverlapCoefficient</c> instances held by this class,
/// one each for <c>Bigram</c>, <c>Unigram</c> and <c>Trigram</c>.
/// </summary>
public void Initializate()
{
    // Construction order matches the original: bigram, unigram, trigram.
    _overlapdCoefBigram = new OverlapCoefficient<Bigram>();

    _overlapCoefUnigram = new OverlapCoefficient<Unigram>();

    _overlapCoefTrigram = new OverlapCoefficient<Trigram>();
}
/// <summary>
/// Prepares state for the tests: calls <c>LoadData()</c> and then assigns a
/// new <c>OverlapCoefficient</c> to <c>myOverlapCoefficient</c>.
/// </summary>
public void SetUp()
{
    // Data is loaded before the metric under test is created.
    LoadData();

    myOverlapCoefficient = new OverlapCoefficient();
}