/// <summary>
/// Checks <c>Jaro.Similarity</c> against expected scores for four pairs of
/// non-empty words.
/// </summary>
public void NonEmptyCases()
{
    // Third argument handed to every Assert.Equal call below.
    // NOTE(review): presumably the number of decimal places compared (xUnit-style
    // overload) - confirm against the assertion library in use.
    const int precision = 7;

    var dwayneVsDuane = Jaro.Similarity("DWAYNE", "DUANE");
    Assert.Equal(0.8222222, dwayneVsDuane, precision);

    var marthaVsMarhta = Jaro.Similarity("MARTHA", "MARHTA");
    Assert.Equal(0.9444444, marthaVsMarhta, precision);

    var dixonVsDicksonx = Jaro.Similarity("DIXON", "DICKSONX");
    Assert.Equal(0.7666667, dixonVsDicksonx, precision);

    var jellyfishVsSmellyfish = Jaro.Similarity("JELLYFISH", "SMELLYFISH");
    Assert.Equal(0.8962963, jellyfishVsSmellyfish, precision);
}
/// <summary>
/// Scores two strings against each other with the similarity metric selected by
/// <paramref name="simMetricType"/>.
/// </summary>
/// <param name="firstWord">First string; passed unchanged as the metric's first argument.</param>
/// <param name="secondWord">Second string; passed unchanged as the metric's second argument.</param>
/// <param name="simMetricType">
/// Which metric class to instantiate. <c>SimMetricType.Levenstein</c> (the default) and any
/// value without its own case fall through to <c>Levenstein</c>.
/// </param>
/// <returns>The value returned by the chosen metric's <c>GetSimilarity</c> call.</returns>
public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
{
    // A new metric instance is created on every call and used exactly once.
    switch (simMetricType)
    {
        case SimMetricType.BlockDistance:
            return new BlockDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanMeanLength:
            return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

        case SimMetricType.CosineSimilarity:
            return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.DiceSimilarity:
            return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.EuclideanDistance:
            return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaccardSimilarity:
            return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.Jaro:
            return new Jaro().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaroWinkler:
            return new JaroWinkler().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MatchingCoefficient:
            return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MongeElkan:
            return new MongeElkan().GetSimilarity(firstWord, secondWord);

        case SimMetricType.NeedlemanWunch:
            return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

        case SimMetricType.OverlapCoefficient:
            return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.QGramsDistance:
            return new QGramsDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWaterman:
            return new SmithWaterman().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotoh:
            return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

        default:
            // Covers SimMetricType.Levenstein and every value not listed above.
            return new Levenstein().GetSimilarity(firstWord, secondWord);
    }
}
/// <summary>
/// Click handler: runs <c>Jaro.Func</c> on the text of <c>JaroText1</c> and
/// <c>JaroText2</c> and shows the result in <c>JaroRez</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Jaro_button_Click(object sender, EventArgs e)
{
    var score = Jaro.Func(JaroText1.Text, JaroText2.Text);

    // Displayed using the result's default ToString() formatting.
    JaroRez.Text = score.ToString();
}
/// <summary>
/// Comparison: scores every two-word row of <paramref name="data"/> with six similarity
/// routines and records each score together with the time the call took.
/// </summary>
/// <param name="data">Rows of words. Only rows holding exactly two words are compared.</param>
/// <returns>
/// One 14-element string array per compared row, laid out as score/time pairs for
/// <c>Jaro.Func</c>, <c>ExJaro.distance</c>, <c>JaroVincler.Func</c>,
/// <c>ExJaroWincler.distance</c>, <c>Levenstein.Func</c> and
/// <c>SimMetricsMetricUtilities.Levenstein.GetSimilarity</c>, followed by the two input words.
/// Scores and elapsed milliseconds are both rounded to two decimals.
/// </returns>
private List<string[]> Sravn(string[][] data)
{
    List<string[]> rezlist = new List<string[]>();

    foreach (string[] str in data)
    {
        // Number of words in the row: anything other than exactly two (or a null row) is skipped.
        if (str == null || str.Length != 2)
        {
            continue;
        }

        string first = str[0];
        string second = str[1];

        // Created before any stopwatch starts, so construction is not part of a measurement.
        SimMetricsMetricUtilities.Levenstein ex_l = new SimMetricsMetricUtilities.Levenstein();

        // ----- Jaro -----
        Stopwatch t = Stopwatch.StartNew();
        double rj1 = Math.Round(Jaro.Func(first, second), 2);
        string tj1 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        t = Stopwatch.StartNew();
        double rj2 = Math.Round(ExJaro.distance(first, second), 2);
        string tj2 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        // ----- Jaro-Winkler -----
        t = Stopwatch.StartNew();
        double rjv1 = Math.Round(JaroVincler.Func(first, second), 2);
        string tjv1 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        t = Stopwatch.StartNew();
        double rjv2 = Math.Round(ExJaroWincler.distance(first, second), 2);
        string tjv2 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        // ----- Levenshtein -----
        t = Stopwatch.StartNew();
        double rl1 = Math.Round(Levenstein.Func(first, second), 2);
        string tl1 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        t = Stopwatch.StartNew();
        double rl2 = Math.Round(ex_l.GetSimilarity(first, second), 2);
        string tl2 = Math.Round(t.Elapsed.TotalMilliseconds, 2).ToString();

        rezlist.Add(new string[]
        {
            rj1.ToString(), tj1,
            rj2.ToString(), tj2,
            rjv1.ToString(), tjv1,
            rjv2.ToString(), tjv2,
            rl1.ToString(), tl1,
            rl2.ToString(), tl2,
            first, second
        });
    }

    return rezlist;
}
/// <summary>
/// Computes the similarity of two strings using the metric named by <paramref name="type"/>.
/// </summary>
/// <param name="str1">First string; trimmed before comparison.</param>
/// <param name="str2">Second string; trimmed before comparison.</param>
/// <param name="type">
/// One of the <c>AlgorithmTypes</c> constants. Any other value selects
/// <c>SmithWatermanGotoh</c>.
/// </param>
/// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
public double GetSimilarity(string str1, string str2, string type)
{
    // The metric is created first; the inputs are trimmed only afterwards.
    IStringMetric metric = ResolveMetric(type);

    return metric.GetSimilarity(str1.Trim(), str2.Trim());
}

/// <summary>
/// Maps an <c>AlgorithmTypes</c> name to a new instance of the matching metric.
/// </summary>
private static IStringMetric ResolveMetric(string type)
{
    switch (type)
    {
        case AlgorithmTypes.BlockDistance:
            return new BlockDistance();

        case AlgorithmTypes.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation();

        case AlgorithmTypes.ChapmanMeanLength:
            return new ChapmanMeanLength();

        case AlgorithmTypes.CosineSimilarity:
            return new CosineSimilarity();

        case AlgorithmTypes.DiceSimilarity:
            return new DiceSimilarity();

        case AlgorithmTypes.EuclideanDistance:
            return new EuclideanDistance();

        case AlgorithmTypes.JaccardSimilarity:
            return new JaccardSimilarity();

        case AlgorithmTypes.Jaro:
            return new Jaro();

        case AlgorithmTypes.JaroWinkler:
            return new JaroWinkler();

        case AlgorithmTypes.Levenstein:
            return new Levenstein();

        case AlgorithmTypes.MatchingCoefficient:
            return new MatchingCoefficient();

        case AlgorithmTypes.MongeElkan:
            return new MongeElkan();

        case AlgorithmTypes.NeedlemanWunch:
            return new NeedlemanWunch();

        case AlgorithmTypes.OverlapCoefficient:
            return new OverlapCoefficient();

        case AlgorithmTypes.QGramsDistance:
            return new QGramsDistance();

        case AlgorithmTypes.SmithWaterman:
            return new SmithWaterman();

        case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine();

        // The explicit SmithWatermanGotoh name and every unrecognised name share one result.
        case AlgorithmTypes.SmithWatermanGotoh:
        default:
            return new SmithWatermanGotoh();
    }
}
/// <summary>
/// Builds the test fixture: runs <c>LoadData()</c>, then creates the <c>Jaro</c> and
/// <c>JaroWinkler</c> instances stored in <c>_myJaro</c> and <c>_myJaroWinkler</c>.
/// </summary>
/// <remarks>
/// NOTE(review): a commented-out <c>[SetUp]</c> marker preceded this constructor, which
/// suggests it replaced an attribute-based setup method - confirm against the test framework.
/// </remarks>
public JaroAndJaroWinklerUnitTests()
{
    // Data is loaded before the metric instances are created; keep this order.
    LoadData();

    _myJaro = new Jaro();
    _myJaroWinkler = new JaroWinkler();
}
/// <summary>
/// Click handler that runs four matching methods over the word pairs in "data.txt" and
/// writes each method's results, followed by its total elapsed time, to its own text file.
/// </summary>
/// <remarks>
/// Every line of "data.txt" is split on a single space and its first two tokens are compared.
/// Lines with fewer than two tokens are skipped. Output files: "ex_m.txt"
/// (<c>Existing_Method.IndistinctMatching</c>), "m1.txt" (<c>Jaro.Func() * 100</c>),
/// "m2.txt" ("100" or "0" depending on <c>JaroVincler.Func()</c>) and "m3.txt"
/// (<c>100 - Levenstein.Func() * 100</c>). Each timed section includes the per-line file
/// appends. Nothing is written when "data.txt" does not exist.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Existing_Method ex_m = new Existing_Method();

    if (!System.IO.File.Exists("data.txt"))
    {
        return;
    }

    string[] mass = System.IO.File.ReadAllLines("data.txt");

    // Existing test
    System.IO.File.WriteAllText("ex_m.txt", "");
    Stopwatch t = Stopwatch.StartNew();
    foreach (string s in mass)
    {
        string[] str = s.Split(' ');
        if (str.Length < 2)
        {
            // Blank or single-word line: no pair to compare.
            continue;
        }

        // NOTE(review): the meaning of the literal 4 is defined by IndistinctMatching - confirm.
        System.IO.File.AppendAllText("ex_m.txt", ex_m.IndistinctMatching(4, str[0], str[1]).ToString() + Environment.NewLine);
    }
    t.Stop();
    System.IO.File.AppendAllText("ex_m.txt", t.Elapsed.ToString());

    // Method 1
    System.IO.File.WriteAllText("m1.txt", "");
    t = Stopwatch.StartNew();
    foreach (string s in mass)
    {
        string[] str = s.Split(' ');
        if (str.Length < 2)
        {
            continue;
        }

        Jaro m1 = new Jaro(str[0], str[1]);
        System.IO.File.AppendAllText("m1.txt", (m1.Func() * 100).ToString() + Environment.NewLine);
    }
    t.Stop();
    System.IO.File.AppendAllText("m1.txt", t.Elapsed.ToString());

    // Method 2
    System.IO.File.WriteAllText("m2.txt", "");
    t = Stopwatch.StartNew();
    foreach (string s in mass)
    {
        string[] str = s.Split(' ');
        if (str.Length < 2)
        {
            continue;
        }

        JaroVincler m2 = new JaroVincler(str[0], str[1]);

        // This method yields a yes/no verdict, recorded as 100 or 0.
        string verdict = m2.Func() ? "100" : "0";
        System.IO.File.AppendAllText("m2.txt", verdict + Environment.NewLine);
    }
    t.Stop();
    System.IO.File.AppendAllText("m2.txt", t.Elapsed.ToString());

    // Method 3
    System.IO.File.WriteAllText("m3.txt", "");
    t = Stopwatch.StartNew();
    foreach (string s in mass)
    {
        string[] str = s.Split(' ');
        if (str.Length < 2)
        {
            continue;
        }

        Levenstein m3 = new Levenstein(str[0], str[1]);
        System.IO.File.AppendAllText("m3.txt", (100 - m3.Func() * 100).ToString() + Environment.NewLine);
    }
    t.Stop();
    System.IO.File.AppendAllText("m3.txt", t.Elapsed.ToString());

    MessageBox.Show("Выполнено");
}
/// <summary>
/// Demo entry point that exercises each similarity/distance class once on small sample
/// strings and then waits for a key press. Results are only held in locals; nothing is printed.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    // ---------------------------------------------------------------
    // Bag-of-tokens similarity
    // ---------------------------------------------------------------
    // The recommended method for complex similarity over several words.
    var bagOfWords = new BagOfWordsSimilarity();
    const string pattern = "John Smith";
    const string targetText = "Mr. John Smith, Jr.";

    // Using a normalized string and tokenizer returns score 1.0.
    var patternTokens = new Tokenizer(new NormalizedString(pattern));
    var targetTokens = new Tokenizer(new NormalizedString(targetText));
    var bagOfWordsScore = bagOfWords.GetSimilarity(patternTokens, targetTokens);

    // ---------------------------------------------------------------
    // Levenshtein
    // ---------------------------------------------------------------
    const string nameCorrect = "martha";
    const string nameError = "marhta";

    // Levenshtein distance (implements interface IDistance)
    // and similarity (implements interface ISimilarity).
    var levenshtein = new Levenshtein();

    // Returns edit distance 2.
    var levenshteinDistance = levenshtein.GetDistance(nameCorrect, nameError);

    // A normalized string removes special symbols and diacritics and makes the
    // comparison case-insensitive. Returns score 0.67.
    var normalizedCorrect = new NormalizedString(nameCorrect);
    var normalizedError = new NormalizedString(nameError);
    var levenshteinScore = levenshtein.GetSimilarity(normalizedCorrect, normalizedError);

    // ---------------------------------------------------------------
    // Damerau-Levenshtein
    // ---------------------------------------------------------------
    // DamerauLevenshtein implements IDistance and ISimilarity.
    var damerauLevenshtein = new DamerauLevenshtein();

    // Returns edit distance 1.
    var damerauDistance = damerauLevenshtein.GetDistance(nameCorrect, nameError);

    // Returns score 0.83.
    var damerauScore = damerauLevenshtein.GetSimilarity(nameCorrect, nameError);

    // ---------------------------------------------------------------
    // Jaro, Jaro-Winkler
    // ---------------------------------------------------------------
    // Jaro and Jaro-Winkler implement only ISimilarity.
    var firstName = new Token("dwayne");
    var secondName = new Token("duane");

    // NOTE(review): the Jaro instance is created but never used, and the Jaro-Winkler
    // result below is discarded - confirm whether that is intentional for the demo.
    var jaroMetric = new Jaro();
    var jaroWinklerMetric = new JaroWinkler();
    jaroWinklerMetric.GetSimilarity(firstName, secondName);

    // ---------------------------------------------------------------
    // Q-grams coefficients
    // ---------------------------------------------------------------
    // Q-gram similarity coefficients - Dice, Jaccard, Overlap.
    // NOTE(review): the original comment promised different q-gram types, but all three
    // coefficients are instantiated with Bigram - confirm which types were intended.
    var diceBigrams = new DiceCoefficient<Bigram>();
    var jaccardBigrams = new JaccardCoefficient<Bigram>();
    var overlapBigrams = new OverlapCoefficient<Bigram>();

    // Returns score 0.5.
    var jaccardScore = jaccardBigrams.GetSimilarity(pattern, targetText);

    // Returns score 0.67.
    var diceScore = diceBigrams.GetSimilarity(pattern, targetText);

    // Returns score 1.0.
    var overlapScore = overlapBigrams.GetSimilarity(pattern, targetText);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Creates a new <c>Jaro</c> instance and assigns it to <c>_jaro</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably invoked by the test framework before each test; the
/// attribute that would establish this is not visible here - confirm.
/// </remarks>
public void Initialize() { _jaro = new Jaro(); }
/// <summary>
/// Checks <c>Jaro.Similarity</c> when one or both inputs are empty: two empty strings
/// are expected to score 1, and an empty string against "AB" (in either order) 0.
/// </summary>
public void EmptyCases()
{
    var bothEmpty = Jaro.Similarity("", "");
    Assert.Equal(1, bothEmpty);

    var emptyFirst = Jaro.Similarity("", "AB");
    Assert.Equal(0, emptyFirst);

    var emptySecond = Jaro.Similarity("AB", "");
    Assert.Equal(0, emptySecond);
}