/// <summary>
/// Checks that a <c>Jaccard</c> instance created with <c>k: 2</c> reports a
/// similarity of 0.6 for the inputs "ABCDE" and "ABCDF".
/// </summary>
public void TestSimilarity()
{
    var instance = new Jaccard(k: 2);

    // Two-character pieces present in each input:
    //            AB BC CD DE DF
    //   "ABCDE":  1  1  1  1  0
    //   "ABCDF":  1  1  1  0  1
    // => 3 shared out of 5 distinct = 0.6
    var result = instance.Similarity("ABCDE", "ABCDF");

    // Compare to a fixed number of decimal places rather than bit-for-bit:
    // the value under test is the result of floating-point arithmetic.
    Assert.Equal(expected: 0.6, actual: result, precision: 10);
}
///// <summary> ///// Calculates the Levenshtein distance between two strings ///// </summary> ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23 ///// Explanation: https://en.wikipedia.org/wiki/Levenshtein_distance //private Int32 levenshtein(String a, String b) //{ // if (string.IsNullOrEmpty(a)) // { // if (!string.IsNullOrEmpty(b)) // { // return b.Length; // } // return 0; // } // if (string.IsNullOrEmpty(b)) // { // if (!string.IsNullOrEmpty(a)) // { // return a.Length; // } // return 0; // } // Int32 cost; // Int32[,] d = new int[a.Length + 1, b.Length + 1]; // Int32 min1; // Int32 min2; // Int32 min3; // for (Int32 i = 0; i <= d.GetUpperBound(0); i += 1) // { // d[i, 0] = i; // } // for (Int32 i = 0; i <= d.GetUpperBound(1); i += 1) // { // d[0, i] = i; // } // for (Int32 i = 1; i <= d.GetUpperBound(0); i += 1) // { // for (Int32 j = 1; j <= d.GetUpperBound(1); j += 1) // { // cost = Convert.ToInt32(!(a[i - 1] == b[j - 1])); // min1 = d[i - 1, j] + 1; // min2 = d[i, j - 1] + 1; // min3 = d[i - 1, j - 1] + cost; // d[i, j] = Math.Min(Math.Min(min1, min2), min3); // } // } // return d[d.GetUpperBound(0), d.GetUpperBound(1)]; //} ///// <summary> ///// String-similarity computed with levenshtein-distance ///// </summary> //private double similarityLevenshtein(string a, string b) //{ // if (a.Equals(b)) // { // return 1.0; // } // else // { // if (!(a.Length == 0 || b.Length == 0)) // { // double sim = 1 - (levenshtein(a, b) / Convert.ToDouble(Math.Min(a.Length, b.Length))); // return sim; // } // else // return 0.0; // } //} ///// <summary> ///// String-similarity computed with Dice Coefficient ///// </summary> ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#C.23 ///// Explanation: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient //private double similarityDiceCoefficient(string a, string b) //{ // //Workaround for |a| == |b| == 1 // if 
//        (a.Length <= 1 && b.Length <= 1)
//    {
//        if (a.Equals(b))
//            return 1.0;
//        else
//            return 0.0;
//    }
//    HashSet<string> setA = new HashSet<string>();
//    HashSet<string> setB = new HashSet<string>();
//    for (int i = 0; i < a.Length - 1; ++i)
//        setA.Add(a.Substring(i, 2));
//    for (int i = 0; i < b.Length - 1; ++i)
//        setB.Add(b.Substring(i, 2));
//    HashSet<string> intersection = new HashSet<string>(setA);
//    intersection.IntersectWith(setB);
//    return (2.0 * intersection.Count) / (setA.Count + setB.Count);
//}

/// <summary>
/// Averages several string-similarity scores for the two inputs, giving each
/// score the same weight.
/// </summary>
/// <param name="a">First string to compare.</param>
/// <param name="b">Second string to compare.</param>
/// <returns>
/// The mean of the <c>Similarity(a, b)</c> results of NormalizedLevenshtein,
/// JaroWinkler and Jaccard.
/// </returns>
private double similarity(string a, string b)
{
    // Each measure is created and evaluated in turn; the order matches the
    // order in which the scores are summed below.
    double[] scores =
    {
        new NormalizedLevenshtein().Similarity(a, b),
        new JaroWinkler().Similarity(a, b),
        new Jaccard().Similarity(a, b),
    };

    double total = 0.0;
    for (int index = 0; index < scores.Length; index++)
    {
        total += scores[index];
    }

    return total / scores.Length;
}
/// <summary>
/// Returns the arithmetic mean of three similarity scores for the given
/// strings, obtained from the <c>cosine</c>, <c>jaccard</c> and
/// <c>sorensenDice</c> members.
/// </summary>
/// <param name="str1">First string to compare.</param>
/// <param name="str2">Second string to compare.</param>
/// <returns>The sum of the three scores divided by 3.</returns>
public static double GetSimilarityAverage(string str1, string str2)
{
    var cosineScore = cosine.Similarity(str1, str2);
    var jaccardScore = jaccard.Similarity(str1, str2);
    var sorensenDiceScore = sorensenDice.Similarity(str1, str2);

    var total = cosineScore + jaccardScore + sorensenDiceScore;
    return total / 3;
}
/// <summary>
/// Handles the OK click: reads the two selected columns of
/// <c>mainWindow.gridData</c> as doubles, computes the selected similarity
/// measure between them and appends a result line to <c>textBlock</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Ok_Click(object sender, RoutedEventArgs e)
{
    // Without a selection in every combo box there is nothing to compute;
    // dereferencing SelectedItem unconditionally would throw a
    // NullReferenceException here.
    if (comboBoxCol1.SelectedItem == null
        || comboBoxCol2.SelectedItem == null
        || similarityMeasures.SelectedItem == null)
    {
        return;
    }

    // Cell text is parsed with "-" as the negative sign.
    var format = new NumberFormatInfo();
    format.NegativeSign = "-";

    var firstColumn = comboBoxCol1.SelectedItem.ToString();
    var secondColumn = comboBoxCol2.SelectedItem.ToString();
    var measure = similarityMeasures.SelectedItem.ToString();

    var valuesCol1 = ReadColumnAsDoubles(firstColumn, format);
    var valuesCol2 = ReadColumnAsDoubles(secondColumn, format);

    string similarityText;
    switch (measure)
    {
        case "Jaccard":
            similarityText = new Jaccard().Similarity(valuesCol1, valuesCol2).ToString();
            break;
        case "Korelacja Pearsona":
            similarityText = new PearsonCorrelation().Similarity(valuesCol1, valuesCol2).ToString();
            break;
        case "Cosinus":
            similarityText = new Cosine().Similarity(valuesCol1, valuesCol2).ToString();
            break;
        default:
            // Unrecognised measure: nothing is written.
            return;
    }

    // The measure's display name is the same text that selects the branch
    // above, so one message template serves all three measures.
    textBlock.Text += "\nPodobieństwo miary " + measure
        + " pomiędzy kolumnami " + firstColumn
        + " i " + secondColumn
        + " wynosi: " + similarityText;
}

/// <summary>
/// Reads every row of one <c>mainWindow.gridData</c> column and converts each
/// cell, after passing it through <c>ConvertString</c>, to a double.
/// </summary>
/// <param name="selectedColumn">Name used to look the column up in <c>gridData.Columns</c>.</param>
/// <param name="format">Number format passed to <c>Convert.ToDouble</c>.</param>
/// <returns>One double per row, in row order.</returns>
private double[] ReadColumnAsDoubles(string selectedColumn, NumberFormatInfo format)
{
    // Resolve the column once instead of once per row.
    var columnName = mainWindow.gridData.Columns[selectedColumn].ColumnName;

    return (from row in mainWindow.gridData.AsEnumerable()
            select Convert.ToDouble(ConvertString(row.Field<string>(columnName)), format)).ToArray();
}