/// <summary>
/// Computes the similarity of two words by comparing the n-grams extracted from each of them.
/// </summary>
/// <param name="wordA">The first word.</param>
/// <param name="wordB">The second word.</param>
/// <param name="equationEnum">Similarity equation to apply. Any value without a dedicated case below is evaluated as the Jaccard index.</param>
/// <param name="nGramSize">N-gram size handed to <c>getNGrams</c> for both words.</param>
/// <param name="nGramMode">N-gram mode handed to <c>getNGrams</c> for both words.</param>
/// <returns>The value returned by the selected similarity function for the two n-gram collections.</returns>
public static Double GetSimilarity(String wordA, String wordB, nGramsSimilarityEquationEnum equationEnum = nGramsSimilarityEquationEnum.JaccardIndex, Int32 nGramSize = 2, nGramsModeEnum nGramMode = nGramsModeEnum.overlap)
{
    var ngram_A = getNGrams(wordA, nGramSize, nGramMode);
    var ngram_B = getNGrams(wordB, nGramSize, nGramMode);

    // Every section returns, so no break statements or trailing return are needed.
    switch (equationEnum)
    {
        case nGramsSimilarityEquationEnum.DiceCoefficient:
            return GetDiceCoefficient(ngram_A, ngram_B);

        case nGramsSimilarityEquationEnum.continualOverlapRatio:
            return GetContinualOverlapRatio(ngram_A, ngram_B);

        // Jaccard index doubles as the fall-back for every other enum value.
        case nGramsSimilarityEquationEnum.JaccardIndex:
        default:
            return GetJaccardIndex(ngram_A, ngram_B);
    }
}
/// <summary>
/// Computes the similarity of two sets using the selected equation.
/// </summary>
/// <param name="setA">The first set.</param>
/// <param name="setB">The second set.</param>
/// <param name="equationEnum">Similarity equation to apply. Any value without a dedicated case below is evaluated as the Jaccard index.</param>
/// <returns>The value returned by the selected similarity function for the two sets.</returns>
public Double GetSimilarity(List<T> setA, List<T> setB, nGramsSimilarityEquationEnum equationEnum = nGramsSimilarityEquationEnum.JaccardIndex)
{
    // Every section returns, so no break statements or trailing return are needed.
    switch (equationEnum)
    {
        case nGramsSimilarityEquationEnum.DiceCoefficient:
            return GetDiceCoefficient(setA, setB);

        case nGramsSimilarityEquationEnum.continualOverlapRatio:
            return GetContinualOverlapRatio(setA, setB);

        case nGramsSimilarityEquationEnum.KunchevaIndex:
            return GetKunchevaIndex(setA, setB);

        // Jaccard index doubles as the fall-back for every other enum value.
        case nGramsSimilarityEquationEnum.JaccardIndex:
        default:
            return GetJaccardIndex(setA, setB);
    }
}
/// <summary>
/// Extension shortcut that computes the similarity between this list and <paramref name="setB"/>
/// by delegating to a newly created <see cref="setAnalysisTools{T}"/> instance.
/// </summary>
/// <typeparam name="T">Element type of both lists.</typeparam>
/// <param name="setA">The first set (the extension target).</param>
/// <param name="setB">The second set.</param>
/// <param name="equationEnum">Similarity equation forwarded to the tool instance.</param>
/// <returns>The similarity value reported by the tool instance.</returns>
public static Double GetSimilarity<T>(this List<T> setA, List<T> setB, nGramsSimilarityEquationEnum equationEnum = nGramsSimilarityEquationEnum.JaccardIndex) where T : IEquatable<T>
{
    return new setAnalysisTools<T>().GetSimilarity(setA, setB, equationEnum);
}
/// <summary>
/// Computes one similarity value for a collection of sets.
/// </summary>
/// <remarks>
/// Fewer than two sets yield 0; exactly two sets yield their direct pairwise similarity.
/// For three or more sets:
/// <list type="bullet">
/// <item><description>Jaccard index, Dice coefficient and continual overlap ratio: the mean of the pairwise similarity over all unordered pairs of sets.</description></item>
/// <item><description>Kuncheva index: <c>GetKunchevaIndex</c> is summed over the pairs produced by <c>getNGrams(setList, 2, overlap)</c>, using the item count of the merged dataset, and the sum is normalised through <c>GetRatio</c> by the number of pairs.</description></item>
/// <item><description>Any other value: 0.</description></item>
/// </list>
/// </remarks>
/// <typeparam name="T">Element type of the sets.</typeparam>
/// <param name="sets">The sets to compare; enumerated once.</param>
/// <param name="equationEnum">Similarity equation to apply.</param>
/// <returns>The aggregated similarity as described in the remarks.</returns>
public static Double GetSimilarity<T>(this IEnumerable<List<T>> sets, nGramsSimilarityEquationEnum equationEnum = nGramsSimilarityEquationEnum.JaccardIndex) where T : IEquatable<T>
{
    List<List<T>> setList = sets.ToList();

    // Keep the count integral: it is compared for equality and used as a loop bound.
    Int32 setCount = setList.Count;

    if (setCount < 2)
    {
        return 0;
    }

    if (setCount == 2)
    {
        return GetSimilarity<T>(setList[0], setList[1], equationEnum);
    }

    Double s = 0;

    switch (equationEnum)
    {
        case nGramsSimilarityEquationEnum.continualOverlapRatio:
        case nGramsSimilarityEquationEnum.DiceCoefficient:
        case nGramsSimilarityEquationEnum.JaccardIndex:
            // Sum the similarity over every unordered pair (i < j).
            Double pairCount = 0;
            for (int i = 0; i < setCount - 1; i++)
            {
                for (int j = i + 1; j < setCount; j++)
                {
                    s += GetSimilarity<T>(setList[i], setList[j], equationEnum);
                    pairCount++;
                }
            }

            if (equationEnum == nGramsSimilarityEquationEnum.JaccardIndex)
            {
                // 2 / (n * (n - 1)) is the reciprocal of the pair count, so this is the same mean
                // as below; kept as a multiplication so results stay numerically identical
                // to the previous implementation.
                Double k = 2 / ((Double)setCount * (setCount - 1));
                s = s * k;
            }
            else
            {
                s = s / pairCount;
            }
            break;

        case nGramsSimilarityEquationEnum.KunchevaIndex:
            setAnalysisTools<T> tool = new setAnalysisTools<T>();

            // Merge all sets; the item count of the merged list is passed to GetKunchevaIndex.
            // NOTE(review): the 'true' flag presumably requests unique-only insertion — confirm against the AddRange extension.
            List<T> completeDataset = new List<T>();
            foreach (List<T> subset in setList)
            {
                completeDataset.AddRange(subset, true);
            }
            Int32 n_n = completeDataset.Count;

            // NOTE(review): pairs come from size-2 n-grams of the set list, which presumably means
            // neighbouring sets only rather than all combinations — confirm against getNGrams.
            var pairs = tool.getNGrams(setList, 2, nGramsModeEnum.overlap);
            foreach (var pair in pairs)
            {
                s += tool.GetKunchevaIndex(pair[0], pair[1], n_n);
            }
            s = s.GetRatio(pairs.Count);
            break;

        default:
            // Unsupported equation: s stays 0.
            break;
    }

    return s;
}