/// <summary>
/// Computes the unnormalised block distance between two token collections by walking the
/// list returned by <c>CreateMergedList</c> and counting the entries that are present in
/// exactly one of the two collections.
/// </summary>
/// <param name="firstTokens">Tokens of the first string.</param>
/// <param name="secondTokens">Tokens of the second string.</param>
/// <returns>The number of merged-list entries found in only one of the collections.</returns>
private static double ActualBlockDistanceSimilarity(Collection<string> firstTokens, Collection<string> secondTokens)
{
    TokeniserUtilities<string> utilities = new TokeniserUtilities<string>();
    Collection<string> mergedTokens = utilities.CreateMergedList(firstTokens, secondTokens);

    int distance = 0;
    foreach (string token in mergedTokens)
    {
        // Only presence is tested (not how often a token occurs), so every merged entry
        // contributes either 0 or 1 to the total.
        bool inFirst = firstTokens.Contains(token);
        bool inSecond = secondTokens.Contains(token);
        if (inFirst != inSecond)
        {
            distance++;
        }
    }

    return distance;
}
/// <summary>
/// Computes a cosine-style similarity between two strings from their token sets: the number
/// of common set terms divided by the product of the square roots of the two set sizes.
/// </summary>
/// <param name="source">First string; may be null.</param>
/// <param name="target">Second string; may be null.</param>
/// <returns>
/// The similarity score, or 0.0 when either argument is null, the merged token set is empty,
/// or either individual token set is empty.
/// </returns>
public static double CosineSimilarity2(this string source, string target)
{
    if (source == null || target == null)
    {
        return 0.0;
    }

    TokeniserUtilities<string> tokenUtilities = new TokeniserUtilities<string>();
    if (tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(source), _tokeniser.Tokenize(target)).Count <= 0)
    {
        return 0.0;
    }

    var firstSetSize = tokenUtilities.FirstSetTokenCount;
    var secondSetSize = tokenUtilities.SecondSetTokenCount;

    // A non-empty merged set does not guarantee that both sides are non-empty. If one side
    // reports zero tokens the denominator below is 0 and the division would produce a
    // non-finite result (NaN) instead of a score, so report "no similarity" explicitly.
    if (firstSetSize == 0 || secondSetSize == 0)
    {
        return 0.0;
    }

    double denominator = Math.Pow(firstSetSize, 0.5) * Math.Pow(secondSetSize, 0.5);
    return tokenUtilities.CommonSetTerms() / denominator;
}
/// <summary>
/// Tokenises <paramref name="word"/> with <c>Tokenize</c> and passes the tokens through
/// <c>TokeniserUtilities&lt;string&gt;.CreateSet</c>.
/// </summary>
/// <param name="word">The text to tokenise; may be null.</param>
/// <returns>The collection produced by <c>CreateSet</c>, or null when <paramref name="word"/> is null.</returns>
public static Collection<string> TokenizeToSet(string word)
{
    TokeniserUtilities<string> setBuilder = new TokeniserUtilities<string>();

    if (word == null)
    {
        // Null input yields null rather than an empty collection.
        return null;
    }

    return setBuilder.CreateSet(Tokenize(word));
}
/// <summary>
/// Computes the overlap coefficient of two strings: the number of common set terms divided
/// by the size of the smaller of the two token sets.
/// </summary>
/// <param name="firstWord">First string; may be null.</param>
/// <param name="secondWord">Second string; may be null.</param>
/// <returns>
/// The overlap coefficient, or <c>DefaultMismatchScore</c> when either argument is null or
/// when the smaller token set is empty.
/// </returns>
public static double OverlapCoefficientSimilarity(this string firstWord, string secondWord)
{
    if (firstWord == null || secondWord == null)
    {
        return DefaultMismatchScore;
    }

    TokeniserUtilities<string> tokenUtilities = new TokeniserUtilities<string>();
    tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));

    var smallerSetSize = Math.Min(tokenUtilities.FirstSetTokenCount, tokenUtilities.SecondSetTokenCount);

    // With an empty token set on either side the divisor is 0; floating-point division
    // would then return NaN rather than a usable score, so fall back to the mismatch score.
    if (smallerSetSize == 0)
    {
        return DefaultMismatchScore;
    }

    return tokenUtilities.CommonSetTerms() / (double)smallerSetSize;
}
/// <summary>
/// Normalises the unnormalised Euclidean distance of two strings into a similarity score
/// using the square root of the combined token count as the maximum distance:
/// <c>(max - distance) / max</c>.
/// </summary>
/// <param name="source">First string; may be null.</param>
/// <param name="target">Second string; may be null.</param>
/// <returns>
/// The normalised similarity, or <c>DefaultMismatchScore</c> when either argument is null
/// or when neither string produces any tokens.
/// </returns>
public static double EuclideanSimilarity(this string source, string target)
{
    if (source == null || target == null)
    {
        return DefaultMismatchScore;
    }

    // The token counts are taken directly from the tokenised inputs. A TokeniserUtilities
    // instance created locally in this method has never been handed any tokens, so its
    // count properties cannot describe these strings.
    // NOTE(review): assumes _tokeniser is the tokeniser UnnormalisedEuclideanSimilarity
    // uses and that Tokenize returns a collection exposing Count - confirm.
    int totalTokens = _tokeniser.Tokenize(source).Count + _tokeniser.Tokenize(target).Count;
    if (totalTokens == 0)
    {
        // Nothing to compare; also avoids dividing by a zero normaliser.
        return DefaultMismatchScore;
    }

    double normaliser = Math.Sqrt(totalTokens);
    double distance = source.UnnormalisedEuclideanSimilarity(target);
    return (normaliser - distance) / normaliser;
}
/// <summary>
/// Computes the matching coefficient of two strings: the number of tokens of
/// <paramref name="source"/> that also occur in <paramref name="target"/>, divided by the
/// larger of the two token counts.
/// </summary>
/// <remarks>
/// This method used to carry a "NOT WORKING" marker. It read its token counts from a
/// TokeniserUtilities instance that was created locally and never given any tokens, and it
/// took its numerator from UnnormalisedJaroSimilarity. It now tokenises both inputs itself
/// and uses ActualMatchingCoefficientSimilarity for the numerator.
/// NOTE(review): assumes _tokeniser.Tokenize returns Collection&lt;string&gt; - confirm.
/// </remarks>
/// <param name="source">First string; may be null.</param>
/// <param name="target">Second string; may be null.</param>
/// <returns>
/// The matching coefficient, or 0 when either argument is null or neither string produces
/// any tokens.
/// </returns>
public static double MatchingCoefficientSimilarity(this string source, string target)
{
    if (source == null || target == null)
    {
        return 0;
    }

    Collection<string> firstTokens = _tokeniser.Tokenize(source);
    Collection<string> secondTokens = _tokeniser.Tokenize(target);

    int largerCount = Math.Max(firstTokens.Count, secondTokens.Count);
    if (largerCount == 0)
    {
        // No tokens on either side: avoid dividing by zero.
        return 0;
    }

    return ActualMatchingCoefficientSimilarity(firstTokens, secondTokens) / largerCount;
}
//private static readonly ITokeniser _tokeniser;
//private static readonly TokeniserUtilities<string> => _tokenUtilities;
//private static readonly _tokeniser => TokeniserWhitespace;

/// <summary>
/// Counts how many entries of <paramref name="firstTokens"/> are also contained in
/// <paramref name="secondTokens"/>. Every entry of the first collection is tested, so a
/// token repeated in the first collection is counted once per occurrence.
/// </summary>
/// <param name="firstTokens">Tokens of the first string.</param>
/// <param name="secondTokens">Tokens of the second string.</param>
/// <returns>The number of matching entries, as a double.</returns>
private static double ActualMatchingCoefficientSimilarity(Collection<string> firstTokens, Collection<string> secondTokens)
{
    // No merged token list is built here: the count depends only on the two input
    // collections, so constructing one would be work whose result is never read.
    int matches = 0;
    foreach (string token in firstTokens)
    {
        if (secondTokens.Contains(token))
        {
            matches++;
        }
    }

    return matches;
}
//private readonly double _estimatedTimingConstant;
//private readonly ITokeniser _tokeniser;
//private readonly TokeniserUtilities<string> _tokenUtilities;
//public EuclideanDistance() : this(new TokeniserWhitespace())
//{
//}
//public EuclideanDistance(ITokeniser tokeniserToUse)
//{
//    _estimatedTimingConstant = 7.4457137088757008E-05;
//    _tokeniser = tokeniserToUse;
//    _tokenUtilities = new TokeniserUtilities<string>();
//}

/// <summary>
/// Computes the unnormalised Euclidean distance between two token collections: for every
/// entry of the list returned by <c>CreateMergedList</c>, the squared difference of its
/// presence (0 or 1) in each collection is accumulated, and the square root of the sum is
/// returned.
/// </summary>
/// <param name="firstTokens">Tokens of the first string.</param>
/// <param name="secondTokens">Tokens of the second string.</param>
/// <returns>The square root of the accumulated squared presence differences.</returns>
private static double GetActualDistance(Collection<string> firstTokens, Collection<string> secondTokens)
{
    TokeniserUtilities<string> utilities = new TokeniserUtilities<string>();

    int squaredDistance = 0;
    foreach (string token in utilities.CreateMergedList(firstTokens, secondTokens))
    {
        // Presence flags, not occurrence counts: each is either 0 or 1.
        int inFirst = firstTokens.Contains(token) ? 1 : 0;
        int inSecond = secondTokens.Contains(token) ? 1 : 0;
        int delta = inFirst - inSecond;
        squaredDistance += delta * delta;
    }

    return Math.Sqrt(squaredDistance);
}