Пример #1
0
        /// <summary>
        /// Counts the entries of the merged token list that occur in exactly one of the
        /// two token collections.
        /// </summary>
        /// <param name="firstTokens">First token collection.</param>
        /// <param name="secondTokens">Second token collection.</param>
        /// <returns>The number of one-sided entries, widened to <see cref="double"/>.</returns>
        private static double ActualBlockDistanceSimilarity(Collection <string> firstTokens, Collection <string> secondTokens)
        {
            TokeniserUtilities <string> utilities = new TokeniserUtilities <string>();
            int distance = 0;

            foreach (string token in utilities.CreateMergedList(firstTokens, secondTokens))
            {
                // Presence is counted at most once per side, so an entry adds 1 to the
                // distance exactly when only one of the collections contains it.
                bool inFirst  = firstTokens.Contains(token);
                bool inSecond = secondTokens.Contains(token);

                if (inFirst != inSecond)
                {
                    distance++;
                }
            }

            return distance;
        }
Пример #2
0
        /// <summary>
        /// Computes a cosine-style similarity between the token sets of two strings:
        /// the number of common set terms divided by the product of the square roots
        /// of the two set sizes.
        /// </summary>
        /// <param name="source">First string; may be null.</param>
        /// <param name="target">Second string; may be null.</param>
        /// <returns>
        /// The similarity score, or 0.0 when either argument is null, when the merged
        /// token set is empty, or when either individual token set is empty.
        /// </returns>
        public static double CosineSimilarity2(this string source, string target)
        {
            if (source == null || target == null)
            {
                return 0.0;
            }

            TokeniserUtilities <string> _tokenUtilities = new TokeniserUtilities <string>();
            var mergedSet = _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(source), _tokeniser.Tokenize(target));

            if (mergedSet.Count <= 0)
            {
                return 0.0;
            }

            var commonTerms = _tokenUtilities.CommonSetTerms();
            var firstCount  = _tokenUtilities.FirstSetTokenCount;
            var secondCount = _tokenUtilities.SecondSetTokenCount;

            // One side may contribute no tokens while the merged set is still non-empty.
            // The denominator would then be zero and the division would produce NaN
            // instead of a score, so treat that case as "no similarity".
            if (firstCount <= 0 || secondCount <= 0)
            {
                return 0.0;
            }

            return commonTerms / (Math.Sqrt(firstCount) * Math.Sqrt(secondCount));
        }
Пример #3
0
        /// <summary>
        /// Tokenizes <paramref name="word"/> and passes the tokens through
        /// <c>TokeniserUtilities.CreateSet</c>.
        /// </summary>
        /// <param name="word">The text to tokenize; may be null.</param>
        /// <returns>
        /// The collection produced by <c>CreateSet</c>, or null when
        /// <paramref name="word"/> is null.
        /// </returns>
        public static Collection <string> TokenizeToSet(string word)
        {
            TokeniserUtilities <string> setBuilder = new TokeniserUtilities <string>();

            // Null input is reported as a null result rather than an empty collection.
            if (word == null)
            {
                return null;
            }

            return setBuilder.CreateSet(Tokenize(word));
        }
Пример #4
0
        /// <summary>
        /// Computes the overlap coefficient of the token sets of two strings: the number
        /// of common set terms divided by the size of the smaller token set.
        /// </summary>
        /// <param name="firstWord">First string; may be null.</param>
        /// <param name="secondWord">Second string; may be null.</param>
        /// <returns>
        /// The overlap coefficient, or <c>DefaultMismatchScore</c> when either argument
        /// is null or when the smaller token set is empty.
        /// </returns>
        public static double OverlapCoefficientSimilarity(this string firstWord, string secondWord)
        {
            if (firstWord == null || secondWord == null)
            {
                return DefaultMismatchScore;
            }

            TokeniserUtilities <string> _tokenUtilities = new TokeniserUtilities <string>();
            _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));

            var    commonTerms    = _tokenUtilities.CommonSetTerms();
            double smallerSetSize = Math.Min(_tokenUtilities.FirstSetTokenCount, _tokenUtilities.SecondSetTokenCount);

            // An input without tokens makes the smaller set empty; dividing by it would
            // yield NaN (or Infinity) rather than a usable score.
            if (smallerSetSize <= 0)
            {
                return DefaultMismatchScore;
            }

            return commonTerms / smallerSetSize;
        }
Пример #5
0
        /// <summary>
        /// Normalises the value of <c>UnnormalisedEuclideanSimilarity</c> by the square
        /// root of the combined token count, giving (total - unnormalised) / total.
        /// </summary>
        /// <param name="source">First string; may be null.</param>
        /// <param name="target">Second string; may be null.</param>
        /// <returns>
        /// The normalised score, or <c>DefaultMismatchScore</c> when either argument is
        /// null or when the normalisation total is not positive.
        /// </returns>
        public static double EuclideanSimilarity(this string source, string target)
        {
            if (source == null || target == null)
            {
                return DefaultMismatchScore;
            }

            // NOTE(review): this instance is created here and never given any tokens by
            // this method, so its counts presumably stay at their initial values and do
            // not describe source/target - TODO confirm against TokeniserUtilities and
            // UnnormalisedEuclideanSimilarity.
            TokeniserUtilities <int> _tokenUtilities = new TokeniserUtilities <int>();

            double unnormalisedSimilarity = source.UnnormalisedEuclideanSimilarity(target);
            double totalPossible          = Math.Sqrt(_tokenUtilities.FirstTokenCount + _tokenUtilities.SecondTokenCount);

            // A zero (or NaN) total would turn the division below into NaN or an
            // infinity; report a mismatch instead of a non-finite score.
            if (!(totalPossible > 0.0))
            {
                return DefaultMismatchScore;
            }

            return (totalPossible - unnormalisedSimilarity) / totalPossible;
        }
Пример #6
0
        //! NOT WORKING (marker kept from the original author)
        /// <summary>
        /// Divides the value of <c>UnnormalisedJaroSimilarity</c> by the larger of the
        /// two token counts reported by a <c>TokeniserUtilities</c> instance.
        /// </summary>
        /// <param name="source">First string; may be null.</param>
        /// <param name="target">Second string; may be null.</param>
        /// <returns>
        /// The normalised score, or 0 when either argument is null or when the larger
        /// token count is not positive.
        /// </returns>
        public static double MatchingCoefficientSimilarity(this string source, string target)
        {
            if (source == null || target == null)
            {
                return 0;
            }

            // NOTE(review): this instance is created here and never given any tokens by
            // this method, so its counts presumably stay at their initial values - TODO
            // confirm. The Jaro-based helper also looks unexpected for a matching
            // coefficient; verify which unnormalised helper was intended.
            TokeniserUtilities <string> _tokenUtilities = new TokeniserUtilities <string>();

            double unnormalisedSimilarity = source.UnnormalisedJaroSimilarity(target);
            int    largestTokenCount      = Math.Max(_tokenUtilities.FirstTokenCount, _tokenUtilities.SecondTokenCount);

            // Dividing by a zero count would produce NaN or an infinity instead of a
            // score, so fall back to the same value used for null input.
            if (largestTokenCount <= 0)
            {
                return 0;
            }

            return unnormalisedSimilarity / largestTokenCount;
        }
Пример #7
0
        //private static readonly ITokeniser _tokeniser;
        //private static readonly TokeniserUtilities<string> => _tokenUtilities;
        //private static readonly _tokeniser => TokeniserWhitespace;


        /// <summary>
        /// Counts how many entries of <paramref name="firstTokens"/> are also contained
        /// in <paramref name="secondTokens"/>. Repeated entries in the first collection
        /// are counted each time they occur.
        /// </summary>
        /// <param name="firstTokens">Collection whose entries are tested.</param>
        /// <param name="secondTokens">Collection searched for each entry.</param>
        /// <returns>The number of matching entries, widened to <see cref="double"/>.</returns>
        private static double ActualMatchingCoefficientSimilarity(Collection <string> firstTokens, Collection <string> secondTokens)
        {
            TokeniserUtilities <string> utilities = new TokeniserUtilities <string>();

            // The merged list itself is discarded.
            // NOTE(review): the call looks redundant on a local instance - confirm that
            // CreateMergedList has no side effects anyone relies on before removing it.
            utilities.CreateMergedList(firstTokens, secondTokens);

            int matches = 0;

            foreach (string token in firstTokens)
            {
                matches += secondTokens.Contains(token) ? 1 : 0;
            }

            return matches;
        }
Пример #8
0
        //private readonly double _estimatedTimingConstant;
        //private readonly ITokeniser _tokeniser;
        //private readonly TokeniserUtilities<string> _tokenUtilities;

        //public EuclideanDistance() : this(new TokeniserWhitespace())
        //{
        //}

        //public EuclideanDistance(ITokeniser tokeniserToUse)
        //{
        //    _estimatedTimingConstant = 7.4457137088757008E-05;
        //    _tokeniser = tokeniserToUse;
        //    _tokenUtilities = new TokeniserUtilities<string>();
        //}

        /// <summary>
        /// Computes the square root of the summed squared presence differences over the
        /// merged token list, where each entry is scored 1 or 0 per collection depending
        /// on whether that collection contains it.
        /// </summary>
        /// <param name="firstTokens">First token collection.</param>
        /// <param name="secondTokens">Second token collection.</param>
        /// <returns>The square root of the accumulated squared differences.</returns>
        private static double GetActualDistance(Collection <string> firstTokens, Collection <string> secondTokens)
        {
            TokeniserUtilities <string> utilities    = new TokeniserUtilities <string>();
            Collection <string>         mergedTokens = utilities.CreateMergedList(firstTokens, secondTokens);
            int sumOfSquares = 0;

            foreach (string token in mergedTokens)
            {
                // Presence is binary per side, so each difference is -1, 0 or 1.
                int inFirst  = firstTokens.Contains(token) ? 1 : 0;
                int inSecond = secondTokens.Contains(token) ? 1 : 0;
                int delta    = inFirst - inSecond;

                sumOfSquares += delta * delta;
            }

            return Math.Sqrt(sumOfSquares);
        }