Ejemplo n.º 1
0
        /// <summary>
        /// Scores a bigram by combining four frequency totals (a, b, c, d) looked up
        /// through <c>Frequency</c> over <c>_words</c> into a sum of <c>x * ln(x + 1)</c> terms.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the shape of the formula resembles a log-likelihood ratio over a
        /// 2x2 contingency table with +1 smoothing — confirm against the intended measure.
        /// </remarks>
        /// <param name="bigramm">Bigram text; must split on a single space into exactly two parts.</param>
        /// <returns>The value of the combined expression at the end of the method.</returns>
        /// <exception cref="ArgumentException">
        /// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
        /// </exception>
        public double Calculate(string bigramm)
        {
            var bigrammWords = bigramm.Split(' ');
            if (bigrammWords.Length != 2)
                throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");

            // Candidates that keep the first word and pair it with every corpus word except the second one.
            var otherBigrammsLeftLemm = _words.Where(t => t != bigrammWords[1]).Select(t => bigrammWords[0] + ' ' + t).ToList();
            // Candidates that keep the second word and pair it with every corpus word except the first one.
            var otherBigrammsRightLemm = _words.Where(t => t != bigrammWords[0]).Select(t => t + ' ' + bigrammWords[1]).ToList();

            // Every pair of adjacent words in _words whose text differs from the requested bigram.
            var otherBigramms = new List<string>();
            for (int i = 0; i < _words.Count - 1; i++)
            {
                var otherBigrammText = _words[i] + " " + _words[i + 1];
                if (otherBigrammText != bigramm)
                    otherBigramms.Add(otherBigrammText);
            }

            Frequency freq = new Frequency(_words);
            var a = freq.GetByBigramm(bigramm);
            var b = otherBigrammsLeftLemm.Sum(t => freq.GetByBigramm(t));
            var c = otherBigrammsRightLemm.Sum(t => freq.GetByBigramm(t));
            var d = otherBigramms.Sum(t => freq.GetByBigramm(t));

            // The +1 inside each logarithm keeps a zero total from producing ln(0).
            return a * Math.Log(a + 1)
                 + b * Math.Log(b + 1)
                 + c * Math.Log(c + 1)
                 + d * Math.Log(d + 1)
                 - (a + b) * Math.Log(a + b + 1)
                 - (a + c) * Math.Log(a + c + 1)
                 - (b + d) * Math.Log(b + d + 1)
                 - (c + d) * Math.Log(c + d + 1)
                 + (a + b + c + d) * Math.Log(a + b + c + d + 1);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes the mutual information coefficient of a bigram:
        /// <c>log2(f12 * N / (f1 * f2))</c>, where f1 and f2 are the single-word
        /// frequencies and f12 is the bigram frequency reported by <c>Frequency</c>.
        /// </summary>
        /// <remarks>
        /// <c>N</c> is defined outside this method (presumably the corpus size — confirm).
        /// </remarks>
        /// <param name="bigramm">Bigram text; the first two space-separated parts are used as the words.</param>
        /// <returns>The base-2 logarithm of the frequency ratio.</returns>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="bigramm"/> contains no space, so there is no second word to look up.
        /// </exception>
        public double Calculate(string bigramm)
        {
            var freq        = new Frequency(_words);
            var bigramWords = bigramm.Split(' ');

            var f1  = freq.GetByOneWord(bigramWords[0]);
            var f2  = freq.GetByOneWord(bigramWords[1]);
            var f12 = freq.GetByBigramm(bigramm);

            // Multiply in double: if the frequencies are integers, f1 * f2 would be an
            // integer product that can silently wrap around for frequent words.
            var denominator = (double)f1 * f2;

            return(Math.Log(((double)f12 * N) / denominator, 2));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Computes the mutual information coefficient of a bigram:
        /// <c>log2(f12 * N / (f1 * f2))</c>, where f1 and f2 are the single-word
        /// frequencies and f12 is the bigram frequency reported by <c>Frequency</c>.
        /// </summary>
        /// <remarks>
        /// <c>N</c> is defined outside this method (presumably the corpus size — confirm).
        /// </remarks>
        /// <param name="bigramm">Bigram text; the first two space-separated parts are used as the words.</param>
        /// <returns>The base-2 logarithm of the frequency ratio.</returns>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="bigramm"/> contains no space, so there is no second word to look up.
        /// </exception>
        public double Calculate(string bigramm)
        {
            var freq = new Frequency(_words);
            var bigramWords = bigramm.Split(' ');

            var f1 = freq.GetByOneWord(bigramWords[0]);
            var f2 = freq.GetByOneWord(bigramWords[1]);
            var f12 = freq.GetByBigramm(bigramm);

            // Promote to double before multiplying so that integer frequencies cannot
            // overflow when both words are very common.
            double wordFrequencyProduct = (double)f1 * f2;

            return Math.Log(((double)f12 * N) / wordFrequencyProduct, 2);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Scores a bigram by combining four frequency totals (a, b, c, d) looked up
        /// through <c>Frequency</c> over <c>_words</c> into a sum of <c>x * ln(x + 1)</c> terms.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the shape of the formula resembles a log-likelihood ratio over a
        /// 2x2 contingency table with +1 smoothing — confirm against the intended measure.
        /// </remarks>
        /// <param name="bigramm">Bigram text; must split on a single space into exactly two parts.</param>
        /// <returns>The value of the combined expression at the end of the method.</returns>
        /// <exception cref="ArgumentException">
        /// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
        /// </exception>
        public double Calculate(string bigramm)
        {
            var bigrammWords = bigramm.Split(' ');

            if (bigrammWords.Length != 2)
            {
                throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
            }

            // First word paired with every corpus word except the bigram's second word.
            var otherBigrammsLeftLemm  = _words.Where(t => t != bigrammWords[1]).Select(t => bigrammWords[0] + ' ' + t).ToList();
            // Every corpus word except the bigram's first word, paired with the second word.
            var otherBigrammsRightLemm = _words.Where(t => t != bigrammWords[0]).Select(t => t + ' ' + bigrammWords[1]).ToList();
            // Adjacent word pairs from _words whose text differs from the requested bigram.
            var otherBigramms          = new List<string>();

            for (int i = 0; i < _words.Count - 1; i++)
            {
                var otherBigrammText = _words[i] + " " + _words[i + 1];
                if (otherBigrammText != bigramm)
                {
                    otherBigramms.Add(otherBigrammText);
                }
            }

            Frequency freq = new Frequency(_words);
            var       a    = freq.GetByBigramm(bigramm);
            var       b    = otherBigrammsLeftLemm.Sum(t => freq.GetByBigramm(t));
            var       c    = otherBigrammsRightLemm.Sum(t => freq.GetByBigramm(t));
            var       d    = otherBigramms.Sum(t => freq.GetByBigramm(t));

            // The +1 inside each logarithm keeps a zero total from producing ln(0).
            return(a * Math.Log(a + 1)
                   + b * Math.Log(b + 1)
                   + c * Math.Log(c + 1)
                   + d * Math.Log(d + 1)
                   - (a + b) * Math.Log(a + b + 1)
                   - (a + c) * Math.Log(a + c + 1)
                   - (b + d) * Math.Log(b + d + 1)
                   - (c + d) * Math.Log(c + d + 1)
                   + (a + b + c + d) * Math.Log(a + b + c + d + 1));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Scores a bigram as <c>(f12 - f1 * f2 / (_words.Count - 1)) / f12^2</c>, where
        /// f1 and f2 are the single-word frequencies and f12 is the bigram frequency
        /// reported by <c>Frequency</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the numerator matches the usual t-score, but that measure divides
        /// by the square root of f12 rather than its square — confirm which is intended.
        /// </remarks>
        /// <param name="bigramm">Bigram text; must split on a single space into exactly two parts.</param>
        /// <returns>The quotient described above.</returns>
        /// <exception cref="ArgumentException">
        /// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
        /// </exception>
        public double Calculate(string bigramm)
        {
            string[] parts = bigramm.Split(' ');

            if (parts.Length != 2)
            {
                throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
            }

            var frequencies = new Frequency(_words);

            double firstCount  = frequencies.GetByOneWord(parts[0]);
            double secondCount = frequencies.GetByOneWord(parts[1]);
            double pairCount   = frequencies.GetByBigramm(bigramm);

            // Product of the single-word frequencies scaled by the corpus length minus one.
            double scaledProduct = firstCount * secondCount / (_words.Count - 1);
            double numerator     = pairCount - scaledProduct;

            return numerator / Math.Pow(pairCount, 2);
        }