Ejemplo n.º 1
0
        public double Calculate(string bigramm)
        {
            var bigrammWords = bigramm.Split(' ');
            if (bigrammWords.Count() != 2)
                throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");

            var otherBigrammsLeftLemm = _words.Where(t => t != bigrammWords[1]).Select(t => bigrammWords[0] + ' ' + t).ToList();
            var otherBigrammsRightLemm = _words.Where(t => t != bigrammWords[0]).Select(t => t + ' ' + bigrammWords[1]).ToList();
            var _wordsWithoutBigrammWords = _words.Where(t => !bigrammWords.Contains(t)).ToList();
            var otherBigramms = new List<string>(); //_wordsWithoutBigrammWords.SelectMany(t => _wordsWithoutBigrammWords, (t, w) => t + ' ' + w).ToList();
            for(int i = 0; i < _words.Count - 1; i++)
            {
                var otherBigrammText = _words[i] + " " + _words[i + 1];
                if (otherBigrammText != bigramm)
                    otherBigramms.Add(otherBigrammText);
            }

            Frequency freq = new Frequency(_words);
            var a = freq.GetByBigramm(bigramm);
            var b = otherBigrammsLeftLemm.Sum(t => freq.GetByBigramm(t));
            var c = otherBigrammsRightLemm.Sum(t => freq.GetByBigramm(t));
            var d = otherBigramms.Sum(t => freq.GetByBigramm(t));

            return a * Math.Log(a + 1)
                 + b * Math.Log(b + 1)
                 + c * Math.Log(c + 1)
                 + d * Math.Log(d + 1)
                 - (a + b) * Math.Log(a + b + 1)
                 - (a + c) * Math.Log(a + c + 1)
                 - (b + d) * Math.Log(b + d + 1)
                 - (c + d) * Math.Log(c + d + 1)
                 + (a + b + c + d) * Math.Log(a + b + c + d + 1);
        }
Ejemplo n.º 2
0
 public double Calculate(string word, int collectionLength, int usedCount)
 {
     Frequency freq = new Frequency(_words);
     double f = freq.GetByOneWord(word);
     double TF = f / _words.Count;
     double IDF = Math.Log((collectionLength - usedCount)/usedCount);
     return TF * IDF;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Вычисляет коэффициент взаимной информации
        /// </summary>
        /// <param name="bigramm"></param>
        /// <returns></returns>
        public double Calculate(string bigramm)
        {
            var freq = new Frequency(_words);
            var bigramWords = bigramm.Split(' ');

            var f1 = freq.GetByOneWord(bigramWords[0]);
            var f2 = freq.GetByOneWord(bigramWords[1]);
            var f12 = freq.GetByBigramm(bigramm);

            return Math.Log(((double)f12 * N) / (f1 * f2), 2);
        }