/// <summary>
/// Scores a bigram by combining four frequency sums (a, b, c, d) with an
/// x * ln(x + 1) formula.
/// </summary>
/// <remarks>
/// NOTE(review): the formula has the shape of a +1-smoothed log-likelihood
/// association measure over a 2x2 contingency table; confirm against the owning class.
/// </remarks>
/// <param name="bigramm">Two words separated by a single space.</param>
/// <returns>The value of the formula for the sums a, b, c and d described in the body.</returns>
/// <exception cref="ArgumentException">
/// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
/// </exception>
public double Calculate(string bigramm)
{
    var bigrammWords = bigramm.Split(' ');
    if (bigrammWords.Length != 2)
    {
        throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
    }

    var firstWord = bigrammWords[0];
    var secondWord = bigrammWords[1];

    // Candidates "firstWord t" for every entry t of _words other than secondWord.
    // NOTE(review): one candidate is produced per entry of _words, so a word that
    // occurs several times is summed several times below; confirm this is intended.
    var otherBigrammsLeftLemm = _words.Where(t => t != secondWord).Select(t => firstWord + " " + t).ToList();

    // Candidates "t secondWord" for every entry t of _words other than firstWord.
    var otherBigrammsRightLemm = _words.Where(t => t != firstWord).Select(t => t + " " + secondWord).ToList();

    // Every adjacent pair of _words except the bigram being scored.
    // NOTE(review): pairs that share only one word with the bigram are kept here
    // as well as being covered by the two lists above; confirm this is intended.
    var otherBigramms = new List<string>();
    for (int i = 0; i < _words.Count - 1; i++)
    {
        var otherBigrammText = _words[i] + " " + _words[i + 1];
        if (otherBigrammText != bigramm)
        {
            otherBigramms.Add(otherBigrammText);
        }
    }

    Frequency freq = new Frequency(_words);
    var a = freq.GetByBigramm(bigramm);
    var b = otherBigrammsLeftLemm.Sum(t => freq.GetByBigramm(t));
    var c = otherBigrammsRightLemm.Sum(t => freq.GetByBigramm(t));
    var d = otherBigramms.Sum(t => freq.GetByBigramm(t));

    // The same x * ln(x + 1) term is applied to each cell, each marginal total
    // and the grand total; the + 1 keeps the logarithm defined when x is zero.
    Func<double, double> term = x => x * Math.Log(x + 1);

    return term(a) + term(b) + term(c) + term(d)
        - term(a + b) - term(a + c) - term(b + d) - term(c + d)
        + term(a + b + c + d);
}
/// <summary>
/// Computes the mutual information coefficient of a bigram.
/// </summary>
/// <param name="bigramm">Two words separated by a single space.</param>
/// <returns>
/// log2(f12 * N / (f1 * f2)), where f1 and f2 are the values returned by
/// <c>GetByOneWord</c> for the two words and f12 is the value returned by
/// <c>GetByBigramm</c> for the whole bigram.
/// </returns>
/// <exception cref="ArgumentException">
/// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
/// </exception>
public double Calculate(string bigramm)
{
    var bigramWords = bigramm.Split(' ');

    // Reject malformed input up front instead of failing with an
    // IndexOutOfRangeException on bigramWords[1] for a one-word argument.
    if (bigramWords.Length != 2)
    {
        throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
    }

    var freq = new Frequency(_words);
    var f1 = freq.GetByOneWord(bigramWords[0]);
    var f2 = freq.GetByOneWord(bigramWords[1]);
    var f12 = freq.GetByBigramm(bigramm);

    // Promote to double before multiplying: if the frequencies are integer
    // counts, f1 * f2 would otherwise be computed in integer arithmetic and
    // could overflow silently.
    return Math.Log(((double)f12 * N) / ((double)f1 * f2), 2);
}
/// <summary>
/// Computes the mutual information coefficient of a bigram.
/// </summary>
/// <param name="bigramm">Two words separated by a single space.</param>
/// <returns>
/// The base-2 logarithm of (bigram frequency * N) divided by the product of
/// the frequencies of the two individual words.
/// </returns>
/// <exception cref="ArgumentException">
/// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
/// </exception>
public double Calculate(string bigramm)
{
    var bigramWords = bigramm.Split(' ');

    // A one-word argument would otherwise surface as an IndexOutOfRangeException
    // when the second word is read below.
    if (bigramWords.Length != 2)
    {
        throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
    }

    var freq = new Frequency(_words);

    // Hold the frequencies as doubles so that the product of the two word
    // frequencies is never computed in (possibly overflowing) integer arithmetic.
    double firstWordFrequency = freq.GetByOneWord(bigramWords[0]);
    double secondWordFrequency = freq.GetByOneWord(bigramWords[1]);
    double bigramFrequency = freq.GetByBigramm(bigramm);

    double ratio = (bigramFrequency * N) / (firstWordFrequency * secondWordFrequency);
    return Math.Log(ratio, 2);
}
/// <summary>
/// Scores a bigram from four frequency sums (a, b, c, d), their pairwise
/// totals and their grand total, each passed through x * ln(x + 1).
/// </summary>
/// <remarks>
/// NOTE(review): the formula has the shape of a +1-smoothed log-likelihood
/// association measure over a 2x2 contingency table; confirm against the owning class.
/// </remarks>
/// <param name="bigramm">Two words separated by a single space.</param>
/// <returns>The value of the formula for the sums a, b, c and d described in the body.</returns>
/// <exception cref="ArgumentException">
/// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
/// </exception>
public double Calculate(string bigramm)
{
    var bigrammWords = bigramm.Split(' ');
    if (bigrammWords.Length != 2)
    {
        throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
    }

    var head = bigrammWords[0];
    var tail = bigrammWords[1];

    // "head t" for every entry t of _words that is not the bigram's second word.
    // NOTE(review): entries are not de-duplicated, so a repeated word contributes
    // its candidate once per occurrence; confirm this is intended.
    var sameHead = _words.Where(t => t != tail).Select(t => head + " " + t).ToList();

    // "t tail" for every entry t of _words that is not the bigram's first word.
    var sameTail = _words.Where(t => t != head).Select(t => t + " " + tail).ToList();

    // All adjacent pairs of _words, minus the bigram being scored.
    // Zip stops at the shorter sequence, so an empty or one-element _words
    // simply yields no pairs.
    var remaining = _words
        .Zip(_words.Skip(1), (left, right) => left + " " + right)
        .Where(text => text != bigramm)
        .ToList();

    Frequency freq = new Frequency(_words);
    var a = freq.GetByBigramm(bigramm);
    var b = sameHead.Sum(t => freq.GetByBigramm(t));
    var c = sameTail.Sum(t => freq.GetByBigramm(t));
    var d = remaining.Sum(t => freq.GetByBigramm(t));

    // Pairwise totals and the grand total used by the formula.
    var ab = a + b;
    var ac = a + c;
    var bd = b + d;
    var cd = c + d;
    var total = a + b + c + d;

    // Every term has the form x * ln(x + 1); the + 1 keeps ln defined at x = 0.
    return a * Math.Log(a + 1)
        + b * Math.Log(b + 1)
        + c * Math.Log(c + 1)
        + d * Math.Log(d + 1)
        - ab * Math.Log(ab + 1)
        - ac * Math.Log(ac + 1)
        - bd * Math.Log(bd + 1)
        - cd * Math.Log(cd + 1)
        + total * Math.Log(total + 1);
}
/// <summary>
/// Computes a t-score style association measure for a bigram: the observed
/// bigram frequency minus the frequency expected from the two individual
/// word frequencies, divided by the square root of the observed frequency.
/// </summary>
/// <param name="bigramm">Two words separated by a single space.</param>
/// <returns>
/// (f12 - f1 * f2 / (_words.Count - 1)) / sqrt(f12), where f1 and f2 are the
/// values returned by <c>GetByOneWord</c> for the two words and f12 is the
/// value returned by <c>GetByBigramm</c> for the whole bigram.
/// </returns>
/// <exception cref="ArgumentException">
/// Splitting <paramref name="bigramm"/> on a space does not yield exactly two parts.
/// </exception>
public double Calculate(string bigramm)
{
    var bigrammWords = bigramm.Split(' ');
    if (bigrammWords.Length != 2)
    {
        throw new ArgumentException("Для оценки частоты биграммы необходимо два слова.");
    }

    Frequency freq = new Frequency(_words);
    double f1 = freq.GetByOneWord(bigrammWords[0]);
    double f2 = freq.GetByOneWord(bigrammWords[1]);
    double f12 = freq.GetByBigramm(bigramm);

    // Expected frequency, normalised by _words.Count - 1 (the number of
    // adjacent pairs in _words).
    double expected = f1 * f2 / (_words.Count - 1);

    // NOTE(review): this previously divided by f12 squared (Math.Pow(f12, 2)).
    // The standard t-score normalises by the square root of the observed
    // frequency, so the exponent was corrected; confirm with the owner of
    // this measure.
    return (f12 - expected) / Math.Sqrt(f12);
}