/// <summary>
/// Initializes the experimental (key) words together with their word forms and
/// re-creates every statistics container that is sized by the number of key words.
/// </summary>
/// <remarks>
/// <c>Repetitions</c> and <c>TableOfCooccurrence</c> are only allocated here; their
/// elements stay unset until they are filled in elsewhere.
/// </remarks>
/// <param name="words">The experimental words.</param>
public void ProcessWords(List<string> words)
{
    // Width of OccurrenceByCases. Presumably one column per grammatical case
    // (inferred from the member name) - confirm against the consumers of the table.
    const int caseCount = 6;

    // Keep a private copy so later changes to the caller's list do not affect KeyWords.
    KeyWords = new List<string>(words);

    // One list of word forms per key word, in the same order as the key words.
    _words = new List<List<string>>();
    foreach (string word in words)
    {
        string wordForms = BuildNounForms(Nouns.FindSimilar(word));
        _words.Add(Utility.SplitExpression(wordForms).ToList());
    }

    Repetitions = new SortedDictionary<int, int>[KeyWords.Count];
    TableOfCooccurrence = new Cooccurrence[KeyWords.Count, KeyWords.Count];
    TripleOccurrences = new Dictionary<string, float>();
    QuadrupleOccurrences = new Dictionary<string, float>();

    // A newly allocated int array is already zero-filled by the runtime,
    // so no explicit clearing pass is required.
    OccurrenceByCases = new int[KeyWords.Count, caseCount];
}
/// <summary>
/// Computes the co-occurrence statistics of the experimental words over the given contexts.
/// </summary>
/// <remarks>
/// For every context the method determines which key words occur in it (a context word
/// matches a key word when it equals any of that key word's stored forms) and then updates:
/// <list type="bullet">
/// <item><c>OccurrenceByCases</c> - one increment per matched context word;</item>
/// <item><c>Repetitions</c> - for each key word, how many contexts contained it exactly k times (k &gt; 1);</item>
/// <item><c>TableOfCooccurrence</c> - pairwise weights, stored at [larger index, smaller index];</item>
/// <item><c>TripleOccurrences</c> / <c>QuadrupleOccurrences</c> - weights of 3- and 4-word groups.</item>
/// </list>
/// <c>Repetitions</c> and <c>TableOfCooccurrence</c> are re-created at the start of each call;
/// the other containers are only added to, so they accumulate across calls.
/// </remarks>
/// <param name="contexts">The contexts for which co-occurrence is computed.</param>
public void CountingOfCoOccurrence(List<Context> contexts)
{
    for (int i = 0; i < KeyWords.Count; i++)
    {
        Repetitions[i] = new SortedDictionary<int, int>();
        for (int j = 0; j < KeyWords.Count; j++)
        {
            TableOfCooccurrence[i, j] = new Cooccurrence(0);
        }
    }

    for (int i = 0; i < contexts.Count; i++)
    {
        // Key-word index for every context word that matched, duplicates included.
        List<int> foundWords = new List<int>();
        foreach (string contextWord in contexts[i].Words)
        {
            int keyWordIndex = MatchKeyWordAndCountCase(contextWord);
            if (keyWordIndex >= 0)
            {
                foundWords.Add(keyWordIndex);
            }
        }

        RecordRepetitionsInContext(foundWords);

        List<int> distinctFoundWords = foundWords.Distinct().ToList();

        if (distinctFoundWords.Count > 1)
        {
            foreach (int[] comb in Utility.GenerateCombinations(distinctFoundWords.Count, 2))
            {
                int first = distinctFoundWords[comb[0]];
                int second = distinctFoundWords[comb[1]];

                // Only the lower triangle of the table is used: row > column.
                int row = Math.Max(first, second);
                int column = Math.Min(first, second);

                // The cell is indexed directly (not copied to a local) so the update
                // lands in the table regardless of how Cooccurrence is declared.
                TableOfCooccurrence[row, column].CooccurCoeff += contexts[i].SizeCoeff;
                TableOfCooccurrence[row, column].Contexts.Add(i, contexts[i].SizeCoeff);
            }
        }

        AccumulateCombinationOccurrences(distinctFoundWords, 3, TripleOccurrences, contexts[i].SizeCoeff);
        AccumulateCombinationOccurrences(distinctFoundWords, 4, QuadrupleOccurrences, contexts[i].SizeCoeff);
    }
}

/// <summary>
/// Looks up the first key word that has <paramref name="word"/> among its forms and
/// increments the matching bucket of <c>OccurrenceByCases</c>.
/// </summary>
/// <param name="word">A word taken from a context.</param>
/// <returns>The index of the matched key word, or -1 when no form matches.</returns>
private int MatchKeyWordAndCountCase(string word)
{
    // Two consecutive form indices share one bucket (0-1 -> 0, 2-3 -> 1, ... 10-11 -> 5).
    const int formsPerBucket = 2;
    const int bucketCount = 6;

    for (int wordIndex = 0; wordIndex < _words.Count; wordIndex++)
    {
        for (int formIndex = 0; formIndex < _words[wordIndex].Count; formIndex++)
        {
            if (word != _words[wordIndex][formIndex])
            {
                continue;
            }

            // Forms beyond the last bucket still count as a match for the key word,
            // but are not attributed to any bucket.
            int bucket = formIndex / formsPerBucket;
            if (bucket < bucketCount)
            {
                OccurrenceByCases[wordIndex, bucket]++;
            }

            return wordIndex;
        }
    }

    return -1;
}

/// <summary>
/// For every key word found more than once in a single context, increments in
/// <c>Repetitions</c> the number of contexts in which it occurred that many times.
/// </summary>
/// <param name="foundWords">Key-word indices found in one context, duplicates included.</param>
private void RecordRepetitionsInContext(List<int> foundWords)
{
    foreach (IGrouping<int, int> occurrences in foundWords.GroupBy(index => index))
    {
        int timesInContext = occurrences.Count();
        if (timesInContext <= 1)
        {
            continue;
        }

        // TryGetValue leaves the counter at 0 when the entry does not exist yet.
        int contextsSoFar;
        Repetitions[occurrences.Key].TryGetValue(timesInContext, out contextsSoFar);
        Repetitions[occurrences.Key][timesInContext] = contextsSoFar + 1;
    }
}

/// <summary>
/// Adds <paramref name="weight"/> to the entry of every <paramref name="groupSize"/>-element
/// combination of the key words found in one context.
/// </summary>
/// <param name="distinctFoundWords">Distinct key-word indices found in the context.</param>
/// <param name="groupSize">Number of key words per combination.</param>
/// <param name="occurrences">Accumulator keyed by the string built from the sorted key words.</param>
/// <param name="weight">The context's size coefficient.</param>
private void AccumulateCombinationOccurrences(
    List<int> distinctFoundWords,
    int groupSize,
    IDictionary<string, float> occurrences,
    float weight)
{
    if (distinctFoundWords.Count < groupSize)
    {
        return;
    }

    foreach (int[] comb in Utility.GenerateCombinations(distinctFoundWords.Count, groupSize))
    {
        // Sorting makes the key independent of the order in which the words were found.
        List<string> words = comb
            .Select(position => KeyWords[distinctFoundWords[position]])
            .OrderBy(q => q)
            .ToList();
        string key = Utility.MakeString(words);

        // A missing key yields 0, so the first occurrence stores the weight itself.
        float total;
        occurrences.TryGetValue(key, out total);
        occurrences[key] = total + weight;
    }
}