コード例 #1
0
        /// <summary>
        /// Initializes the experimental (key) words and their word forms, and
        /// re-creates every statistics container whose size depends on the
        /// number of key words.
        /// </summary>
        /// <param name="words">The experimental words.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="words"/> is <c>null</c>.
        /// </exception>
        public void ProcessWords(List<string> words)
        {
            if (words == null)
            {
                throw new System.ArgumentNullException("words");
            }

            // Width of the per-case occurrence table.
            // NOTE(review): presumably the six grammatical cases of a noun - confirm.
            const int CaseCount = 6;

            // Keep a private copy so later changes to the caller's list do not leak in.
            KeyWords = new List<string>(words);
            _words   = new List<List<string>>(words.Count);

            // One list of forms per key word, stored at the same index as the key word.
            foreach (string word in words)
            {
                string wordForms = BuildNounForms(Nouns.FindSimilar(word));
                _words.Add(Utility.SplitExpression(wordForms).ToList());
            }

            int keyWordCount = KeyWords.Count;

            Repetitions          = new SortedDictionary<int, int>[keyWordCount];
            TableOfCooccurrence  = new Cooccurrence[keyWordCount, keyWordCount];
            TripleOccurrences    = new Dictionary<string, float>();
            QuadrupleOccurrences = new Dictionary<string, float>();

            // A newly allocated int array is already filled with zeros, so no
            // explicit clearing pass is needed.
            OccurrenceByCases    = new int[keyWordCount, CaseCount];
        }
コード例 #2
0
        /// <summary>
        /// Calculates the co-occurrence of the experimental (key) words.
        /// </summary>
        /// <remarks>
        /// <para>
        /// The repetition histograms and the pair table are re-created at the
        /// start of every call. The triple/quadruple dictionaries and the
        /// per-case counters are not reset by this method, so they keep
        /// accumulating if it is called more than once.
        /// </para>
        /// <para>
        /// For every context the method: (1) maps each word to the first key
        /// word having an identical form, (2) records how often a key word was
        /// repeated inside the context, and (3) adds the context's size
        /// coefficient to every pair, triple and quadruple of distinct key
        /// words found in it.
        /// </para>
        /// </remarks>
        /// <param name="contexts">The contexts for which co-occurrence is calculated.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="contexts"/> is <c>null</c>.
        /// </exception>
        public void CountingOfCoOccurrence(List<Context> contexts)
        {
            if (contexts == null)
            {
                throw new ArgumentNullException("contexts");
            }

            for (int i = 0; i < KeyWords.Count; i++)
            {
                Repetitions[i] = new SortedDictionary<int, int>();
                for (int j = 0; j < KeyWords.Count; j++)
                {
                    TableOfCooccurrence[i, j] = new Cooccurrence(0);
                }
            }

            for (int i = 0; i < contexts.Count; i++)
            {
                // Key-word index for every matching word of the context, in
                // text order and including repeats.
                List<int> foundWords = new List<int>();
                for (int j = 0; j < contexts[i].Words.Count; j++)
                {
                    int keyWordIndex;
                    if (TryRegisterKeyWordForm(contexts[i].Words[j], out keyWordIndex))
                    {
                        foundWords.Add(keyWordIndex);
                    }
                }

                // Repetitions[k][c] = number of contexts in which key word k
                // occurred exactly c times (only c > 1 is recorded).
                foreach (IGrouping<int, int> hits in foundWords.GroupBy(index => index))
                {
                    int timesInContext = hits.Count();
                    if (timesInContext > 1)
                    {
                        var histogram = Repetitions[hits.Key];
                        int contextsSoFar;
                        histogram.TryGetValue(timesInContext, out contextsSoFar);
                        histogram[timesInContext] = contextsSoFar + 1;
                    }
                }

                List<int> distinctFoundWords = foundWords.Distinct().ToList();

                if (distinctFoundWords.Count > 1)
                {
                    foreach (int[] comb in Utility.GenerateCombinations(distinctFoundWords.Count, 2))
                    {
                        int first  = distinctFoundWords[comb[0]];
                        int second = distinctFoundWords[comb[1]];

                        // Larger index is the row, so only cells below the
                        // main diagonal are ever updated.
                        int row    = Math.Max(first, second);
                        int column = Math.Min(first, second);

                        // Accessed in place on purpose: copying the cell into a
                        // local would lose the update if Cooccurrence is a struct.
                        TableOfCooccurrence[row, column].CooccurCoeff += contexts[i].SizeCoeff;
                        TableOfCooccurrence[row, column].Contexts.Add(i, contexts[i].SizeCoeff);
                    }
                }
                if (distinctFoundWords.Count > 2)
                {
                    AccumulateCombinations(distinctFoundWords, 3, contexts[i].SizeCoeff, TripleOccurrences);
                }
                if (distinctFoundWords.Count > 3)
                {
                    AccumulateCombinations(distinctFoundWords, 4, contexts[i].SizeCoeff, QuadrupleOccurrences);
                }
            }
        }

        /// <summary>
        /// Looks for the first key word that has a form equal to
        /// <paramref name="word"/> and, if one is found, increments the
        /// matching per-case occurrence counter.
        /// </summary>
        /// <param name="word">A word taken from a context.</param>
        /// <param name="keyWordIndex">
        /// Index of the matching key word, or -1 when nothing matched.
        /// </param>
        /// <returns><c>true</c> when a key word form matched.</returns>
        private bool TryRegisterKeyWordForm(string word, out int keyWordIndex)
        {
            // Form indexes 0-1 feed case column 0, 2-3 column 1, ... 10-11 column 5.
            // NOTE(review): presumably two forms (e.g. singular/plural) per
            // grammatical case - confirm against BuildNounForms.
            const int FormsPerCase = 2;
            const int CaseCount    = 6;

            for (int m = 0; m < _words.Count; m++)
            {
                int formIndex = _words[m].IndexOf(word);
                if (formIndex < 0)
                {
                    continue;
                }

                // Forms beyond the known case range still count as a match
                // but are not attributed to any case.
                if (formIndex < FormsPerCase * CaseCount)
                {
                    OccurrenceByCases[m, formIndex / FormsPerCase]++;
                }

                keyWordIndex = m;
                return true;
            }

            keyWordIndex = -1;
            return false;
        }

        /// <summary>
        /// Adds <paramref name="weight"/> to the entry of every combination of
        /// <paramref name="size"/> distinct key words found in one context.
        /// </summary>
        /// <param name="distinctFoundWords">Distinct key-word indexes found in the context.</param>
        /// <param name="size">Number of key words per combination.</param>
        /// <param name="weight">Size coefficient of the context.</param>
        /// <param name="occurrences">Dictionary that accumulates the weights per combination key.</param>
        private void AccumulateCombinations(List<int> distinctFoundWords, int size, float weight,
                                            IDictionary<string, float> occurrences)
        {
            foreach (int[] comb in Utility.GenerateCombinations(distinctFoundWords.Count, size))
            {
                // Sort the words so the same set always yields the same key.
                List<string> words = comb
                                     .Select(position => KeyWords[distinctFoundWords[position]])
                                     .OrderBy(q => q)
                                     .ToList();
                string key = Utility.MakeString(words);

                float current;
                if (occurrences.TryGetValue(key, out current))
                {
                    occurrences[key] = current + weight;
                }
                else
                {
                    occurrences.Add(key, weight);
                }
            }
        }