Пример #1
0
        /// <summary>
        /// Builds a <c>SentenceToken</c> (with its <c>WordToken</c>s) for every non-empty
        /// sentence of every paragraph in <c>_paragraphTokens</c>, appends it to the paragraph's
        /// <c>ListOfSentence</c>, and updates <c>SentenceCount</c>, <c>WordTokenCount</c> and the
        /// per-lemma counters stored in <c>BagOfLemm</c>.
        /// </summary>
        /// <remarks>
        /// <c>SentenceCount</c> and <c>WordTokenCount</c> are increased by the raw number of
        /// entries returned by <c>GetSentences</c> and <c>_rsw.Execute</c>, i.e. they include
        /// empty entries that are skipped when tokens are built.
        /// Null or empty words produce neither a <c>WordToken</c> nor a <c>BagOfLemm</c> entry.
        /// </remarks>
        private void GenerateSentenceToken()
        {
            foreach (var paragraph in _paragraphTokens)
            {
                string[] sentences = GetSentences(paragraph.TextValue);
                SentenceCount += sentences.Length;

                foreach (string sentenceText in sentences)
                {
                    if (String.IsNullOrEmpty(sentenceText))
                    {
                        continue;
                    }

                    SentenceToken sentenceToken = new SentenceToken
                    {
                        TextValue = sentenceText
                    };

                    // NOTE(review): _rsw presumably filters the tokenized words (stop-word removal?) — confirm.
                    var words = _rsw.Execute(sentenceText.Tokenize());
                    WordTokenCount += words.Count;

                    for (int j = 0; j < words.Count; j++)
                    {
                        var word = words[j];

                        // An empty word has no meaningful lemma; skip it so that it neither
                        // creates a token nor pollutes the lemma statistics.
                        if (String.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        // Stem once per word instead of on every lookup/comparison.
                        var lemma = _rusStemmer.Stem(word);

                        sentenceToken.ListOfWord.Add(new WordToken
                        {
                            TextValue       = word,
                            Lemma           = lemma,
                            ParagraphNumber = paragraph.Number
                            // TODO: the sentence number is not recorded on the word token yet.
                        });

                        if (BagOfLemm.ContainsKey(lemma))
                        {
                            var counter = BagOfLemm[lemma];
                            ((WordCounter)counter).CountInAllText++;

                            // The token was just added, so a count of 1 means this is the first
                            // occurrence of the lemma in the current sentence.
                            if (sentenceToken.ListOfWord.Count(w => w.Lemma == lemma) <= 1)
                            {
                                counter.CountSentenceForThisWord++;
                            }
                        }
                        else
                        {
                            BagOfLemm.Add(lemma, new WordCounter
                            {
                                CountInAllText              = 1,
                                CountSentenceForThisWord    = 1,
                                CounterParagraphForThisWord = 1
                            });
                        }
                        // TODO: rework this — CounterParagraphForThisWord is only ever initialized, never incremented.
                    }

                    paragraph.ListOfSentence.Add(sentenceToken);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Creates a sentence from raw tokens, wrapping each one in a <c>SentenceToken</c> and
        /// recording which tokens are treated as non-whitespace.
        /// </summary>
        /// <param name="locale">Locale identifier; stored lower-cased using the invariant culture.</param>
        /// <param name="tokens">Raw tokens, in sentence order.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="locale"/> or <paramref name="tokens"/> is <c>null</c>.
        /// </exception>
        public Sentence(string locale, TokenInfo[] tokens)
        {
            if (locale == null)
            {
                throw new ArgumentNullException("locale");
            }
            if (tokens == null)
            {
                throw new ArgumentNullException("tokens");
            }

            // Locale identifiers are not linguistic text: lower-case them culture-independently
            // so the result does not depend on the current thread culture (e.g. Turkish 'I').
            Locale = locale.ToLowerInvariant();

            m_tokens = new SentenceToken[tokens.Length];
            m_nonWhitespaceIndices = new List<int>();
            var nonWhitespaceTokens = new List<SentenceToken>();

            for (int i = 0; i < tokens.Length; i++)
            {
                // Locale is assigned above, before 'this' is handed to the token.
                var token = new SentenceToken(tokens[i], this, i);
                m_tokens[i] = token;

                var tokenValue = token.Value;

                // An empty token at the beginning of the sentence is the sentence-start token
                // and is kept; any other empty token is ignored. Non-empty tokens are kept
                // unless their first character is whitespace.
                bool keep = String.IsNullOrEmpty(tokenValue)
                    ? i == 0
                    : !StringUtil.IsWhiteSpace(tokenValue[0]);

                if (keep)
                {
                    m_nonWhitespaceIndices.Add(i);
                    nonWhitespaceTokens.Add(token);
                }
            }

            m_nonWhitespaceTokens = nonWhitespaceTokens.ToArray();
        }