Пример #1
0
        /// <summary>
        /// Builds the parsed review for the current document, or returns the one
        /// produced by an earlier call when the <c>review</c> field is already set.
        /// </summary>
        /// <returns>The parsed review stored in the <c>review</c> field.</returns>
        public IParsedReview Create()
        {
            if (review != null)
            {
                return review;
            }

            review = new ParsedReview(nrcDictionary, document, manager.Context);
            foreach (var documentSentence in document.Sentences)
            {
                CreateSentence(documentSentence);

                // Phrase currently being filled, and the phrase text it was opened for.
                IPhrase activePhrase = null;
                string activePhraseText = null;
                for (var position = 0; position < documentSentence.Words.Count; position++)
                {
                    var sourceWord = documentSentence.Words[position];
                    if (sourceWord.Phrase == null)
                    {
                        // Word belongs to no phrase: close whatever phrase was open.
                        activePhrase = null;
                        activePhraseText = null;
                    }
                    else if (activePhraseText != sourceWord.Phrase)
                    {
                        // Phrase text changed: reuse the underlying phrase when the
                        // document word carries one, otherwise create a new phrase.
                        activePhraseText = sourceWord.Phrase;
                        activePhrase = sourceWord.UnderlyingWord as IPhrase
                                       ?? wordsFactory.CreatePhrase(activePhraseText);
                    }

                    // Always create a fresh word item instead of reusing the underlying one;
                    // per the original author's note, reusing it could lose the word if it
                    // is later changed to an aspect.
                    IWordItem createdWord = wordsFactory.CreateWord(sourceWord.Text, sourceWord.POS);
                    createdWord.NormalizedEntity = sourceWord.NormalizedEntity;
                    createdWord.Entity = sourceWord.EntityType;
                    createdWord.CustomEntity = sourceWord.CustomEntity;
                    createdWord.WordIndex = position;

                    // The flag is true only for the last word of the sentence.
                    var isLastWord = position == documentSentence.Words.Count - 1;
                    AddWord(createdWord, isLastWord);
                    activePhrase?.Add(createdWord);
                }
            }

            // Flag every phrase that consists of more than one word.
            var multiWordPhrases = review.Sentences
                .SelectMany(parsedSentence => parsedSentence.Occurrences.GetPhrases())
                .Where(candidate => candidate.AllWords.Count() > 1);
            foreach (var phrase in multiWordPhrases)
            {
                phrase.IsSentiment = manager.IsSentiment(phrase);
                phrase.IsFeature = manager.IsFeature(phrase);
                phrase.IsTopAttribute = manager.IsAttribute(phrase);
            }

            return review;
        }
Пример #2
0
        /// <summary>
        /// Produces "NP" phrases built from the 3-gram and 2-gram blocks located
        /// around <paramref name="word"/> among the qualifying occurrences of its
        /// relationship part.
        /// </summary>
        /// <param name="word">Word around which n-gram phrases are built.</param>
        /// <returns>
        /// A lazily evaluated sequence of phrases. It is empty when at most one
        /// occurrence qualifies or when <paramref name="word"/> is not among the
        /// qualifying occurrences.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when <paramref name="word"/> is null.
        /// </exception>
        public IEnumerable<IPhrase> GetPhrases(IWordItem word)
        {
            // Validation lives outside the iterator: code inside a method that uses
            // 'yield' does not run until the result is enumerated, which would delay
            // the exception to an unrelated call site.
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }

            return GetPhrasesIterator(word);
        }

        /// <summary>
        /// Iterator behind <see cref="GetPhrases"/>; expects a non-null <paramref name="word"/>.
        /// All work here, including logging, is deferred until enumeration starts.
        /// </summary>
        private IEnumerable<IPhrase> GetPhrasesIterator(IWordItem word)
        {
            log.LogDebug("GetPhrases {0}", word);

            // Keep only occurrences that may be features and are not sentiment words.
            IWordItem[] currentWords = word.Relationship.Part.Occurrences
                .Where(item => !item.CanNotBeFeature() && !item.IsSentiment)
                .ToArray();

            if (currentWords.Length <= 1)
            {
                yield break;
            }

            var wordIndex = Array.IndexOf(currentWords, word);
            if (wordIndex < 0)
            {
                // The joined text is needed only for this diagnostic message.
                var all = string.Join(" ", currentWords.Select(item => item.Text));
                log.LogDebug("{0} is not found in important list in <{1}>", word, all);
                yield break;
            }

            var wordsTable = new Dictionary<WordEx, IWordItem>(currentWords.Length);
            var words = new WordEx[currentWords.Length];

            // Fill by position so that 'words' stays aligned with 'currentWords'
            // (and therefore with wordIndex) even if two WordEx keys compare equal
            // and collapse into a single dictionary entry.
            for (var i = 0; i < currentWords.Length; i++)
            {
                IWordItem item = currentWords[i];
                WordEx wordEx = WordExFactory.Construct(item);
                words[i] = wordEx;
                wordsTable[wordEx] = item;
            }

            // 3-gram blocks are emitted before 2-gram blocks.
            var nGramBlocks = new List<NGramBlock>();
            nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 3));
            nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 2));

            foreach (NGramBlock nGramBlock in nGramBlocks)
            {
                IPhrase phrase = handler.CreatePhrase("NP");
                foreach (WordEx occurrence in nGramBlock.WordOccurrences)
                {
                    // Map each n-gram entry back to the word item it was built from.
                    phrase.Add(wordsTable[occurrence]);
                }

                yield return phrase;
            }
        }