/// <summary>
/// Builds the parsed review from <c>document</c> and caches it in <c>review</c>;
/// later calls return the cached instance without rebuilding.
/// </summary>
/// <returns>The cached or newly built review.</returns>
public IParsedReview Create()
{
    // Already built - hand back the cached instance.
    if (review != null)
    {
        return review;
    }

    review = new ParsedReview(nrcDictionary, document, manager.Context);

    foreach (var documentSentence in document.Sentences)
    {
        CreateSentence(documentSentence);

        // Phrase currently being collected, and the phrase text it was started for.
        IPhrase activePhrase = null;
        string activePhraseText = null;

        for (var index = 0; index < documentSentence.Words.Count; index++)
        {
            var source = documentSentence.Words[index];
            var sourcePhrase = source.Phrase;

            if (sourcePhrase == null)
            {
                // Word is outside of any phrase - stop collecting.
                activePhrase = null;
                activePhraseText = null;
            }
            else if (activePhraseText != sourcePhrase)
            {
                // Phrase text changed - start a new phrase, reusing the underlying one if it is a phrase.
                activePhraseText = sourcePhrase;
                activePhrase = source.UnderlyingWord as IPhrase;
                if (activePhrase == null)
                {
                    activePhrase = wordsFactory.CreatePhrase(activePhraseText);
                }
            }

            // A new word item must be created here: reusing the underlying word
            // could lose it if the word is later changed to an aspect.
            IWordItem created = wordsFactory.CreateWord(source.Text, source.POS);
            created.NormalizedEntity = source.NormalizedEntity;
            created.Entity = source.EntityType;
            created.CustomEntity = source.CustomEntity;
            created.WordIndex = index;

            var isLastWord = index == documentSentence.Words.Count - 1;
            AddWord(created, isLastWord);

            if (activePhrase != null)
            {
                activePhrase.Add(created);
            }
        }
    }

    // Second pass: flag every phrase that consists of more than one word.
    foreach (var parsedSentence in review.Sentences)
    {
        foreach (var candidate in parsedSentence.Occurrences.GetPhrases())
        {
            if (candidate.AllWords.Count() <= 1)
            {
                continue;
            }

            candidate.IsSentiment = manager.IsSentiment(candidate);
            candidate.IsFeature = manager.IsFeature(candidate);
            candidate.IsTopAttribute = manager.IsAttribute(candidate);
        }
    }

    return review;
}
/// <summary>
/// Produces candidate "NP" phrases built around <paramref name="word"/> from the
/// occurrences of its relationship part, skipping occurrences that cannot be a
/// feature or that are sentiment.
/// </summary>
/// <param name="word">Word the phrases are built around.</param>
/// <returns>
/// A lazily evaluated sequence of phrases; empty when at most one occurrence
/// remains after filtering, or when <paramref name="word"/> is not among them.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown immediately at call time (not on first enumeration) when
/// <paramref name="word"/> is null.
/// </exception>
public IEnumerable<IPhrase> GetPhrases(IWordItem word)
{
    // Validate here, outside the iterator, so the exception is not deferred
    // until the caller starts enumerating.
    if (word == null)
    {
        throw new ArgumentNullException(nameof(word));
    }

    return EnumeratePhrases(word);
}

// Lazily builds the phrases for an already validated word.
private IEnumerable<IPhrase> EnumeratePhrases(IWordItem word)
{
    log.LogDebug("GetPhrases {0}", word);

    IWordItem[] currentWords = word.Relationship.Part.Occurrences
        .Where(item => !item.CanNotBeFeature() && !item.IsSentiment)
        .ToArray();

    if (currentWords.Length <= 1)
    {
        yield break;
    }

    var wordIndex = Array.IndexOf(currentWords, word);
    if (wordIndex < 0)
    {
        // The joined text is needed only for this diagnostic, so build it here.
        var all = string.Join(" ", currentWords.Select(item => item.Text));
        log.LogDebug("{0} is not found in important list in <{1}>", word, all);
        yield break;
    }

    var wordsTable = new Dictionary<WordEx, IWordItem>(currentWords.Length);
    var words = new WordEx[currentWords.Length];

    // Fill by position rather than by dictionary size: if two keys ever compare
    // equal the dictionary stops growing, and the array must still stay aligned
    // with currentWords so that wordIndex points at the right slot.
    for (var i = 0; i < currentWords.Length; i++)
    {
        WordEx wordEx = WordExFactory.Construct(currentWords[i]);
        words[i] = wordEx;
        wordsTable[wordEx] = currentWords[i];
    }

    // Blocks from the call with 3 come first, then those from the call with 2.
    var nGramBlocks = new List<NGramBlock>();
    nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 3));
    nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 2));

    foreach (NGramBlock nGramBlock in nGramBlocks)
    {
        IPhrase phrase = handler.CreatePhrase("NP");
        foreach (WordEx occurrence in nGramBlock.WordOccurrences)
        {
            phrase.Add(wordsTable[occurrence]);
        }

        yield return phrase;
    }
}