/// <summary>
/// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>, copying the
/// document metadata and re-creating every sentence and word.
/// </summary>
/// <param name="document">Source light document whose text, metadata and sentences are copied.</param>
/// <param name="factory">Factory used to create a word item from each word's text and tag.</param>
/// <returns>A new <see cref="Document"/> populated from <paramref name="document"/>.</returns>
public static Document Construct(this LightDocument document, IWordFactory factory)
{
    var result = new Document(document.Text);
    result.Author = document.Author;
    result.DocumentTime = document.DocumentTime;
    result.Id = document.Id;

    // Fix: the original assigned document.Title to itself, so the title never reached the result.
    result.Title = document.Title;

    foreach (var sentence in document.Sentences)
    {
        var resultSentence = new SentenceItem(sentence.Text);
        result.Add(resultSentence);

        // A sentence may carry no word array at all; it is still added, just without words.
        if (sentence.Words == null)
        {
            continue;
        }

        for (var i = 0; i < sentence.Words.Length; i++)
        {
            var word = sentence.Words[i];
            var wordItem = factory.CreateWord(word.Text, word.Tag);

            // The word index is the position of the word inside its sentence.
            wordItem.WordIndex = i;

            WordEx wordData = WordExFactory.Construct(wordItem);
            wordData.Phrase = word.Phrase;
            resultSentence.Add(wordData);
        }
    }

    return result;
}
/// <summary>
/// Prepares the fixture: creates the ARFF data set and the instance under test, builds a
/// one-sentence document containing three words, and creates a review from that document.
/// </summary>
public void Setup()
{
    mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
    instance = CreateProcessArff();

    document = new Document("Test");
    document.Sentences.Add(new SentenceItem("Test"));

    // All three words go into the single sentence added above.
    var sentenceWords = document.Sentences[0].Words;

    var sentimentWord = new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true };
    sentenceWords.Add(WordExFactory.Construct(sentimentWord));

    var plainWord = new TestWordItem("Two") { Stemmed = "Two" };
    sentenceWords.Add(WordExFactory.Construct(plainWord));

    var hashWord = new TestWordItem("#Three") { Stemmed = "#Three" };
    sentenceWords.Add(WordExFactory.Construct(hashWord));

    // The container hands back a delegate that creates a review manager for a given document.
    var managerResolver = ActualWordsHandler.InstanceSimple.Container
        .Resolve<Func<Document, IParsedReviewManager>>();
    var reviewManager = managerResolver(document);
    review = reviewManager.Create();
}
/// <summary>
/// Prepares the fixture document: a single sentence containing the words
/// "Age", "move" and "forest", in that order.
/// </summary>
public void Setup()
{
    document = new Document("Test");
    document.Sentences.Add(new SentenceItem("Test"));

    var wordTexts = new[] { "Age", "move", "forest" };
    foreach (var text in wordTexts)
    {
        var wordItem = new TestWordItem(text);
        document.Sentences[0].Words.Add(WordExFactory.Construct(wordItem));
    }
}
/// <summary>
/// Produces "NP" phrases built from the n-grams (sizes 3 and 2) around
/// <paramref name="word"/> within the occurrences of its relationship part.
/// </summary>
/// <param name="word">The word around which phrases are built.</param>
/// <returns>
/// A lazily evaluated sequence of phrases. It is empty when there is at most one candidate
/// word, or when <paramref name="word"/> is not among the candidates.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown immediately (not on first enumeration) when <paramref name="word"/> is null.
/// </exception>
public IEnumerable<IPhrase> GetPhrases(IWordItem word)
{
    // Validation lives in this non-iterator wrapper: inside an iterator method the check
    // would not run until the caller started enumerating the result.
    if (word == null)
    {
        throw new ArgumentNullException(nameof(word));
    }

    return EnumeratePhrases(word);
}

/// <summary>
/// Iterator that does the actual work for <see cref="GetPhrases"/>; expects a non-null word.
/// </summary>
private IEnumerable<IPhrase> EnumeratePhrases(IWordItem word)
{
    log.LogDebug("GetPhrases {0}", word);

    // Candidates: occurrences that may be a feature and are not sentiment words.
    IWordItem[] currentWords = word.Relationship.Part.Occurrences
        .Where(item => !item.CanNotBeFeature() && !item.IsSentiment)
        .ToArray();
    if (currentWords.Length <= 1)
    {
        yield break;
    }

    var wordIndex = Array.IndexOf(currentWords, word);
    if (wordIndex < 0)
    {
        // The joined text is only needed for this log message, so build it here.
        var all = string.Join(" ", currentWords.Select(item => item.Text));
        log.LogDebug("{0} is not found in important list in <{1}>", word, all);
        yield break;
    }

    // words[i] always corresponds to currentWords[i], so wordIndex is valid for both arrays.
    // (The original used the dictionary count as the index, which would drift out of
    // alignment if two WordEx keys ever compared equal.)
    var wordsTable = new Dictionary<WordEx, IWordItem>();
    var words = new WordEx[currentWords.Length];
    for (var i = 0; i < currentWords.Length; i++)
    {
        WordEx wordEx = WordExFactory.Construct(currentWords[i]);
        words[i] = wordEx;
        wordsTable[wordEx] = currentWords[i];
    }

    // Collect all blocks up front: trigrams first, then bigrams.
    var nGramBlocks = new List<NGramBlock>();
    nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 3));
    nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 2));

    foreach (NGramBlock nGramBlock in nGramBlocks)
    {
        IPhrase phrase = handler.CreatePhrase("NP");
        foreach (WordEx occurence in nGramBlock.WordOccurrences)
        {
            // Map the temporary WordEx back to the original word item.
            phrase.Add(wordsTable[occurence]);
        }

        yield return phrase;
    }
}
/// <summary>
/// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>, copying the
/// document metadata and re-creating every sentence and word, including entity information.
/// </summary>
/// <param name="document">Source light document whose text, metadata and sentences are copied.</param>
/// <param name="factory">Factory used to create a word item from each word's text and tag.</param>
/// <returns>A new <see cref="Document"/> populated from <paramref name="document"/>.</returns>
public static Document Construct(this LightDocument document, IWordFactory factory)
{
    var result = new Document(document.Text);
    result.Author = document.Author;
    result.DocumentTime = document.DocumentTime;
    result.Id = document.Id;

    // Fix: the original assigned document.Title to itself, so the title never reached the result.
    result.Title = document.Title;

    foreach (var sentence in document.Sentences)
    {
        var resultSentence = new SentenceItem(sentence.Text);
        result.Add(resultSentence, false);

        // A sentence may carry no word array at all; it is still added, just without words.
        if (sentence.Words == null)
        {
            continue;
        }

        for (var i = 0; i < sentence.Words.Length; i++)
        {
            var word = sentence.Words[i];
            var wordItem = factory.CreateWord(word.Text, word.Tag);

            // The word index is the position of the word inside its sentence.
            wordItem.WordIndex = i;

            WordEx wordData = WordExFactory.Construct(wordItem);
            wordData.Phrase = word.Phrase;

            if (string.IsNullOrEmpty(word.Entity))
            {
                wordData.EntityType = NamedEntities.None;
            }
            else if (entityCache.TryGetValue(word.Entity, out var entity))
            {
                // Entity name found in the cache: store the cached entity type.
                wordData.EntityType = entity;
            }
            else
            {
                // Entity name not in the cache: keep the raw text as a custom entity.
                wordData.CustomEntity = word.Entity;
            }

            resultSentence.Add(wordData);
        }
    }

    return result;
}