Exemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>: copies the
        /// document-level metadata and recreates every sentence together with its words.
        /// </summary>
        /// <param name="document">Source document whose text, metadata and sentences are copied.</param>
        /// <param name="factory">Factory used to create a word item from each source word's text and tag.</param>
        /// <returns>The newly created <see cref="Document"/>.</returns>
        public static Document Construct(this LightDocument document, IWordFactory factory)
        {
            var result = new Document(document.Text);

            result.Author       = document.Author;
            result.DocumentTime = document.DocumentTime;
            result.Id           = document.Id;
            // The title must be copied onto the result; assigning it back to the
            // source document was a no-op that silently dropped the title.
            result.Title        = document.Title;

            foreach (var sentence in document.Sentences)
            {
                var resultSentence = new SentenceItem(sentence.Text);
                result.Add(resultSentence);

                // A sentence may carry no word array at all; it is still added, just left empty.
                if (sentence.Words == null)
                {
                    continue;
                }

                for (var i = 0; i < sentence.Words.Length; i++)
                {
                    var word     = sentence.Words[i];
                    var wordItem = factory.CreateWord(word.Text, word.Tag);

                    // The word index is the word's position within its own sentence.
                    wordItem.WordIndex = i;

                    WordEx wordData = WordExFactory.Construct(wordItem);
                    wordData.Phrase = word.Phrase;
                    resultSentence.Add(wordData);
                }
            }

            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Prepares the fixture: an ARFF data set, the instance under test, and a
        /// one-sentence document with three words from which the review is created.
        /// </summary>
        public void Setup()
        {
            mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
            instance = CreateProcessArff();

            document = new Document("Test");
            document.Sentences.Add(new SentenceItem("Test"));

            // The only word flagged as sentiment.
            var goodWord = new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true };
            document.Sentences[0].Words.Add(WordExFactory.Construct(goodWord));

            var twoWord = new TestWordItem("Two") { Stemmed = "Two" };
            document.Sentences[0].Words.Add(WordExFactory.Construct(twoWord));

            // Word whose text and stem both start with '#'.
            var threeWord = new TestWordItem("#Three") { Stemmed = "#Three" };
            document.Sentences[0].Words.Add(WordExFactory.Construct(threeWord));

            // Resolve the per-document manager factory from the container, bind it to
            // the fixture document, then create the review from the resulting manager.
            var createManager = ActualWordsHandler.InstanceSimple.Container.Resolve<Func<Document, IParsedReviewManager>>();
            var reviewManager = createManager(document);

            review = reviewManager.Create();
        }
Exemplo n.º 3
0
 /// <summary>
 /// Prepares the fixture document: one "Test" sentence holding the words
 /// "Age", "move" and "forest", in that order.
 /// </summary>
 public void Setup()
 {
     document = new Document("Test");
     document.Sentences.Add(new SentenceItem("Test"));

     foreach (var text in new[] { "Age", "move", "forest" })
     {
         var wordItem = new TestWordItem(text);
         document.Sentences[0].Words.Add(WordExFactory.Construct(wordItem));
     }
 }
Exemplo n.º 4
0
        /// <summary>
        /// Produces "NP" phrases made of the n-grams (sizes 3 and 2) located near
        /// <paramref name="word"/> among the feature-capable, non-sentiment occurrences
        /// of the part the word belongs to.
        /// </summary>
        /// <param name="word">Word around which phrases are built.</param>
        /// <returns>
        /// A lazily evaluated sequence of phrases. It is empty when at most one candidate
        /// word remains after filtering, or when <paramref name="word"/> is not among the candidates.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
        public IEnumerable <IPhrase> GetPhrases(IWordItem word)
        {
            // Validate eagerly: inside an iterator body this check would be deferred
            // until the caller starts enumerating the result.
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }

            return EnumeratePhrases(word);
        }

        // Iterator part of GetPhrases; expects an already validated, non-null word.
        private IEnumerable <IPhrase> EnumeratePhrases(IWordItem word)
        {
            log.LogDebug("GetPhrases {0}", word);
            IWordItem[] currentWords = word.Relationship.Part.Occurrences
                                       .Where(item => !item.CanNotBeFeature() && !item.IsSentiment).ToArray();

            if (currentWords.Length <= 1)
            {
                yield break;
            }

            var wordIndex = Array.IndexOf(currentWords, word);

            if (wordIndex < 0)
            {
                // The joined text is only needed for this diagnostic message.
                var all = string.Join(" ", currentWords.Select(item => item.Text));
                log.LogDebug("{0} is not found in important list in <{1}>", word, all);
                yield break;
            }

            var wordsTable = new Dictionary <WordEx, IWordItem>();
            var words      = new WordEx[currentWords.Length];

            // Fill by explicit position so that words[i] always corresponds to
            // currentWords[i] and wordIndex stays valid for both arrays. Indexing by
            // wordsTable.Count would drift if two WordEx keys ever compared equal.
            for (var i = 0; i < currentWords.Length; i++)
            {
                IWordItem item   = currentWords[i];
                WordEx    wordEx = WordExFactory.Construct(item);
                words[i]           = wordEx;
                wordsTable[wordEx] = item;
            }

            // Collect all blocks up front: 3-grams first, then 2-grams.
            var nGramBlocks = new List <NGramBlock>();
            nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 3));
            nGramBlocks.AddRange(words.GetNearNGram(wordIndex, 2));

            foreach (NGramBlock nGramBlock in nGramBlocks)
            {
                IPhrase phrase = handler.CreatePhrase("NP");
                foreach (WordEx occurence in nGramBlock.WordOccurrences)
                {
                    // Map each WordEx back to the original word item it was built from.
                    phrase.Add(wordsTable[occurence]);
                }

                yield return phrase;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>: copies the
        /// document-level metadata and recreates every sentence together with its words,
        /// including each word's phrase and entity information.
        /// </summary>
        /// <param name="document">Source document whose text, metadata and sentences are copied.</param>
        /// <param name="factory">Factory used to create a word item from each source word's text and tag.</param>
        /// <returns>The newly created <see cref="Document"/>.</returns>
        public static Document Construct(this LightDocument document, IWordFactory factory)
        {
            var result = new Document(document.Text);

            result.Author       = document.Author;
            result.DocumentTime = document.DocumentTime;
            result.Id           = document.Id;
            // The title must be copied onto the result; assigning it back to the
            // source document was a no-op that silently dropped the title.
            result.Title        = document.Title;

            foreach (var sentence in document.Sentences)
            {
                var resultSentence = new SentenceItem(sentence.Text);
                result.Add(resultSentence, false);

                // A sentence may carry no word array at all; it is still added, just left empty.
                if (sentence.Words == null)
                {
                    continue;
                }

                for (var i = 0; i < sentence.Words.Length; i++)
                {
                    var word     = sentence.Words[i];
                    var wordItem = factory.CreateWord(word.Text, word.Tag);

                    // The word index is the word's position within its own sentence.
                    wordItem.WordIndex = i;

                    WordEx wordData = WordExFactory.Construct(wordItem);
                    wordData.Phrase = word.Phrase;

                    if (string.IsNullOrEmpty(word.Entity))
                    {
                        wordData.EntityType = NamedEntities.None;
                    }
                    else if (entityCache.TryGetValue(word.Entity, out var entity))
                    {
                        // Entity name found in the cache: store the cached entity type.
                        wordData.EntityType = entity;
                    }
                    else
                    {
                        // Entity name not in the cache: keep the raw name as a custom entity.
                        wordData.CustomEntity = word.Entity;
                    }

                    resultSentence.Add(wordData);
                }
            }

            return result;
        }