/// <summary>
        /// Converts a Stanford token into an <c>InternalWord</c> and appends it to the
        /// word list of <paramref name="sentence"/>.
        /// </summary>
        /// <param name="stanfordWord">Token whose text and character-offset annotations are read.</param>
        /// <param name="sentence">Sentence whose <c>words</c> list receives the new word.</param>
        /// <returns>
        /// <c>true</c> when the word was added; <c>false</c> when an offset does not parse as an
        /// unsigned integer or the offsets do not describe a valid span of the raw text.
        /// </returns>
        private bool ExtractWord(
            CoreMap stanfordWord, ref InternalSentence sentence)
        {
            uint offsetBegin, offsetEnd;

            if (!UInt32.TryParse(
                    stanfordWord.get(_charOffsetBeginAnnotationClass).ToString(),
                    out offsetBegin))
            {
                return(false);
            }

            if (!UInt32.TryParse(
                    stanfordWord.get(_charOffsetEndAnnotationClass).ToString(),
                    out offsetEnd))
            {
                return(false);
            }

            // Substring throws when the span is inverted or reaches past the end of the raw
            // text; report such a token as a failed extraction instead of letting it escape.
            if ((offsetEnd < offsetBegin) || (offsetEnd > _rawData.Length))
            {
                return(false);
            }

            // the range check above guarantees both offsets fit into an int
            int start  = (int)offsetBegin;
            int length = (int)(offsetEnd - offsetBegin);

            sentence.words.Add(new InternalWord()
            {
                annotatedText = stanfordWord.get(_textAnnotationClass).ToString(),
                // the original text is sliced out of the raw input by offset rather than
                // being read from an annotation on the token
                originalText         = _rawData.Substring(start, length),
                characterOffsetBegin = offsetBegin,
                characterOffsetEnd   = offsetEnd
            });

            return(true);
        }
        /// <summary>
        /// Runs the annotation pipeline over <paramref name="text"/> and returns the lemma of
        /// the first token of the first sentence.
        /// </summary>
        /// <param name="text">Text to stem; only its first token contributes to the result.</param>
        /// <returns>
        /// The lemma of the first token, or <c>null</c> when the text is <c>null</c>, yields no
        /// sentence, token or lemma, or when the pipeline throws.
        /// </returns>
        public string GetStemmedText(string text)
        {
            if (text == null)
            {
                return(null);
            }

            try
            {
                // Annotation
                var annotation = new Annotation(text);
                _pipeLine.annotate(annotation);

                // Sentence
                ArrayList sentences = annotation.get(_sentencesAnnotation.getClass()) as ArrayList;
                if ((sentences == null) || (sentences.size() == 0))
                {
                    return(null);
                }

                CoreMap sentence = sentences.get(0) as CoreMap;
                if (sentence == null)
                {
                    return(null);
                }

                // Token
                ArrayList tokens = sentence.get(_tokensAnnotation.getClass()) as ArrayList;
                if ((tokens == null) || (tokens.size() == 0))
                {
                    return(null);
                }

                CoreLabel token = tokens.get(0) as CoreLabel;
                if (token == null)
                {
                    return(null);
                }

                // Lemma
                object lemma = token.get(_lemmaAnnotation.getClass());

                return((lemma == null) ? null : lemma.ToString());
            }
            catch (Exception)
            {
                // deliberate best effort: any failure inside the pipeline is reported as
                // "no stem available" rather than propagated to the caller
                return(null);
            }
        }
        /// <summary>
        /// Collects the key phrases of every sentence in <paramref name="coredoc"/> and stores
        /// them in <c>NounPhrases</c> under <paramref name="id"/>.
        /// </summary>
        /// <param name="coredoc">Document handed to the analyzer to obtain its sentences.</param>
        /// <param name="id">Key under which the collected phrases are added.</param>
        private static void ExtractKeyPhrases(CoreDocument coredoc, int id)
        {
            ArrayList sentenceList = _analyzer.GetSents(coredoc);

            if (sentenceList == null)
            {
                // nothing is registered when the analyzer returns no sentence list
                return;
            }

            List <string> phrases = new List <string>();

            for (int position = 0; position < sentenceList.size(); position++)
            {
                CoreMap current   = (CoreMap)sentenceList.get(position);
                Tree    parseTree = (Tree)current.get(typeof(TreeCoreAnnotations.TreeAnnotation));

                List <Tree> phraseTrees = NPExtractor.getKeyPhrases(parseTree).ToList();
                foreach (Tree phraseTree in phraseTrees)
                {
                    // one phrase per subtree: its leaves joined by single spaces
                    phrases.Add(String.Join(" ", phraseTree.getLeaves().toArray()));
                }
            }

            NounPhrases.Add(id, phrases);
        }
// Example #4
// 0
        /// <summary>Return a new <see cref="ParseResult"/> constructed from <paramref name="annotation"/></summary>
        /// <remarks>
        /// Only the first sentence of the annotation is read. Its constituency tree (below the
        /// root node), its tokens and its dependency edges are copied into <c>Constituents</c>,
        /// <c>Tokens</c> and <c>Dependencies</c>.
        /// </remarks>
        /// <param name="annotation">Annotation that has already been processed by the pipeline.</param>
        /// <exception cref="ArgumentNullException"><paramref name="annotation"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// The annotation has no sentence, or the first sentence lacks the constituency tree,
        /// the tokens or the dependency graph.
        /// </exception>
        internal ParseResult(Annotation annotation)
        {
            if (annotation == null)
            {
                throw new ArgumentNullException(nameof(annotation));
            }

            var sentences = RequireAnnotationValue<java.util.AbstractList>(
                annotation.get(SentencesAnnotationClass), "the sentence list");
            if (sentences.size() == 0)
            {
                throw new ArgumentException("Annotation contains no sentences.", nameof(annotation));
            }

            var sentence          = RequireAnnotationValue<CoreMap>(sentences.get(0), "the first sentence");
            var constituencyParse = RequireAnnotationValue<LabeledScoredTreeNode>(
                sentence.get(TreeAnnotationClass), "the constituency parse");

            // Skip the ROOT
            Tree childOfRoot = constituencyParse.firstChild();
            if (childOfRoot == null)
            {
                throw new ArgumentException("Constituency parse has no node below the root.", nameof(annotation));
            }

            Constituents = childOfRoot;
            Constituents.indexLeaves();

            // Build the collection of tokens
            var parsedTokens = RequireAnnotationValue<java.util.AbstractList>(
                sentence.get(TokensAnnotationClass), "the token list");

            for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
            {
                var source = RequireAnnotationValue<CoreLabel>(parsedTokens.get(tokenIndex), "a token");
                Tokens.Add(new ParseToken
                {
                    Index            = source.index(),
                    Word             = source.word(),
                    Lemma            = source.lemma(),
                    PartOfSpeech     = source.get(PartOfSpeechAnnotationClass) as string,
                    NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
                });
            }

            // Create the list of dependencies between tokens
            var dependencyGraph = RequireAnnotationValue<SemanticGraph>(
                sentence.get(DependencyAnnotationClass), "the dependency graph");

            java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
            while (dependencyGraphEdges.hasNext())
            {
                var edge = RequireAnnotationValue<SemanticGraphEdge>(dependencyGraphEdges.next(), "a dependency edge");

                string      relationName      = edge.getRelation().getShortName();
                string      relationSpecifier = edge.getRelation().getSpecific();
                IndexedWord governor          = edge.getGovernor();
                IndexedWord dependent         = edge.getDependent();

                Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
            }
        }

        /// <summary>
        /// Casts <paramref name="value"/> to <typeparamref name="T"/>, throwing a descriptive
        /// <see cref="ArgumentException"/> when the value is <c>null</c> or of another type.
        /// </summary>
        private static T RequireAnnotationValue<T>(object value, string description) where T : class
        {
            T typed = value as T;
            if (typed == null)
            {
                throw new ArgumentException($"Annotation is missing {description}.", "annotation");
            }

            return typed;
        }
        /// <summary>
        /// Copies the text and the begin/end character offsets of a Stanford sentence into
        /// <paramref name="sentence"/>.
        /// </summary>
        /// <param name="stanfordSentence">Source sentence whose annotations are read.</param>
        /// <param name="sentence">
        /// Target; its <c>text</c>, <c>fileOffsetBegin</c> and <c>fileOffsetEnd</c> members are written.
        /// </param>
        /// <returns>
        /// <c>true</c> when both offsets parse as unsigned integers; otherwise <c>false</c>.
        /// </returns>
        private bool ExtractSentence(
            CoreMap stanfordSentence, ref InternalSentence sentence)
        {
            // the text is stored first and stays set even when an offset fails to parse
            sentence.text = stanfordSentence.get(_textAnnotationClass).ToString();

            // && short-circuits, so the end offset is only read once the begin offset parsed
            bool offsetsParsed =
                UInt32.TryParse(
                    stanfordSentence.get(_charOffsetBeginAnnotationClass).ToString(),
                    out sentence.fileOffsetBegin) &&
                UInt32.TryParse(
                    stanfordSentence.get(_charOffsetEndAnnotationClass).ToString(),
                    out sentence.fileOffsetEnd);

            return(offsetsParsed);
        }
        /// <summary>
        /// Walks the sentences of <paramref name="coredoc"/>, storing each sentence's
        /// constituency tree in <c>constituencyParse</c> and that tree's constituents in
        /// <c>treeArray</c>.
        /// </summary>
        /// <remarks>
        /// Both members are overwritten on every iteration, so after the call they hold the
        /// values of the last sentence. A <c>null</c> document is ignored.
        /// </remarks>
        /// <param name="coredoc">Document whose sentence annotation is read; may be <c>null</c>.</param>
        public void CreateParseTree(CoreDocument coredoc)
        {
            if (coredoc == null)
            {
                return;
            }

            ArrayList sentenceList = (ArrayList)coredoc.annotation().get(typeof(CoreAnnotations.SentencesAnnotation));

            for (int position = 0; position < sentenceList.size(); position++)
            {
                CoreMap current = (CoreMap)sentenceList.get(position);

                // tree first, then the constituents derived from that same tree
                this.constituencyParse = (Tree)current.get(typeof(TreeCoreAnnotations.TreeAnnotation));

                Set constituentSet = (Set)constituencyParse.constituents(new LabeledScoredConstituentFactory());
                treeArray = constituentSet.toArray();
            }
        }
        /// <summary>
        /// Extracts the sentence at <paramref name="index"/>, together with its words, into
        /// <paramref name="sentence"/>.
        /// </summary>
        /// <param name="index">Zero-based position of the sentence to extract.</param>
        /// <param name="sentence">
        /// Target that receives the sentence fields; its <c>words</c> list is replaced only when
        /// the sentence has at least one token.
        /// </param>
        /// <returns>
        /// <c>true</c> on success (including a sentence without tokens); <c>false</c> when the
        /// sentence fields or any single word cannot be extracted.
        /// </returns>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="index"/> is not below <c>SentencesCount</c>.
        /// </exception>
        public bool GetSentence(uint index, ref InternalSentence sentence)
        {
            // index is unsigned, so only the upper bound needs checking; comparing with >=
            // avoids computing "SentencesCount - 1", which would wrap around for an unsigned
            // count of zero and let every index through
            if (index >= this.SentencesCount)
            {
                throw new IndexOutOfRangeException();
            }

            // extract the basic sentence fields
            CoreMap stanfordSentence = (CoreMap)_sentences.get((int)index);

            if (!ExtractSentence(stanfordSentence, ref sentence))
            {
                return(false);
            }

            // extract words
            java.util.ArrayList stanfordWords =
                (java.util.ArrayList)stanfordSentence.get(_tokensAnnotationClass);

            if (stanfordWords.size() <= 0)
            {
                return(true);
            }

            // a "minor" optimisation (capacity)
            sentence.words = new System.Collections.Generic.List <InternalWord>(
                stanfordWords.size());

            foreach (CoreMap stanfordWord in stanfordWords)
            {
                if (!ExtractWord(stanfordWord, ref sentence))
                {
                    // NOTE: currently, we fail the entire sentence even if a single
                    // word fails
                    return(false);
                }
            }

            return(true);
        }
 /// <summary>
 /// Reads the value stored on a sentence under <c>TreeCoreAnnotations.TreeAnnotation</c>.
 /// </summary>
 /// <param name="sent">Sentence to read the tree annotation from.</param>
 /// <returns>The annotation value cast to <c>Tree</c>.</returns>
 public Tree GetParseTree(CoreMap sent)
 {
     object treeAnnotation = sent.get(typeof(TreeCoreAnnotations.TreeAnnotation));

     return((Tree)treeAnnotation);
 }
        /// <summary>
        /// Reads the sentiment-annotated tree of <paramref name="sentence"/> and returns the
        /// class predicted for it.
        /// </summary>
        /// <param name="sentence">Sentence carrying a <c>SentimentAnnotatedTree</c> annotation.</param>
        /// <returns>The predicted class: a score between 0-4, higher being more positive.</returns>
        private int GetSentiment(CoreMap sentence)
        {
            // the annotated tree is the input the predicted-class lookup works on
            Tree annotatedTree = (Tree)sentence.get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));

            return RNNCoreAnnotations.getPredictedClass(annotatedTree);
        }