Beispiel #1
0
        /// <summary>
        /// Annotates the given text with the pipeline and returns the lemma of every token.
        /// </summary>
        /// <param name="word">Text to annotate; every token of every sentence found contributes one entry.</param>
        /// <returns>
        /// The lemmas in the order sentences and tokens are enumerated, or <c>null</c> when the
        /// pipeline throws while annotating.
        /// </returns>
        public List <string> Stem(string word)
        {
            // Build the pipeline on first use.
            if (pipeline == null)
            {
                Initial();
            }

            var annotated = new Annotation(word);
            try
            {
                pipeline.annotate(annotated);
            }
            catch (Exception)
            {
                // An annotation failure is reported to the caller as a null result.
                return null;
            }

            // Annotations are looked up by the class object of their key type.
            var sentencesKey = new edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation().getClass();
            var tokensKey    = new edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation().getClass();
            var lemmaKey     = new edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation().getClass();

            var result       = new List <string>();
            var sentenceList = (ArrayList)annotated.get(sentencesKey);

            foreach (CoreMap sentence in sentenceList)
            {
                // Collect the lemma of each token in this sentence.
                var tokenList = (ArrayList)sentence.get(tokensKey);
                result.AddRange(tokenList.Cast <CoreLabel>().Select(token => (string)token.get(lemmaKey)));
            }
            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Annotates <paramref name="sequence"/> with the pipeline and returns the string form
        /// of each token.
        /// </summary>
        /// <param name="sequence">Text to tokenize; must not be null.</param>
        /// <returns>
        /// The <c>ToString()</c> value of every entry in the document's token annotation, in
        /// list order.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
        public List <string> Tokenize(string sequence)
        {
            if (sequence == null)
            {
                // ArgumentNullException derives from Exception, so callers that catch
                // Exception keep working while new callers can catch the specific type.
                throw new ArgumentNullException("sequence", "Sequence should not be null for tokenizer.");
            }
            // Build the pipeline on first use.
            if (pipeline == null)
            {
                Initial();
            }
            var document = new Annotation(sequence);

            pipeline.annotate(document);

            // The annotation is looked up by the class object of its key type.
            var tokens = (ArrayList)document.get(tokenObj.getClass());

            // NOTE(review): the returned text is whatever the token's ToString() produces;
            // confirm that this is the bare word for the CoreNLP version in use.
            return (from CoreMap token in tokens select token.ToString()).ToList();
        }
Beispiel #3
0
        /// <summary>
        /// Annotates <paramref name="sentence"/> and stores the resulting token list in
        /// <c>tokens</c> and the dependency graph of the first sentence found in
        /// <c>dependencies</c>.
        /// </summary>
        /// <remarks>
        /// Only the first sentence's graph is kept. When the annotated text yields no sentence,
        /// <c>dependencies</c> is reset to <c>null</c> so that it never describes a different
        /// input than <c>tokens</c> does.
        /// </remarks>
        /// <param name="sentence">Text to parse.</param>
        public void Parse(string sentence)
        {
            // Build the pipeline on first use.
            if (pipeline == null)
            {
                Initial();
            }
            Annotation context = new Annotation(sentence);

            pipeline.annotate(context);
            this.tokens = (ArrayList)context.get(tokenObj.getClass());
            var sentences = (ArrayList)context.get(senObj.getClass());

            // Discard the graph left over from an earlier call; without this, an input that
            // produces no sentences would leave stale dependencies next to fresh tokens.
            this.dependencies = null;
            foreach (CoreMap sen in sentences)
            {
                this.dependencies = (SemanticGraph)sen.get(depObj.getClass());
                break; // only the first sentence is used
            }
        }
Beispiel #4
0
        /// <summary>
        /// Annotates <paramref name="context"/> with the pipeline and collects the named
        /// entities tagged LOCATION, PERSON or ORGANIZATION into <c>nerPairs</c>.
        /// </summary>
        /// <remarks>
        /// Consecutive tokens of a sentence that carry the same NER tag are joined with single
        /// spaces into one entity. <c>nerPairs</c> is replaced on every call; each pair holds
        /// the entity text and its tag. Entities are collected per sentence and never span a
        /// sentence boundary.
        /// </remarks>
        /// <param name="context">Text to scan; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
        public void FindNer(string context)
        {
            if (context == null)
            {
                // ArgumentNullException derives from Exception, so callers that catch
                // Exception keep working.
                throw new ArgumentNullException("context", "Input should not be null for finding NER!");
            }
            // Build the pipeline on first use.
            if (pipeline == null)
            {
                Initial();
            }

            var annotation = new Annotation(context);

            pipeline.annotate(annotation);
            var sentences = (ArrayList)annotation.get(new edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation().getClass());

            nerPairs = new List <Pair <string, string> >();
            foreach (CoreMap sentence in sentences)
            {
                var  prevNerToken = "O";
                bool newToken     = true;   // true while the buffer holds no entity text
                var  buffer       = new StringBuilder();

                foreach (CoreLabel token in (ArrayList)sentence.get(tokenObj.getClass()))
                {
                    var currNerToken = (string)token.get(nerObj.getClass());
                    var word         = (string)token.get(textObj.getClass());

                    // "O" tokens belong to no entity; they only terminate a pending one.
                    if (currNerToken.Equals("O"))
                    {
                        if (!prevNerToken.Equals("O") && (buffer.Length > 0))
                        {
                            FlushNerEntity(buffer, prevNerToken);
                            newToken     = true;
                            prevNerToken = "O";
                        }
                        continue;
                    }

                    // First token of an entity that starts with an empty buffer.
                    if (newToken)
                    {
                        prevNerToken = currNerToken;
                        newToken     = false;
                        buffer.Append(word);
                        continue;
                    }

                    if (currNerToken.Equals(prevNerToken))
                    {
                        // Same tag as the previous token: extend the current entity.
                        buffer.Append(' ').Append(word);
                    }
                    else
                    {
                        // The tag changed without an "O" in between: close the current
                        // entity and start the next one with this word. newToken stays false
                        // because the buffer already holds that first word; setting it to
                        // true here made the following token get appended without a
                        // separating space or merged under a different tag.
                        FlushNerEntity(buffer, prevNerToken);
                        buffer.Append(word);
                    }
                    prevNerToken = currNerToken;
                }

                // Emit an entity that runs up to the end of the sentence.
                if (!prevNerToken.Equals("O") && buffer.Length > 0)
                {
                    FlushNerEntity(buffer, prevNerToken);
                }
            }
        }

        // Adds the buffered entity text to nerPairs when its tag is one of the reported
        // types, then empties the buffer so the next entity starts clean.
        private void FlushNerEntity(StringBuilder buffer, string nerTag)
        {
            if (IsReportedNerTag(nerTag))
            {
                nerPairs.Add(new Pair <string, string>(buffer.ToString(), nerTag));
            }
            buffer.Clear();
        }

        // Tells whether entities with the given tag are recorded in nerPairs.
        private static bool IsReportedNerTag(string nerTag)
        {
            return nerTag.Equals("LOCATION") || nerTag.Equals("PERSON") || nerTag.Equals("ORGANIZATION");
        }