Beispiel #1
0
        /// <summary>
        /// Get sentences from the given message text. The text is annotated with
        /// <c>Tagger</c> and the resulting sentence list is stored in the
        /// <c>Sentences</c> member, replacing whatever was stored there before.
        /// </summary>
        /// <param name="text">The message text</param>
        /// <returns>
        /// A list with the sentences; empty when the annotation does not carry a
        /// sentence list.
        /// </returns>
        public List <object> GetSentences(string text)
        {
            PerformanceTester.StartMET("GetSentences");
            try
            {
                var annotation = new Annotation(text);

                Tagger.annotate(annotation);

                // "as" yields null when the sentences annotation is absent or is not an
                // ArrayList, so the result has to be checked before it is used below.
                Sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;
            }
            finally
            {
                // Stop the measurement even when annotating throws, so that a failed
                // call does not leave a started measurement without its matching stop.
                PerformanceTester.StopMET("GetSentences");
            }

            if (Sentences == null)
            {
                return new List<object>();
            }

            return new List<object>(Sentences.toArray());
        }
Beispiel #2
0
        /// <summary>
        /// Tag the given message text: every token of every sentence is turned into a
        /// <c>TaggedWord</c> carrying its text, original text, surrounding whitespace
        /// and part-of-speech identifier.
        /// </summary>
        /// <param name="text">
        /// The message text. It is only annotated when the saved sentences are not used.
        /// </param>
        /// <param name="useSavedSentences">
        /// When true and <c>Sentences</c> is not null, the tokens are read from
        /// <c>Sentences</c> and <paramref name="text"/> is ignored; otherwise
        /// <paramref name="text"/> is annotated with <c>Tagger</c>.
        /// </param>
        /// <returns>
        /// A list with the tagged words in sentence and token order; empty when no
        /// sentence list is available.
        /// </returns>
        public List <TaggedWord> Tag(string text, bool useSavedSentences = false)
        {
            PerformanceTester.StartMET("Tagging");
            try
            {
                var list = new List<TaggedWord>();

                ArrayList sentences;

                if (useSavedSentences && Sentences != null)
                {
                    sentences = Sentences;
                }
                else
                {
                    var annotation = new Annotation(text);
                    Tagger.annotate(annotation);
                    sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;
                }

                // "as" yields null when the annotation is absent or not an ArrayList.
                if (sentences == null)
                {
                    return list;
                }

                // The annotation keys do not depend on the sentence or token, so they
                // are created once instead of once per token.
                var tokensKey   = new CoreAnnotations.TokensAnnotation().getClass();
                var originalKey = new CoreAnnotations.OriginalTextAnnotation().getClass();
                var afterKey    = new CoreAnnotations.AfterAnnotation().getClass();
                var beforeKey   = new CoreAnnotations.BeforeAnnotation().getClass();
                var wordKey     = new CoreAnnotations.TextAnnotation().getClass();
                var posKey      = new CoreAnnotations.PartOfSpeechAnnotation().getClass();

                foreach (CoreMap sentence in sentences)
                {
                    var tokens = sentence.get(tokensKey) as ArrayList;
                    if (tokens == null)
                    {
                        // Sentence without a usable token list: nothing to tag.
                        continue;
                    }

                    foreach (CoreLabel token in tokens)
                    {
                        // Named-entity and lemma annotations are not read here.
                        var taggedWord = new TaggedWord()
                        {
                            Word     = TextOrEmpty(token.get(wordKey)),
                            Original = TextOrEmpty(token.get(originalKey)),
                            WhiteSpaceCharacterAfter  = TextOrEmpty(token.get(afterKey)),
                            WhiteSpaceCharacterBefore = TextOrEmpty(token.get(beforeKey)),
                            POSStringIdentifier       = TextOrEmpty(token.get(posKey)),
                        };
                        list.Add(taggedWord);
                    }
                }

                return list;
            }
            finally
            {
                // Stop the measurement even when annotating throws, so that a failed
                // call does not leave a started measurement without its matching stop.
                PerformanceTester.StopMET("Tagging");
            }
        }

        /// <summary>
        /// Converts an annotation value to its string form, mapping a missing (null)
        /// value to the empty string instead of throwing.
        /// </summary>
        /// <param name="value">The annotation value, possibly null</param>
        /// <returns>The value's string form, or the empty string when it is null</returns>
        private static string TextOrEmpty(object value)
        {
            return value == null ? string.Empty : value.ToString();
        }