/// <summary>
/// Fills <paramref name="wordInfo"/> with the XML description of a single token.
/// </summary>
/// <remarks>
/// The "id" attribute and the "word" and "lemma" children are always written. Every other
/// child is written only when the token contains the corresponding annotation key.
/// Numbers are converted with the invariant culture so the output does not depend on the
/// culture of the current thread.
/// </remarks>
/// <param name="wordInfo">Element that receives the attribute and the child elements.</param>
/// <param name="token">Token whose annotations are serialized.</param>
/// <param name="id">Value of the "id" attribute (the position of the word in the sentence).</param>
/// <param name="curNS">Namespace handed to every element created here.</param>
private static void AddWordInfo(Element wordInfo, ICoreMap token, int id, string curNS)
 {
     // store the position of this word in the sentence
     // (int.ToString(id) is not valid C#: Int32 has no static ToString overload taking an int)
     wordInfo.AddAttribute(new Attribute("id", id.ToString(System.Globalization.CultureInfo.InvariantCulture)));
     SetSingleElement(wordInfo, "word", curNS, token.Get(typeof(CoreAnnotations.TextAnnotation)));
     SetSingleElement(wordInfo, "lemma", curNS, token.Get(typeof(CoreAnnotations.LemmaAnnotation)));

     // Character offsets are only emitted as a pair: both keys must be present.
     if (token.ContainsKey(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)) && token.ContainsKey(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)))
     {
         SetSingleElement(wordInfo, "CharacterOffsetBegin", curNS, System.Convert.ToString(token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)), System.Globalization.CultureInfo.InvariantCulture));
         SetSingleElement(wordInfo, "CharacterOffsetEnd", curNS, System.Convert.ToString(token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)), System.Globalization.CultureInfo.InvariantCulture));
     }

     // Optional annotations written through the shared helper.
     if (token.ContainsKey(typeof(CoreAnnotations.PartOfSpeechAnnotation)))
     {
         SetSingleElement(wordInfo, "POS", curNS, token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)));
     }
     if (token.ContainsKey(typeof(CoreAnnotations.NamedEntityTagAnnotation)))
     {
         SetSingleElement(wordInfo, "NER", curNS, token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
     }
     if (token.ContainsKey(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)))
     {
         SetSingleElement(wordInfo, "NormalizedNER", curNS, token.Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
     }
     if (token.ContainsKey(typeof(CoreAnnotations.SpeakerAnnotation)))
     {
         SetSingleElement(wordInfo, "Speaker", curNS, token.Get(typeof(CoreAnnotations.SpeakerAnnotation)));
     }

     // The timex becomes a "Timex" element carrying "tid" and "type" attributes and the value as content.
     if (token.ContainsKey(typeof(TimeAnnotations.TimexAnnotation)))
     {
         Timex   timex     = token.Get(typeof(TimeAnnotations.TimexAnnotation));
         Element timexElem = new Element("Timex", curNS);
         timexElem.AddAttribute(new Attribute("tid", timex.Tid()));
         timexElem.AddAttribute(new Attribute("type", timex.TimexType()));
         timexElem.AppendChild(timex.Value());
         wordInfo.AppendChild(timexElem);
     }

     // The remaining optional annotations are appended as elements built directly in this method.
     if (token.ContainsKey(typeof(CoreAnnotations.TrueCaseAnnotation)))
     {
         Element trueCaseElem = new Element("TrueCase", curNS);
         trueCaseElem.AppendChild(token.Get(typeof(CoreAnnotations.TrueCaseAnnotation)));
         wordInfo.AppendChild(trueCaseElem);
     }
     if (token.ContainsKey(typeof(CoreAnnotations.TrueCaseTextAnnotation)))
     {
         Element trueCaseTextElem = new Element("TrueCaseText", curNS);
         trueCaseTextElem.AppendChild(token.Get(typeof(CoreAnnotations.TrueCaseTextAnnotation)));
         wordInfo.AppendChild(trueCaseTextElem);
     }
     if (token.ContainsKey(typeof(SentimentCoreAnnotations.SentimentClass)))
     {
         Element sentimentElem = new Element("sentiment", curNS);
         sentimentElem.AppendChild(token.Get(typeof(SentimentCoreAnnotations.SentimentClass)));
         wordInfo.AppendChild(sentimentElem);
     }
     if (token.ContainsKey(typeof(CoreAnnotations.WikipediaEntityAnnotation)))
     {
         Element entityLinkElem = new Element("entitylink", curNS);
         entityLinkElem.AppendChild(token.Get(typeof(CoreAnnotations.WikipediaEntityAnnotation)));
         wordInfo.AppendChild(entityLinkElem);
     }
 }
        /// <summary>
        /// Writes an HTML report for an annotated query: the query text with its temporal expressions
        /// highlighted, a table of those expressions, and the word/tag pairs of every sentence.
        /// </summary>
        /// <param name="out">Writer that receives the generated HTML.</param>
        /// <param name="query">Text that the character offsets of the annotations index into.</param>
        /// <param name="anno">Annotation that supplies the timex annotations and the sentences.</param>
        /// <param name="includeOffsets">When true, the table also gets the character and token offset columns.</param>
        private static void DisplayAnnotation(PrintWriter @out, string query, Annotation anno, bool includeOffsets)
        {
            IList <ICoreMap> temporalSpans = anno.Get(typeof(TimeAnnotations.TimexAnnotations));

            // Each entry pairs a slice of the query with a flag: true when the slice is a temporal expression.
            IList <KeyValuePair <string, bool> > segments = new List <KeyValuePair <string, bool> >();
            int cursor = 0;

            foreach (ICoreMap span in temporalSpans)
            {
                int spanStart = span.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int spanEnd   = span.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                if (spanStart < cursor)
                {
                    // Begins before the end of the last emitted span: left out of the highlighted text.
                    continue;
                }
                segments.Add(new KeyValuePair <string, bool>(Sharpen.Runtime.Substring(query, cursor, spanStart), false));
                segments.Add(new KeyValuePair <string, bool>(Sharpen.Runtime.Substring(query, spanStart, spanEnd), true));
                cursor = spanEnd;
            }
            if (cursor < query.Length)
            {
                // Untagged remainder after the last emitted span.
                segments.Add(new KeyValuePair <string, bool>(Sharpen.Runtime.Substring(query, cursor), false));
            }

            // Section 1: the query itself, with tagged slices wrapped in a coloured span.
            @out.Println("<table id='Annotated'><tr><td>");
            foreach (KeyValuePair <string, bool> segment in segments)
            {
                bool highlighted = segment.Value;
                if (highlighted)
                {
                    @out.Print("<span style=\"background-color: #FF8888\">");
                }
                @out.Print(StringEscapeUtils.EscapeHtml4(segment.Key));
                if (highlighted)
                {
                    @out.Print("</span>");
                }
            }
            @out.Println("</td></tr></table>");

            // Section 2: one table row per temporal expression (overlapping ones included).
            @out.Println("<h3>Temporal Expressions</h3>");
            if (temporalSpans.Count == 0)
            {
                @out.Println("<em>No temporal expressions.</em>");
            }
            else
            {
                @out.Println("<table>");
                @out.Println("<tr><th>Text</th><th>Value</th>");
                if (includeOffsets)
                {
                    @out.Println("<th>Char Begin</th><th>Char End</th><th>Token Begin</th><th>Token End</th>");
                }
                @out.Println("<th>Timex3 Tag</th></tr>");
                foreach (ICoreMap row in temporalSpans)
                {
                    @out.Println("<tr>");
                    Timex timex    = row.Get(typeof(TimeAnnotations.TimexAnnotation));
                    int   rowStart = row.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                    int   rowEnd   = row.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));

                    string coveredText = Sharpen.Runtime.Substring(query, rowStart, rowEnd);
                    @out.Print("<td>" + StringEscapeUtils.EscapeHtml4(coveredText) + "</td>");

                    // A timex without a value yields an empty cell.
                    string valueCell = string.Empty;
                    if (timex.Value() != null)
                    {
                        valueCell = StringEscapeUtils.EscapeHtml4(timex.Value());
                    }
                    @out.Print("<td>" + valueCell + "</td>");

                    if (includeOffsets)
                    {
                        @out.Print("<td>" + rowStart + "</td>");
                        @out.Print("<td>" + rowEnd + "</td>");
                        @out.Print("<td>" + row.Get(typeof(CoreAnnotations.TokenBeginAnnotation)) + "</td>");
                        @out.Print("<td>" + row.Get(typeof(CoreAnnotations.TokenEndAnnotation)) + "</td>");
                    }
                    @out.Print("<td>" + StringEscapeUtils.EscapeHtml4(timex.ToString()) + "</td>");
                    @out.Println("</tr>");
                }
                @out.Println("</table>");
            }

            // Section 3: "word/tag" for every token, one line per sentence.
            @out.Println("<h3>POS Tags</h3>");
            @out.Println("<table><tr><td>");
            foreach (ICoreMap sentence in anno.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                IList <CoreLabel> sentenceTokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                foreach (CoreLabel token in sentenceTokens)
                {
                    @out.Print(StringEscapeUtils.EscapeHtml4(token.Word() + "/" + token.Tag()) + " ");
                }
                @out.Println("<br>");
            }
            @out.Println("</td></tr></table>");
        }
        // Example no. 3
        // 0
        /// <summary>Link the given mention, if possible.</summary>
        /// <remarks>
        /// The cases are tried in this order: a DATE/TIME/SET mention with a timex value, an ORDINAL
        /// mention with a numeric value, a surface form matching the number pattern, and finally a
        /// dictionary lookup for any mention whose tag is neither missing nor "O".
        /// </remarks>
        /// <param name="mention">
        /// The mention to link, as given by
        /// <see cref="EntityMentionsAnnotator"/>
        /// </param>
        /// <returns>
        /// The Wikidict entry for the given mention, or the normalized timex / numeric value -- as appropriate.
        /// Empty when no case applies, or when the timex value is one of the PRESENT/PAST/FUTURE placeholders.
        /// </returns>
        public virtual Optional <string> Link(ICoreMap mention)
        {
            // Prefer the original text; fall back to the plain text annotation when it is absent.
            string surfaceForm = mention.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
            if (surfaceForm == null)
            {
                surfaceForm = mention.Get(typeof(CoreAnnotations.TextAnnotation));
            }

            // Key for the wikidict lookup; lower-cased when the dictionary is caseless.
            string lookupKey = wikidictCaseless ? surfaceForm.ToLower() : surfaceForm;

            string ner    = mention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
            bool   hasTag = ner != null;

            // Case: normalize dates
            bool isTemporalTag = hasTag && (Sharpen.Runtime.EqualsIgnoreCase(KBPRelationExtractor.NERTag.Date.name, ner)
                                            || Sharpen.Runtime.EqualsIgnoreCase("TIME", ner)
                                            || Sharpen.Runtime.EqualsIgnoreCase("SET", ner));
            if (isTemporalTag)
            {
                Timex timex = mention.Get(typeof(TimeAnnotations.TimexAnnotation));
                if (timex != null)
                {
                    string timexValue = timex.Value();
                    if (timexValue != null)
                    {
                        switch (timexValue)
                        {
                            // Placeholder values are never returned as a link.
                            case "PRESENT":
                            case "PRESENT_REF":
                            case "PAST":
                            case "PAST_REF":
                            case "FUTURE":
                            case "FUTURE_REF":
                                return Optional.Empty();

                            default:
                                return Optional.Of(NormalizeTimex(timexValue));
                        }
                    }
                }
                // No usable timex value: continue with the remaining cases.
            }

            // Case: normalize ordinals
            if (hasTag && Sharpen.Runtime.EqualsIgnoreCase("ORDINAL", ner))
            {
                Number numericValue = mention.Get(typeof(CoreAnnotations.NumericValueAnnotation));
                if (numericValue != null)
                {
                    return Optional.Of(numericValue.ToString());
                }
            }

            // Case: keep numbers as is
            if (NumberPattern.Matcher(surfaceForm).Matches())
            {
                return Optional.Of(surfaceForm);
            }

            // Case: link with Wikidict
            if (hasTag && !"O".Equals(ner) && dictionary.Contains(lookupKey))
            {
                return Optional.Of(dictionary[lookupKey]);
            }

            // Else: nothing to link, report an empty result
            return Optional.Empty();
        }