/// <summary>Serializes the annotations carried by a single token into an XML element.</summary>
/// <remarks>
/// The <c>id</c> attribute is always added and <c>SetSingleElement</c> is always invoked for
/// <c>word</c> and <c>lemma</c>. Every other child element is produced only when the token
/// contains the corresponding annotation key.
/// </remarks>
/// <param name="wordInfo">Element that receives the attribute and the child elements.</param>
/// <param name="token">Token whose annotations are written out.</param>
/// <param name="id">Position of this word in the sentence; written as the <c>id</c> attribute.</param>
/// <param name="curNS">Namespace passed to every element created here.</param>
private static void AddWordInfo(Element wordInfo, ICoreMap token, int id, string curNS)
{
    // store the position of this word in the sentence
    // (instance ToString with the invariant culture: int has no static ToString(int),
    // and the XML output should not depend on the current culture)
    wordInfo.AddAttribute(new Attribute("id", id.ToString(System.Globalization.CultureInfo.InvariantCulture)));
    SetSingleElement(wordInfo, "word", curNS, token.Get(typeof(CoreAnnotations.TextAnnotation)));
    SetSingleElement(wordInfo, "lemma", curNS, token.Get(typeof(CoreAnnotations.LemmaAnnotation)));

    // Character offsets are only written when both ends are present.
    if (token.ContainsKey(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)) && token.ContainsKey(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)))
    {
        int beginOffset = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
        SetSingleElement(wordInfo, "CharacterOffsetBegin", curNS, beginOffset.ToString(System.Globalization.CultureInfo.InvariantCulture));
        int endOffset = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
        SetSingleElement(wordInfo, "CharacterOffsetEnd", curNS, endOffset.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    if (token.ContainsKey(typeof(CoreAnnotations.PartOfSpeechAnnotation)))
    {
        SetSingleElement(wordInfo, "POS", curNS, token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)));
    }
    if (token.ContainsKey(typeof(CoreAnnotations.NamedEntityTagAnnotation)))
    {
        SetSingleElement(wordInfo, "NER", curNS, token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
    }
    if (token.ContainsKey(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)))
    {
        SetSingleElement(wordInfo, "NormalizedNER", curNS, token.Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
    }
    if (token.ContainsKey(typeof(CoreAnnotations.SpeakerAnnotation)))
    {
        SetSingleElement(wordInfo, "Speaker", curNS, token.Get(typeof(CoreAnnotations.SpeakerAnnotation)));
    }

    // The timex element carries attributes in addition to its text, so it is built by hand.
    if (token.ContainsKey(typeof(TimeAnnotations.TimexAnnotation)))
    {
        Timex timex = token.Get(typeof(TimeAnnotations.TimexAnnotation));
        Element timexElem = new Element("Timex", curNS);
        timexElem.AddAttribute(new Attribute("tid", timex.Tid()));
        timexElem.AddAttribute(new Attribute("type", timex.TimexType()));
        timexElem.AppendChild(timex.Value());
        wordInfo.AppendChild(timexElem);
    }

    if (token.ContainsKey(typeof(CoreAnnotations.TrueCaseAnnotation)))
    {
        AppendTextChild(wordInfo, "TrueCase", curNS, token.Get(typeof(CoreAnnotations.TrueCaseAnnotation)));
    }
    if (token.ContainsKey(typeof(CoreAnnotations.TrueCaseTextAnnotation)))
    {
        AppendTextChild(wordInfo, "TrueCaseText", curNS, token.Get(typeof(CoreAnnotations.TrueCaseTextAnnotation)));
    }
    if (token.ContainsKey(typeof(SentimentCoreAnnotations.SentimentClass)))
    {
        AppendTextChild(wordInfo, "sentiment", curNS, token.Get(typeof(SentimentCoreAnnotations.SentimentClass)));
    }
    if (token.ContainsKey(typeof(CoreAnnotations.WikipediaEntityAnnotation)))
    {
        AppendTextChild(wordInfo, "entitylink", curNS, token.Get(typeof(CoreAnnotations.WikipediaEntityAnnotation)));
    }
}

/// <summary>Creates a new element holding <paramref name="text"/> and appends it to <paramref name="parent"/>.</summary>
/// <param name="parent">Element the new child is appended to.</param>
/// <param name="elemName">Name of the new child element.</param>
/// <param name="curNS">Namespace of the new child element.</param>
/// <param name="text">Text appended to the new child element.</param>
private static void AppendTextChild(Element parent, string elemName, string curNS, string text)
{
    Element child = new Element(elemName, curNS);
    child.AppendChild(text);
    parent.AppendChild(child);
}
/// <summary>
/// Writes an HTML report for an annotated query: the query text with temporal expressions
/// highlighted, a table listing the temporal expressions, and the word/tag pairs of every sentence.
/// </summary>
/// <param name="out">Writer that receives the generated HTML.</param>
/// <param name="query">Text that the character offsets of the temporal expressions are applied to.</param>
/// <param name="anno">Annotation supplying the temporal expressions and the sentences.</param>
/// <param name="includeOffsets">When true, the table also gets character and token offset columns.</param>
private static void DisplayAnnotation(PrintWriter @out, string query, Annotation anno, bool includeOffsets)
{
    IList<ICoreMap> temporalMentions = anno.Get(typeof(TimeAnnotations.TimexAnnotations));

    // Cut the query into consecutive segments; the flag tells whether a segment is a temporal expression.
    IList<KeyValuePair<string, bool>> segments = new List<KeyValuePair<string, bool>>();
    int cursor = 0;
    foreach (ICoreMap timexSpan in temporalMentions)
    {
        int spanStart = timexSpan.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
        int spanEnd = timexSpan.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
        if (spanStart < cursor)
        {
            // Starts inside text that was already consumed: leave it out of the highlighting.
            continue;
        }
        segments.Add(new KeyValuePair<string, bool>(Sharpen.Runtime.Substring(query, cursor, spanStart), false));
        segments.Add(new KeyValuePair<string, bool>(Sharpen.Runtime.Substring(query, spanStart, spanEnd), true));
        cursor = spanEnd;
    }
    if (cursor < query.Length)
    {
        // Whatever follows the last highlighted expression.
        segments.Add(new KeyValuePair<string, bool>(Sharpen.Runtime.Substring(query, cursor), false));
    }

    // Section 1: the query itself, with the temporal expressions wrapped in a coloured span.
    @out.Println("<table id='Annotated'><tr><td>");
    foreach (KeyValuePair<string, bool> segment in segments)
    {
        if (segment.Value)
        {
            @out.Print("<span style=\"background-color: #FF8888\">");
        }
        @out.Print(StringEscapeUtils.EscapeHtml4(segment.Key));
        if (segment.Value)
        {
            @out.Print("</span>");
        }
    }
    @out.Println("</td></tr></table>");

    // Section 2: one table row per temporal expression.
    @out.Println("<h3>Temporal Expressions</h3>");
    if (temporalMentions.Count == 0)
    {
        @out.Println("<em>No temporal expressions.</em>");
    }
    else
    {
        @out.Println("<table>");
        @out.Println("<tr><th>Text</th><th>Value</th>");
        if (includeOffsets)
        {
            @out.Println("<th>Char Begin</th><th>Char End</th><th>Token Begin</th><th>Token End</th>");
        }
        @out.Println("<th>Timex3 Tag</th></tr>");
        foreach (ICoreMap timexRow in temporalMentions)
        {
            @out.Println("<tr>");
            Timex timex = timexRow.Get(typeof(TimeAnnotations.TimexAnnotation));
            int charBegin = timexRow.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int charEnd = timexRow.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
            string coveredText = Sharpen.Runtime.Substring(query, charBegin, charEnd);
            @out.Print("<td>" + StringEscapeUtils.EscapeHtml4(coveredText) + "</td>");

            // A timex without a value yields an empty cell.
            string valueHtml = string.Empty;
            if (timex.Value() != null)
            {
                valueHtml = StringEscapeUtils.EscapeHtml4(timex.Value());
            }
            @out.Print("<td>" + valueHtml + "</td>");

            if (includeOffsets)
            {
                @out.Print("<td>" + charBegin + "</td>");
                @out.Print("<td>" + charEnd + "</td>");
                @out.Print("<td>" + timexRow.Get(typeof(CoreAnnotations.TokenBeginAnnotation)) + "</td>");
                @out.Print("<td>" + timexRow.Get(typeof(CoreAnnotations.TokenEndAnnotation)) + "</td>");
            }
            @out.Print("<td>" + StringEscapeUtils.EscapeHtml4(timex.ToString()) + "</td>");
            @out.Println("</tr>");
        }
        @out.Println("</table>");
    }

    // Section 3: "word/tag" for every token, one line per sentence.
    @out.Println("<h3>POS Tags</h3>");
    @out.Println("<table><tr><td>");
    foreach (ICoreMap sentence in anno.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        IList<CoreLabel> sentenceTokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
        foreach (CoreLabel label in sentenceTokens)
        {
            string wordAndTag = label.Word() + "/" + label.Tag();
            @out.Print(StringEscapeUtils.EscapeHtml4(wordAndTag) + " ");
        }
        @out.Println("<br>");
    }
    @out.Println("</td></tr></table>");
}
/// <summary>Link the given mention, if possible.</summary>
/// <param name="mention">
/// The mention to link, as given by
/// <see cref="EntityMentionsAnnotator"/>
/// </param>
/// <returns>
/// The normalized timex value, the numeric value of an ordinal, the surface form of a number, or the
/// Wikidict entry for the mention -- whichever case applies first; empty when none applies or when
/// the timex value is one of the PRESENT / PAST / FUTURE markers.
/// </returns>
public virtual Optional<string> Link(ICoreMap mention)
{
    // Prefer the original text of the mention; fall back to its text annotation.
    string surfaceForm = mention.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
    if (surfaceForm == null)
    {
        surfaceForm = mention.Get(typeof(CoreAnnotations.TextAnnotation));
    }

    // set up key for wikidict ; if caseless use lower case version of surface form
    string mentionSurfaceFormKey = wikidictCaseless ? surfaceForm.ToLower() : surfaceForm;

    // get ner
    string ner = mention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));

    // Case: normalize dates (only for DATE / TIME / SET mentions that carry a timex with a value)
    bool hasTemporalTag = ner != null
        && (Sharpen.Runtime.EqualsIgnoreCase(KBPRelationExtractor.NERTag.Date.name, ner)
            || Sharpen.Runtime.EqualsIgnoreCase("TIME", ner)
            || Sharpen.Runtime.EqualsIgnoreCase("SET", ner));
    if (hasTemporalTag)
    {
        Timex timex = mention.Get(typeof(TimeAnnotations.TimexAnnotation));
        string timexValue = timex != null ? timex.Value() : null;
        if (timexValue != null)
        {
            switch (timexValue)
            {
                case "PRESENT":
                case "PRESENT_REF":
                case "PAST":
                case "PAST_REF":
                case "FUTURE":
                case "FUTURE_REF":
                {
                    // These marker values are not normalized; the mention stays unlinked.
                    return(Optional.Empty());
                }

                default:
                {
                    return(Optional.Of(NormalizeTimex(timexValue)));
                }
            }
        }
    }

    // Case: normalize ordinals
    if (ner != null && Sharpen.Runtime.EqualsIgnoreCase("ORDINAL", ner))
    {
        Number numericValue = mention.Get(typeof(CoreAnnotations.NumericValueAnnotation));
        if (numericValue != null)
        {
            return(Optional.Of(numericValue.ToString()));
        }
    }

    // Case: keep numbers as is
    if (NumberPattern.Matcher(surfaceForm).Matches())
    {
        return(Optional.Of(surfaceForm));
    }

    // Case: link with Wikidict
    if (ner != null && !"O".Equals(ner) && dictionary.Contains(mentionSurfaceFormKey))
    {
        return(Optional.Of(dictionary[mentionSurfaceFormKey]));
    }

    // Else: no link is produced for this mention
    return(Optional.Empty());
}