/// <summary>Compares this timex with another object for value equality.</summary>
/// <remarks>
/// Two instances are equal when they have exactly the same runtime type and their
/// <c>beginPoint</c>, <c>endPoint</c>, <c>type</c> and <c>val</c> members all match.
/// </remarks>
/// <param name="o">The object to compare against; may be <see langword="null"/>.</param>
/// <returns><see langword="true"/> when <paramref name="o"/> is an equal timex.</returns>
public override bool Equals(object o)
{
    // Same instance: nothing else to check.
    if (ReferenceEquals(this, o))
    {
        return true;
    }

    // Null or a different runtime type can never be equal.
    if (o == null || GetType() != o.GetType())
    {
        return false;
    }

    Timex other = (Timex)o;

    // The static object.Equals treats two nulls as equal and a single null as unequal,
    // which is the same rule the members are compared by.
    return beginPoint == other.beginPoint
        && endPoint == other.endPoint
        && object.Equals(type, other.type)
        && object.Equals(val, other.val);
}
/// <summary>
/// Splits raw HeidelTime output into an ordered sequence of plain-text nodes and TIMEX3 nodes.
/// </summary>
/// <remarks>
/// The TimeML wrapper markup is stripped first; all node offsets refer to positions in the
/// stripped and trimmed string, not in the original <paramref name="output"/>.
/// </remarks>
/// <param name="output">The text produced by HeidelTime.</param>
/// <returns>
/// The nodes in document order: a plain node for every non-empty stretch of text outside a
/// TIMEX3 tag and a timex node (carrying the tag's inner text) for every TIMEX3 tag.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// A TIMEX3 opening tag has no matching closing tag.
/// </exception>
private IList<HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node> ToNodeSequence(string output)
{
    // First of all, get rid of all XML markup that HeidelTime inserts.
    output = timeMLOpen.Matcher(output).ReplaceAll(string.Empty).Trim();
    output = timeMLClose.Matcher(output).ReplaceAll(string.Empty).Trim();

    // Now go through and chunk the sequence into <TIMEX3> tag regions.
    Matcher openMatcher = timexTagOpen.Matcher(output);
    Matcher attrMatcher = attr.Matcher(output);
    Matcher closeMatcher = timexTagClose.Matcher(output);
    IList<HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node> ret = new List<HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node>();

    // TODO: save metadata of TIMEX token positions or stuff.
    int charIdx = 0;
    // NOTE(review): this dictionary is cleared and reused for every tag; that is only safe if
    // ToTimex does not keep a reference to it - confirm.
    Dictionary<string, string> attrs = new Dictionary<string, string>();
    while (openMatcher.Find(charIdx))
    {
        int tagBegin = openMatcher.Start();
        int tagBeginEnd = openMatcher.End();

        // Everything between the previous tag (or the start) and this tag is plain text.
        if (charIdx < tagBegin)
        {
            ret.Add(new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node(Sharpen.Runtime.Substring(output, charIdx, tagBegin), charIdx, tagBegin));
        }

        // Collect the attributes that end before the opening tag does.
        attrs.Clear();
        int attrCursor = tagBegin;
        while (attrMatcher.Find(attrCursor + 1) && attrMatcher.End() < tagBeginEnd)
        {
            attrs[attrMatcher.Group("key")] = attrMatcher.Group("value");
            attrCursor = attrMatcher.End();
        }

        // Move to the close tag. This is checked explicitly rather than with Debug.Assert,
        // because assertions are compiled out of release builds and the matcher would then be
        // queried without a match.
        if (!closeMatcher.Find(tagBeginEnd))
        {
            throw new System.InvalidOperationException("Unclosed TIMEX3 tag at offset " + tagBegin + " in HeidelTime output");
        }
        int tagEndBegin = closeMatcher.Start();
        int tagEnd = closeMatcher.End();

        string text = Sharpen.Runtime.Substring(output, tagBeginEnd, tagEndBegin);
        Timex timex = ToTimex(text, attrs);
        ret.Add(new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.TimexNode(text, tagBeginEnd, tagEndBegin, timex));
        charIdx = tagEnd;
    }

    // Add whatever follows the last tag as a final plain-text node.
    if (charIdx < output.Length)
    {
        ret.Add(new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node(Sharpen.Runtime.Substring(output, charIdx, output.Length), charIdx, output.Length));
    }
    return ret;
}
/// <summary>Creates a timex from a TIMEX3 XML fragment (mainly for testing).</summary>
/// <param name="xml">The XML text; its parsed element must be named <c>TIMEX3</c>.</param>
/// <returns>A new timex initialised from the parsed element.</returns>
/// <exception cref="System.ArgumentException">The parsed element is not a <c>TIMEX3</c> element.</exception>
public static Timex FromXml(string xml)
{
    IElement element = XMLUtils.ParseElement(xml);
    if (!"TIMEX3".Equals(element.GetNodeName()))
    {
        throw new ArgumentException("Invalid timex xml: " + xml);
    }

    Timex parsed = new Timex();
    // parsed.init(xml, element);
    // Doesn't preserve original input xml
    // Will reorder attributes of xml so can match xml of test timex and actual timex
    // (for which we can't control the order of the attributes now we don't use nu.xom...)
    parsed.Init(element);
    return parsed;
}
/// <summary>
/// Builds one core map per TIMEX3 child of <paramref name="docElem"/>, annotated with the timex,
/// its text, its character offsets and (when available) its token offsets.
/// </summary>
/// <remarks>
/// Character offsets are accumulated over the text and TIMEX3 children of
/// <paramref name="docElem"/> in order. Token offsets are looked up by character offset in
/// tables built from the tokens of <paramref name="originalDocument"/>; when there is no exact
/// entry the nearest offset is used.
/// </remarks>
/// <param name="docElem">The element whose children are text nodes and TIMEX3 elements.</param>
/// <param name="originalDocument">The document providing sentences and tokens.</param>
/// <returns>The core maps, in document order.</returns>
/// <exception cref="System.Exception">
/// A child is neither text nor a TIMEX3 element, or a TIMEX3 element does not have exactly one child node.
/// </exception>
private static IList<ICoreMap> ToTimexCoreMaps(IElement docElem, ICoreMap originalDocument)
{
    //--Collect Token Offsets
    IDictionary<int, int> beginMap = Generics.NewHashMap();
    IDictionary<int, int> endMap = Generics.NewHashMap();
    bool haveTokenOffsets = true;
    foreach (ICoreMap sent in originalDocument.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        foreach (CoreLabel token in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            // Keep the token indices nullable: a plain int can never compare equal to null,
            // so a missing index would otherwise go unnoticed.
            // NOTE(review): assumes Get yields null (or a boxed null) for a missing annotation - confirm.
            object rawTokBegin = token.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
            object rawTokEnd = token.Get(typeof(CoreAnnotations.TokenEndAnnotation));
            int? tokBegin = rawTokBegin as int?;
            int? tokEnd = rawTokEnd as int?;
            if (tokBegin == null || tokEnd == null)
            {
                // The tables are only consulted while haveTokenOffsets is true, so there is
                // nothing useful to record for this token.
                haveTokenOffsets = false;
                continue;
            }
            int charBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int charEnd = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
            beginMap[charBegin] = tokBegin.Value;
            endMap[charEnd] = tokEnd.Value;
        }
    }
    // With no entries at all the nearest-offset search could never succeed.
    if (beginMap.Count == 0 || endMap.Count == 0)
    {
        haveTokenOffsets = false;
    }

    IList<ICoreMap> timexMaps = new List<ICoreMap>();
    int offset = 0;
    INodeList docNodes = docElem.GetChildNodes();
    for (int i = 0; i < docNodes.GetLength(); i++)
    {
        INode content = docNodes.Item(i);
        if (content is IText)
        {
            // Plain text only advances the running character offset.
            IText text = (IText)content;
            offset += text.GetWholeText().Length;
            continue;
        }
        if (!(content is IElement))
        {
            throw new Exception("unexpected content " + content);
        }
        IElement child = (IElement)content;
        if (!child.GetNodeName().Equals("TIMEX3"))
        {
            throw new Exception("unexpected element " + child);
        }

        Timex timex = new Timex(child);
        if (child.GetChildNodes().GetLength() != 1)
        {
            throw new Exception("TIMEX3 should only contain text " + child);
        }
        string timexText = child.GetTextContent();
        ICoreMap timexMap = new ArrayCoreMap();
        timexMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
        timexMap.Set(typeof(CoreAnnotations.TextAnnotation), timexText);
        int charBegin = offset;
        timexMap.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), charBegin);
        offset += timexText.Length;
        int charEnd = offset;
        timexMap.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), charEnd);

        //(tokens)
        if (haveTokenOffsets)
        {
            // If there is no exact match, search around the character offset.
            timexMap.Set(typeof(CoreAnnotations.TokenBeginAnnotation), FindNearestTokenIndex(beginMap, charBegin));
            timexMap.Set(typeof(CoreAnnotations.TokenEndAnnotation), FindNearestTokenIndex(endMap, charEnd));
        }
        timexMaps.Add(timexMap);
    }
    return timexMaps;
}

/// <summary>
/// Returns the token index stored for <paramref name="charOffset"/>, or for the closest
/// character offset that has an entry (the lower offset wins a tie).
/// </summary>
/// <param name="offsetToToken">Non-empty table from character offset to token index.</param>
/// <param name="charOffset">The character offset to resolve.</param>
/// <returns>The token index of the exact or nearest entry.</returns>
/// <exception cref="System.ArgumentException"><paramref name="offsetToToken"/> is empty.</exception>
private static int FindNearestTokenIndex(IDictionary<int, int> offsetToToken, int charOffset)
{
    if (offsetToToken.Count == 0)
    {
        // Searching an empty table would never terminate.
        throw new System.ArgumentException("no token offsets available", "offsetToToken");
    }
    int tokenIndex;
    if (offsetToToken.TryGetValue(charOffset, out tokenIndex))
    {
        return tokenIndex;
    }
    for (int searchStep = 1; ; searchStep++)
    {
        if (offsetToToken.TryGetValue(charOffset - searchStep, out tokenIndex)
            || offsetToToken.TryGetValue(charOffset + searchStep, out tokenIndex))
        {
            return tokenIndex;
        }
    }
}
/// <summary>
/// Turns recognised time expressions into core maps that carry a <see cref="Timex"/> annotation.
/// </summary>
/// <remarks>
/// Expressions without a temporal value are skipped. Expressions whose timex attributes cannot
/// be obtained, or whose timex cannot be built, are also skipped; the failure is logged only
/// when <c>options.verbose</c> is set.
/// </remarks>
/// <param name="annotation">The enclosing annotation; its text, when present, is used to re-derive each chunk's text.</param>
/// <param name="timeExpressions">The expressions to convert; may be <see langword="null"/>.</param>
/// <param name="timeIndex">The time index passed on when computing timex attributes.</param>
/// <returns>
/// The annotated core maps, or <see langword="null"/> when <paramref name="timeExpressions"/> is <see langword="null"/>.
/// </returns>
private IList<ICoreMap> ToCoreMaps(ICoreMap annotation, IList<TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex)
{
    if (timeExpressions == null)
    {
        return null;
    }

    IList<ICoreMap> results = new List<ICoreMap>(timeExpressions.Count);
    foreach (TimeExpression expression in timeExpressions)
    {
        ICoreMap expressionMap = expression.GetAnnotation();
        SUTime.Temporal temporal = expression.GetTemporal();
        if (temporal == null)
        {
            continue;
        }

        string origText = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
        string text = expressionMap.Get(typeof(CoreAnnotations.TextAnnotation));
        if (origText != null)
        {
            // Make sure the text is from original (and not from concatenated tokens)
            ChunkAnnotationUtils.AnnotateChunkText(expressionMap, annotation);
            text = expressionMap.Get(typeof(CoreAnnotations.TextAnnotation));
        }

        IDictionary<string, string> timexAttributes;
        try
        {
            timexAttributes = temporal.GetTimexAttributes(timeIndex);
            if (options.includeRange)
            {
                SUTime.Temporal rangeTemporal = temporal.GetRange();
                if (rangeTemporal != null)
                {
                    timexAttributes["range"] = rangeTemporal.ToString();
                }
            }
        }
        catch (Exception ex)
        {
            if (options.verbose)
            {
                logger.Warn("Failed to get attributes from " + text + ", timeIndex " + timeIndex);
                logger.Warn(ex);
            }
            continue;
        }

        Timex timex;
        try
        {
            timex = Timex.FromMap(text, timexAttributes);
        }
        catch (Exception ex)
        {
            if (options.verbose)
            {
                logger.Warn("Failed to process timex " + text + " with attributes " + timexAttributes);
                logger.Warn(ex);
            }
            continue;
        }

        // Timex.fromMap never returns null and if it exceptions, we've already done a continue
        System.Diagnostics.Debug.Assert(timex != null);
        expressionMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
        results.Add(expressionMap);
    }
    return results;
}
/// <summary>Gets two Calendars, marking the beginning and ending of this Timex's range.</summary>
/// <remarks>
/// The value is matched against a fixed list of forms, in order: full dates (YYYYMMDD,
/// YYYY-MM-DD), months (YYYYMM, YYYY-MM), years (YYYY), durations relative to the document time
/// (P&lt;n&gt;Y, P&lt;n&gt;M, P&lt;n&gt;D), seasons, weeks and <c>PRESENT_REF</c>. Date forms are matched on
/// their prefix only, so anything following them is ignored.
/// </remarks>
/// <param name="documentTime">
/// The time the document containing this Timex was written. (Not
/// necessary for resolving all Timex expressions. This may be
/// <see langword="null"/>
/// , but then relative time expressions cannot be
/// resolved.)
/// </param>
/// <returns>The begin point and end point Calendars.</returns>
/// <exception cref="System.NotSupportedException">This Timex has no value.</exception>
/// <exception cref="System.InvalidOperationException">
/// The value is <c>PRESENT_REF</c> and <paramref name="documentTime"/> is <see langword="null"/>.
/// </exception>
/// <exception cref="System.Exception">
/// The value has no recognised form (this includes durations when <paramref name="documentTime"/>
/// is <see langword="null"/>), or a duration has equal begin and end points.
/// </exception>
public virtual Pair<Calendar, Calendar> GetRange(Timex documentTime)
{
    if (this.val == null)
    {
        throw new NotSupportedException("no value specified for " + this);
    }

    // YYYYMMDD or YYYYMMDDT... where the time is concatenated directly with the date.
    // Only the first eight characters are inspected, so YYYYMMDD followed by letters
    // (the YYYYMMDDL+ form) is handled here as well.
    if (val.Length >= 8 && Pattern.Matches("\\d\\d\\d\\d\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 8)))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 4, 6));
        int day = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 6, 8));
        return new Pair<Calendar, Calendar>(MakeCalendar(year, month, day), MakeCalendar(year, month, day));
    }

    // YYYY-MM-DD or YYYY-MM-DDT...
    if (val.Length >= 10 && Pattern.Matches("\\d\\d\\d\\d-\\d\\d-\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 10)))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5, 7));
        int day = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 8, 10));
        return new Pair<Calendar, Calendar>(MakeCalendar(year, month, day), MakeCalendar(year, month, day));
    }

    // YYYYMM or YYYYMMT...: the whole month.
    if (val.Length >= 6 && Pattern.Matches("\\d\\d\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 6)))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 4, 6));
        Calendar begin = MakeCalendar(year, month, 1);
        int lastDay = begin.GetActualMaximum(Calendar.Date);
        Calendar end = MakeCalendar(year, month, lastDay);
        return new Pair<Calendar, Calendar>(begin, end);
    }

    // YYYY-MM or YYYY-MMT...: the whole month.
    if (val.Length >= 7 && Pattern.Matches("\\d\\d\\d\\d-\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 7)))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5, 7));
        Calendar begin = MakeCalendar(year, month, 1);
        int lastDay = begin.GetActualMaximum(Calendar.Date);
        Calendar end = MakeCalendar(year, month, lastDay);
        return new Pair<Calendar, Calendar>(begin, end);
    }

    // YYYY or YYYYT...: the whole year.
    // NOTE(review): this prefix check also accepts values such as "2010SP" or "2010W12", so
    // the season and week branches further down are not reached for four-digit years.
    // Left unchanged because callers may rely on the current result - confirm the intent.
    if (val.Length >= 4 && Pattern.Matches("\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 4)))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        return new Pair<Calendar, Calendar>(MakeCalendar(year, 1, 1), MakeCalendar(year, 12, 31));
    }

    // PDDY: a number of years counted from the document time.
    if (Pattern.Matches("P\\d+Y", this.val) && documentTime != null)
    {
        Calendar rc = documentTime.GetDate();
        int yearRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
        if (this.beginPoint < this.endPoint)
        {
            // in the future
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            end.Add(Calendar.Year, yearRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        if (this.beginPoint > this.endPoint)
        {
            // in the past
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            start.Add(Calendar.Year, 0 - yearRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        throw new Exception("begin and end are equal " + this);
    }

    // PDDM: a number of months counted from the document time.
    if (Pattern.Matches("P\\d+M", this.val) && documentTime != null)
    {
        Calendar rc = documentTime.GetDate();
        int monthRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
        if (this.beginPoint < this.endPoint)
        {
            // in the future
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            end.Add(Calendar.Month, monthRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        if (this.beginPoint > this.endPoint)
        {
            // in the past
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            start.Add(Calendar.Month, 0 - monthRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        throw new Exception("begin and end are equal " + this);
    }

    // PDDD: a number of days counted from the document time.
    if (Pattern.Matches("P\\d+D", this.val) && documentTime != null)
    {
        Calendar rc = documentTime.GetDate();
        int dayRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
        if (this.beginPoint < this.endPoint)
        {
            // in the future
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            end.Add(Calendar.DayOfMonth, dayRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        if (this.beginPoint > this.endPoint)
        {
            // in the past
            Calendar start = CopyCalendar(rc);
            Calendar end = CopyCalendar(rc);
            start.Add(Calendar.DayOfMonth, 0 - dayRange);
            return new Pair<Calendar, Calendar>(start, end);
        }
        throw new Exception("begin and end are equal " + this);
    }

    // Seasons.
    // NOTE(review): some end dates below use day 31 (or 29) regardless of the month; how
    // MakeCalendar treats out-of-range days is not visible here - confirm.
    // YYYYSP
    if (Pattern.Matches("\\d+SP", this.val))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        Calendar start = MakeCalendar(year, 2, 1);
        Calendar end = MakeCalendar(year, 4, 31);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // YYYYSU
    if (Pattern.Matches("\\d+SU", this.val))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        Calendar start = MakeCalendar(year, 5, 1);
        Calendar end = MakeCalendar(year, 7, 31);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // YYYYFA
    if (Pattern.Matches("\\d+FA", this.val))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        Calendar start = MakeCalendar(year, 8, 1);
        Calendar end = MakeCalendar(year, 10, 31);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // YYYYWI
    if (Pattern.Matches("\\d+WI", this.val))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        Calendar start = MakeCalendar(year, 11, 1);
        Calendar end = MakeCalendar(year + 1, 1, 29);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // YYYYWDD: the digits after the 'W' are the week number.
    if (Pattern.Matches("\\d\\d\\d\\dW\\d+", this.val))
    {
        int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
        int week = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5));
        int startDay = (week - 1) * 7;
        int endDay = startDay + 6;
        Calendar start = MakeCalendar(year, startDay);
        Calendar end = MakeCalendar(year, endDay);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // PRESENT_REF: begin and end are both the document time.
    if (this.val.Equals("PRESENT_REF"))
    {
        if (documentTime == null)
        {
            // Fail with a clear message instead of a null dereference.
            throw new InvalidOperationException("cannot resolve PRESENT_REF without a document time: " + this);
        }
        Calendar rc = documentTime.GetDate();
        Calendar start = CopyCalendar(rc);
        Calendar end = CopyCalendar(rc);
        return new Pair<Calendar, Calendar>(start, end);
    }

    // .NET composite formatting uses {0}/{1}; Java-style %s would be printed literally.
    throw new Exception(string.Format("unknown value \"{0}\" in {1}", this.val, this));
}
/// <summary>Creates a node that carries a timex in addition to the base node data.</summary>
/// <param name="contents">The node text, forwarded to the base constructor.</param>
/// <param name="start">The start position, forwarded to the base constructor.</param>
/// <param name="end">The end position, forwarded to the base constructor.</param>
/// <param name="timex">The timex stored on this node.</param>
public TimexNode(string contents, int start, int end, Timex timex)
    : base(contents, start, end)
{
    this.timex = timex;
}
// NOTE: an older JDOM-based implementation of this method was dropped because of JDOM
// licensing issues; the version below is its replacement.

/// <summary>Builds an <see cref="Annotation"/> from an XML document of parsed sentences.</summary>
/// <remarks>
/// The root element must have a <c>TEXT</c> child whose <c>SENT</c> children each contain a tree
/// string. Every child of a pre-terminal becomes a token carrying its text, part-of-speech tag
/// and character offsets. The document text is the words separated by single spaces, with the
/// last separator of each non-empty sentence replaced by a newline. The document date is taken
/// from group 1 of <c>datePattern</c> applied to the root's <c>id</c> attribute.
/// </remarks>
/// <param name="xml">The XML document text.</param>
/// <returns>The annotation with document id, calendar date and sentences set.</returns>
/// <exception cref="System.Exception">The XML cannot be parsed; the parser failure is attached as the inner exception.</exception>
/// <exception cref="System.InvalidOperationException">The document id does not match the date pattern.</exception>
/// <exception cref="System.IO.IOException"/>
private static Annotation ToAnnotation(string xml)
{
    Element docElem;
    try
    {
        Builder parser = new Builder();
        StringReader @in = new StringReader(xml);
        docElem = parser.Build(@in).GetRootElement();
    }
    catch (Exception e)
    {
        // .NET composite formatting uses {0}/{1}; Java-style %s would be printed literally.
        // The original failure is chained so its stack trace is not lost.
        throw new Exception(string.Format("error:\n{0}\ninput:\n{1}", e, xml), e);
    }

    Element textElem = docElem.GetFirstChildElement("TEXT");
    StringBuilder text = new StringBuilder();
    int offset = 0;
    IList<ICoreMap> sentences = new List<ICoreMap>();
    Elements sentenceElements = textElem.GetChildElements("SENT");
    for (int crtsent = 0; crtsent < sentenceElements.Size(); crtsent++)
    {
        Element sentElem = sentenceElements.Get(crtsent);
        ICoreMap sentence = new ArrayCoreMap();
        sentence.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
        // XXX ms: is this the same as sentElem.getText() in JDOM?
        Tree tree = Tree.ValueOf(sentElem.GetChild(0).GetValue());
        IList<CoreLabel> tokens = new List<CoreLabel>();
        IList<Tree> preTerminals = PreTerminals(tree);
        foreach (Tree preTerminal in preTerminals)
        {
            string posTag = preTerminal.Value();
            foreach (Tree wordTree in preTerminal.Children())
            {
                string word = wordTree.Value();
                CoreLabel token = new CoreLabel();
                // NOTE(review): TextAnnotation is set twice with the same value; the second
                // call looks redundant, or a different annotation key was intended - confirm.
                token.Set(typeof(CoreAnnotations.TextAnnotation), word);
                token.Set(typeof(CoreAnnotations.TextAnnotation), word);
                token.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), posTag);
                token.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
                offset += word.Length;
                token.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset);
                text.Append(word);
                // One separator character follows every word.
                text.Append(' ');
                offset += 1;
                tokens.Add(token);
            }
        }
        if (preTerminals.Count > 0)
        {
            // Turn the separator after the sentence's last word into a line break.
            Sharpen.Runtime.SetCharAt(text, text.Length - 1, '\n');
        }
        // The sentence ends before the trailing separator.
        sentence.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset - 1);
        sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
        sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);
        sentences.Add(sentence);
    }

    string docID = docElem.GetAttributeValue("id");
    Matcher matcher = datePattern.Matcher(docID);
    if (!matcher.Find())
    {
        // Without a match there is no group to read the date from.
        throw new System.InvalidOperationException("document id does not match the expected date pattern: " + docID);
    }
    Calendar docDate = new Timex("DATE", matcher.Group(1)).GetDate();

    Annotation document = new Annotation(text.ToString());
    document.Set(typeof(CoreAnnotations.DocIDAnnotation), docID);
    document.Set(typeof(CoreAnnotations.CalendarAnnotation), docDate);
    document.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
    return document;
}