Exemple #1
0
        /// <summary>
        /// Determines whether another object is equal to this timex. Two timexes are
        /// equal when they are the same instance, or when they have the same runtime
        /// type and matching begin point, end point, type and value.
        /// </summary>
        /// <param name="o">The object to compare against; may be null.</param>
        /// <returns>true when the objects are considered equal; otherwise false.</returns>
        public override bool Equals(object o)
        {
            // Identical reference: nothing further to compare.
            if (ReferenceEquals(this, o))
            {
                return true;
            }
            // Null, or an instance of a different runtime type, is never equal.
            if (o == null || GetType() != o.GetType())
            {
                return false;
            }
            Timex other = (Timex)o;

            // Compare the fields in order, bailing out at the first mismatch.
            bool samePoints = beginPoint == other.beginPoint && endPoint == other.endPoint;
            if (!samePoints)
            {
                return false;
            }
            // Null-safe comparison: both null counts as equal.
            bool sameType = type != null ? type.Equals(other.type) : other.type == null;
            if (!sameType)
            {
                return false;
            }
            bool sameValue = val != null ? val.Equals(other.val) : other.val == null;
            return sameValue;
        }
            /// <summary>
            /// Splits HeidelTime output into an ordered list of nodes: one plain node for
            /// each stretch of text outside a TIMEX tag, and one timex node for the text
            /// enclosed by each open/close tag pair.
            /// </summary>
            /// <param name="output">The raw output text to chunk.</param>
            /// <returns>The nodes in the order they occur in the (trimmed) output.</returns>
            private IList <HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node> ToNodeSequence(string output)
            {
                // Strip the enclosing XML markup that HeidelTime inserts.
                output = timeMLOpen.Matcher(output).ReplaceAll(string.Empty).Trim();
                output = timeMLClose.Matcher(output).ReplaceAll(string.Empty).Trim();
                // Matchers used to chunk the remaining text into <TIMEX3> tag regions.
                Matcher openTag   = timexTagOpen.Matcher(output);
                Matcher attribute = attr.Matcher(output);
                Matcher closeTag  = timexTagClose.Matcher(output);
                IList <HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node> nodes = new List <HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node>();
                // A single attribute map is reused (and cleared) for every tag.
                Dictionary <string, string> attributes = new Dictionary <string, string>();
                // TODO: save metadata of TIMEX token positions or stuff.
                int cursor = 0;

                while (openTag.Find(cursor))
                {
                    int openStart = openTag.Start();
                    int openEnd   = openTag.End();
                    // Any text between the cursor and the opening tag becomes a plain node.
                    if (openStart > cursor)
                    {
                        string leading = Sharpen.Runtime.Substring(output, cursor, openStart);
                        ret_Add(nodes, new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node(leading, cursor, openStart));
                    }
                    attributes.Clear();
                    // Collect the key/value attributes that end inside the opening tag.
                    int attributeFrom = openStart;
                    while (attribute.Find(attributeFrom + 1) && attribute.End() < openEnd)
                    {
                        attributes[attribute.Group("key")] = attribute.Group("value");
                        attributeFrom = attribute.End();
                    }
                    // Advance to the matching close tag.
                    bool matched = closeTag.Find(openEnd);
                    System.Diagnostics.Debug.Assert(matched);
                    // Assert statements are sometimes ignored.
                    int    closeStart = closeTag.Start();
                    int    closeEnd   = closeTag.End();
                    string text       = Sharpen.Runtime.Substring(output, openEnd, closeStart);
                    Timex  timex      = ToTimex(text, attributes);
                    ret_Add(nodes, new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.TimexNode(text, openEnd, closeStart, timex));
                    cursor = closeEnd;
                }
                // Whatever follows the last close tag becomes a final plain node.
                if (cursor < output.Length)
                {
                    string trailing = Sharpen.Runtime.Substring(output, cursor, output.Length);
                    ret_Add(nodes, new HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node(trailing, cursor, output.Length));
                }
                return nodes;
            }

            /// <summary>Appends a node to the result list.</summary>
            private static void ret_Add(IList <HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node> nodes, HeidelTimeKBPAnnotator.HeidelTimeOutputReader.Node node)
            {
                nodes.Add(node);
            }
Exemple #3
0
        /// <summary>
        /// Creates a timex from an XML string (mainly for testing).
        /// </summary>
        /// <param name="xml">XML whose parsed element must be named "TIMEX3".</param>
        /// <returns>A new timex initialised from the parsed element.</returns>
        /// <exception cref="System.ArgumentException">
        /// The parsed element is not named "TIMEX3".
        /// </exception>
        public static Timex FromXml(string xml)
        {
            IElement parsed = XMLUtils.ParseElement(xml);

            // Reject anything that is not a TIMEX3 element up front.
            if (!"TIMEX3".Equals(parsed.GetNodeName()))
            {
                throw new ArgumentException("Invalid timex xml: " + xml);
            }
            // Initialising from the element alone (rather than from xml + element)
            // doesn't preserve the original input xml. It will reorder the attributes
            // so the xml of a test timex and an actual timex can match (we can't
            // control the attribute order now that nu.xom is no longer used).
            Timex result = new Timex();
            result.Init(parsed);
            return result;
        }
Exemple #4
0
        /// <summary>
        /// Converts the TIMEX3 child elements of <paramref name="docElem"/> into core maps
        /// carrying the timex, its text and its character offsets. When the tokens of
        /// <paramref name="originalDocument"/> have token offsets, each map also gets the
        /// token begin/end offsets of the token nearest to its character span.
        /// </summary>
        /// <param name="docElem">Element whose children are text nodes and TIMEX3 elements.</param>
        /// <param name="originalDocument">Document supplying sentences and tokens with offsets.</param>
        /// <returns>One core map per TIMEX3 child, in document order.</returns>
        /// <exception cref="System.Exception">
        /// A child is neither text nor a TIMEX3 element, or a TIMEX3 element does not
        /// contain exactly one child node.
        /// </exception>
        /// <exception cref="System.Collections.Generic.KeyNotFoundException">
        /// Token offsets are required but no token offset was recorded at all.
        /// </exception>
        private static IList <ICoreMap> ToTimexCoreMaps(IElement docElem, ICoreMap originalDocument)
        {
            //--Collect Token Offsets
            IDictionary <int, int> beginMap = Generics.NewHashMap();
            IDictionary <int, int> endMap   = Generics.NewHashMap();
            bool haveTokenOffsets           = true;

            foreach (ICoreMap sent in originalDocument.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                foreach (CoreLabel token in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    // Nullable so that a missing token offset can actually be detected;
                    // comparing a plain int against null is always false.
                    int? tokBegin = token.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                    int? tokEnd   = token.Get(typeof(CoreAnnotations.TokenEndAnnotation));
                    if (tokBegin == null || tokEnd == null)
                    {
                        haveTokenOffsets = false;
                    }
                    int charBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                    int charEnd   = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                    // Only record offsets that exist; absent ones are skipped by the
                    // nearest-offset search below.
                    if (tokBegin.HasValue)
                    {
                        beginMap[charBegin] = tokBegin.Value;
                    }
                    if (tokEnd.HasValue)
                    {
                        endMap[charEnd] = tokEnd.Value;
                    }
                }
            }
            IList <ICoreMap> timexMaps = new List <ICoreMap>();
            int       offset           = 0;
            INodeList docNodes         = docElem.GetChildNodes();

            for (int i = 0; i < docNodes.GetLength(); i++)
            {
                INode content = docNodes.Item(i);
                if (content is IText)
                {
                    // Plain text only advances the running character offset.
                    IText text = (IText)content;
                    offset += text.GetWholeText().Length;
                }
                else
                {
                    if (content is IElement)
                    {
                        IElement child = (IElement)content;
                        if (child.GetNodeName().Equals("TIMEX3"))
                        {
                            Timex timex = new Timex(child);
                            if (child.GetChildNodes().GetLength() != 1)
                            {
                                throw new Exception("TIMEX3 should only contain text " + child);
                            }
                            string   timexText = child.GetTextContent();
                            ICoreMap timexMap  = new ArrayCoreMap();
                            timexMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
                            timexMap.Set(typeof(CoreAnnotations.TextAnnotation), timexText);
                            int charBegin = offset;
                            timexMap.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
                            offset += timexText.Length;
                            timexMap.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset);
                            int charEnd = offset;
                            //(tokens)
                            if (haveTokenOffsets)
                            {
                                // If there is no exact match, search around the character offset.
                                int tokBegin = FindNearestTokenOffset(beginMap, charBegin);
                                int tokEnd   = FindNearestTokenOffset(endMap, charEnd);
                                timexMap.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokBegin);
                                timexMap.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokEnd);
                            }
                            timexMaps.Add(timexMap);
                        }
                        else
                        {
                            throw new Exception("unexpected element " + child);
                        }
                    }
                    else
                    {
                        throw new Exception("unexpected content " + content);
                    }
                }
            }
            return(timexMaps);
        }

        /// <summary>
        /// Looks up the token offset recorded for <paramref name="charOffset"/>; when there
        /// is no exact entry, widens the search one character at a time, checking the
        /// lower offset before the higher one at each distance.
        /// </summary>
        /// <param name="offsetToToken">Map from character offset to token offset.</param>
        /// <param name="charOffset">Character offset to resolve.</param>
        /// <returns>The token offset of the nearest recorded character offset.</returns>
        /// <exception cref="System.Collections.Generic.KeyNotFoundException">
        /// The map is empty, so no search could ever succeed.
        /// </exception>
        private static int FindNearestTokenOffset(IDictionary <int, int> offsetToToken, int charOffset)
        {
            // Guard against an endless search when there is nothing to find.
            if (offsetToToken.Count == 0)
            {
                throw new KeyNotFoundException("no token offsets available to resolve character offset " + charOffset);
            }
            int found;
            if (offsetToToken.TryGetValue(charOffset, out found))
            {
                return(found);
            }
            for (int searchStep = 1; ; searchStep++)
            {
                if (offsetToToken.TryGetValue(charOffset - searchStep, out found))
                {
                    return(found);
                }
                if (offsetToToken.TryGetValue(charOffset + searchStep, out found))
                {
                    return(found);
                }
            }
        }
        /// <summary>
        /// Turns time expressions into core maps annotated with a timex. Expressions
        /// without a temporal, or whose attributes or timex cannot be built, are
        /// skipped (a warning is logged only when verbose output is enabled).
        /// </summary>
        /// <param name="annotation">The annotation the expressions were found in.</param>
        /// <param name="timeExpressions">Expressions to convert; may be null.</param>
        /// <param name="timeIndex">Index passed through when computing timex attributes.</param>
        /// <returns>
        /// The annotated core maps, or null when <paramref name="timeExpressions"/> is null.
        /// </returns>
        private IList <ICoreMap> ToCoreMaps(ICoreMap annotation, IList <TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex)
        {
            if (timeExpressions == null)
            {
                return null;
            }
            IList <ICoreMap> result = new List <ICoreMap>(timeExpressions.Count);

            foreach (TimeExpression expression in timeExpressions)
            {
                ICoreMap        chunk    = expression.GetAnnotation();
                SUTime.Temporal temporal = expression.GetTemporal();
                // Nothing to annotate without a temporal.
                if (temporal == null)
                {
                    continue;
                }
                string documentText = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
                string chunkText    = chunk.Get(typeof(CoreAnnotations.TextAnnotation));
                if (documentText != null)
                {
                    // Make sure the text is from original (and not from concatenated tokens)
                    ChunkAnnotationUtils.AnnotateChunkText(chunk, annotation);
                    chunkText = chunk.Get(typeof(CoreAnnotations.TextAnnotation));
                }

                // Step 1: derive the timex attributes, optionally adding the range.
                IDictionary <string, string> attributes;
                try
                {
                    attributes = temporal.GetTimexAttributes(timeIndex);
                    if (options.includeRange)
                    {
                        SUTime.Temporal range = temporal.GetRange();
                        if (range != null)
                        {
                            attributes["range"] = range.ToString();
                        }
                    }
                }
                catch (Exception ex)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to get attributes from " + chunkText + ", timeIndex " + timeIndex);
                        logger.Warn(ex);
                    }
                    continue;
                }

                // Step 2: build the timex from the text and attributes.
                Timex built;
                try
                {
                    built = Timex.FromMap(chunkText, attributes);
                }
                catch (Exception ex)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to process timex " + chunkText + " with attributes " + attributes);
                        logger.Warn(ex);
                    }
                    continue;
                }
                // Timex.fromMap never returns null and if it exceptions, we've already done a continue
                System.Diagnostics.Debug.Assert(built != null);
                chunk.Set(typeof(TimeAnnotations.TimexAnnotation), built);
                result.Add(chunk);
            }
            return result;
        }
Exemple #6
0
 /// <summary>Gets two Calendars, marking the beginning and ending of this Timex's range.</summary>
 /// <param name="documentTime">
 /// The time the document containing this Timex was written. (Not
 /// necessary for resolving all Timex expressions. This may be
 /// <see langword="null"/>
 /// , but then relative time expressions cannot be
 /// resolved.)
 /// </param>
 /// <returns>The begin point and end point Calendars.</returns>
 /// <exception cref="System.NotSupportedException">This Timex has no value.</exception>
 /// <exception cref="System.ArgumentNullException">
 /// The value is "PRESENT_REF" and <paramref name="documentTime"/> is null.
 /// </exception>
 /// <exception cref="System.Exception">
 /// The value is a period (P..Y, P..M, P..D) whose begin and end points are equal,
 /// or the value matches none of the supported formats.
 /// </exception>
 public virtual Pair <Calendar, Calendar> GetRange(Timex documentTime)
 {
     if (this.val == null)
     {
         throw new NotSupportedException("no value specified for " + this);
     }
     // Each format check below returns on a match, so the checks are written as a
     // flat sequence; the order of the checks is significant.

     // YYYYMMDD or YYYYMMDDT... where the time is concatenated directly with the
     // date
     if (val.Length >= 8 && Pattern.Matches("\\d\\d\\d\\d\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 8)))
     {
         int year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 4, 6));
         int day   = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 6, 8));
         return(new Pair <Calendar, Calendar>(MakeCalendar(year, month, day), MakeCalendar(year, month, day)));
     }
     // YYYY-MM-DD or YYYY-MM-DDT...
     if (val.Length >= 10 && Pattern.Matches("\\d\\d\\d\\d-\\d\\d-\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 10)))
     {
         int year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5, 7));
         int day   = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 8, 10));
         return(new Pair <Calendar, Calendar>(MakeCalendar(year, month, day), MakeCalendar(year, month, day)));
     }
     // YYYYMMDDL+
     if (Pattern.Matches("\\d\\d\\d\\d\\d\\d\\d\\d[A-Z]+", this.val))
     {
         int year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         int month = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 4, 6));
         int day   = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 6, 8));
         return(new Pair <Calendar, Calendar>(MakeCalendar(year, month, day), MakeCalendar(year, month, day)));
     }
     // YYYYMM or YYYYMMT...
     if (val.Length >= 6 && Pattern.Matches("\\d\\d\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 6)))
     {
         int      year    = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         int      month   = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 4, 6));
         Calendar begin   = MakeCalendar(year, month, 1);
         // The range ends on the last day the begin calendar reports for its month.
         int      lastDay = begin.GetActualMaximum(Calendar.Date);
         Calendar end     = MakeCalendar(year, month, lastDay);
         return(new Pair <Calendar, Calendar>(begin, end));
     }
     // YYYY-MM or YYYY-MMT...
     if (val.Length >= 7 && Pattern.Matches("\\d\\d\\d\\d-\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 7)))
     {
         int      year    = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         int      month   = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5, 7));
         Calendar begin   = MakeCalendar(year, month, 1);
         int      lastDay = begin.GetActualMaximum(Calendar.Date);
         Calendar end     = MakeCalendar(year, month, lastDay);
         return(new Pair <Calendar, Calendar>(begin, end));
     }
     // YYYY or YYYYT...
     if (val.Length >= 4 && Pattern.Matches("\\d\\d\\d\\d", Sharpen.Runtime.Substring(this.val, 0, 4)))
     {
         int year = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         return(new Pair <Calendar, Calendar>(MakeCalendar(year, 1, 1), MakeCalendar(year, 12, 31)));
     }
     // PDDY
     if (Pattern.Matches("P\\d+Y", this.val) && documentTime != null)
     {
         Calendar rc        = documentTime.GetDate();
         // The number sits between the leading 'P' and the trailing unit letter.
         int      yearRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
         // in the future
         if (this.beginPoint < this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             end.Add(Calendar.Year, yearRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         // in the past
         if (this.beginPoint > this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             start.Add(Calendar.Year, 0 - yearRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         throw new Exception("begin and end are equal " + this);
     }
     // PDDM
     if (Pattern.Matches("P\\d+M", this.val) && documentTime != null)
     {
         Calendar rc         = documentTime.GetDate();
         int      monthRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
         // in the future
         if (this.beginPoint < this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             end.Add(Calendar.Month, monthRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         // in the past
         if (this.beginPoint > this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             start.Add(Calendar.Month, 0 - monthRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         throw new Exception("begin and end are equal " + this);
     }
     // PDDD
     if (Pattern.Matches("P\\d+D", this.val) && documentTime != null)
     {
         Calendar rc       = documentTime.GetDate();
         int      dayRange = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 1, this.val.Length - 1));
         // in the future
         if (this.beginPoint < this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             end.Add(Calendar.DayOfMonth, dayRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         // in the past
         if (this.beginPoint > this.endPoint)
         {
             Calendar start = CopyCalendar(rc);
             Calendar end   = CopyCalendar(rc);
             start.Add(Calendar.DayOfMonth, 0 - dayRange);
             return(new Pair <Calendar, Calendar>(start, end));
         }
         throw new Exception("begin and end are equal " + this);
     }
     // YYYYSP
     if (Pattern.Matches("\\d+SP", this.val))
     {
         int      year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         Calendar start = MakeCalendar(year, 2, 1);
         Calendar end   = MakeCalendar(year, 4, 31);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // YYYYSU
     if (Pattern.Matches("\\d+SU", this.val))
     {
         int      year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         Calendar start = MakeCalendar(year, 5, 1);
         Calendar end   = MakeCalendar(year, 7, 31);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // YYYYFA
     if (Pattern.Matches("\\d+FA", this.val))
     {
         int      year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         Calendar start = MakeCalendar(year, 8, 1);
         Calendar end   = MakeCalendar(year, 10, 31);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // YYYYWI
     if (Pattern.Matches("\\d+WI", this.val))
     {
         int      year  = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         Calendar start = MakeCalendar(year, 11, 1);
         Calendar end   = MakeCalendar(year + 1, 1, 29);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // YYYYWDD
     if (Pattern.Matches("\\d\\d\\d\\dW\\d+", this.val))
     {
         int      year     = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 0, 4));
         // Skip the four year digits and the 'W' separator.
         int      week     = System.Convert.ToInt32(Sharpen.Runtime.Substring(this.val, 5));
         int      startDay = (week - 1) * 7;
         int      endDay   = startDay + 6;
         Calendar start    = MakeCalendar(year, startDay);
         Calendar end      = MakeCalendar(year, endDay);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // PRESENT_REF
     if (this.val.Equals("PRESENT_REF"))
     {
         // PRESENT_REF resolves to the document time itself, so it cannot be
         // answered without one; fail with a descriptive error instead of a
         // null dereference.
         if (documentTime == null)
         {
             throw new ArgumentNullException("documentTime", "document time is required to resolve PRESENT_REF in " + this);
         }
         Calendar rc    = documentTime.GetDate();
         Calendar start = CopyCalendar(rc);
         Calendar end   = CopyCalendar(rc);
         return(new Pair <Calendar, Calendar>(start, end));
     }
     // .NET composite formatting uses {0}/{1}; Java-style %s would be emitted literally.
     throw new Exception(string.Format("unknown value \"{0}\" in {1}", this.val, this));
 }
 /// <summary>
 /// Creates a node that carries a timex in addition to the contents and
 /// start/end positions handled by the base constructor.
 /// </summary>
 /// <param name="contents">Forwarded unchanged to the base constructor.</param>
 /// <param name="start">Forwarded unchanged to the base constructor.</param>
 /// <param name="end">Forwarded unchanged to the base constructor.</param>
 /// <param name="timex">The timex stored on this node.</param>
 public TimexNode(string contents, int start, int end, Timex timex)
     : base(contents, start, end)
 {
     this.timex = timex;
 }
Exemple #8
0
        /*
         * Old implementation based on JDOM.
         * No longer maintained due to JDOM licensing issues.
         * private static Annotation toAnnotation(String xml) throws IOException {
         * Element docElem;
         * try {
         * docElem = new SAXBuilder().build(new StringReader(xml)).getRootElement();
         * } catch (JDOMException e) {
         * throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, xml));
         * }
         * Element textElem = docElem.getChild("TEXT");
         * StringBuilder text = new StringBuilder();
         * int offset = 0;
         * List<CoreMap> sentences = new ArrayList<CoreMap>();
         * for (Object sentObj: textElem.getChildren("SENT")) {
         * CoreMap sentence = new ArrayCoreMap();
         * sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
         * Element sentElem = (Element)sentObj;
         * Tree tree = Tree.valueOf(sentElem.getText());
         * List<CoreLabel> tokens = new ArrayList<CoreLabel>();
         * List<Tree> preTerminals = preTerminals(tree);
         * for (Tree preTerminal: preTerminals) {
         * String posTag = preTerminal.value();
         * for (Tree wordTree: preTerminal.children()) {
         * String word = wordTree.value();
         * CoreLabel token = new CoreLabel();
         * token.set(CoreAnnotations.TextAnnotation.class, word);
         * token.set(CoreAnnotations.TextAnnotation.class, word);
         * token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
         * token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
         * offset += word.length();
         * token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
         * text.append(word);
         * text.append(' ');
         * offset += 1;
         * tokens.add(token);
         * }
         * }
         * if (preTerminals.size() > 0) {
         * text.setCharAt(text.length() - 1, '\n');
         * }
         * sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
         * sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
         * sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
         * sentences.add(sentence);
         * }
         *
         * String docID = docElem.getAttributeValue("id");
         * Matcher matcher = datePattern.matcher(docID);
         * matcher.find();
         * Calendar docDate = new Timex(matcher.group(1)).getDate();
         *
         * Annotation document = new Annotation(text.toString());
         * document.set(CoreAnnotations.DocIDAnnotation.class, docID);
         * document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
         * document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
         * return document;
         * }
         */
        /// <summary>
        /// Parses a document given as XML into an <c>Annotation</c>. Each SENT element
        /// under the TEXT element is read as a tree; the words under its pre-terminals
        /// become tokens with part-of-speech tags and character offsets. The document
        /// text is the words of a sentence joined by single spaces, with each non-empty
        /// sentence terminated by a newline.
        /// </summary>
        /// <param name="xml">The XML text of the document.</param>
        /// <returns>
        /// The annotation, carrying the document id, the document date parsed from that
        /// id, and the sentences.
        /// </returns>
        /// <exception cref="System.Exception">
        /// The XML could not be parsed; the original failure is attached as the inner exception.
        /// </exception>
        private static Annotation ToAnnotation(string xml)
        {
            Element docElem;

            try
            {
                Builder      parser = new Builder();
                StringReader @in    = new StringReader(xml);
                docElem = parser.Build(@in).GetRootElement();
            }
            catch (Exception e)
            {
                // .NET composite formatting uses {0}/{1}; Java-style %s would be emitted
                // literally. The cause is also chained so its stack trace is not lost.
                throw new Exception(string.Format("error:\n{0}\ninput:\n{1}", e, xml), e);
            }
            Element          textElem         = docElem.GetFirstChildElement("TEXT");
            StringBuilder    text             = new StringBuilder();
            int              offset           = 0;
            IList <ICoreMap> sentences        = new List <ICoreMap>();
            Elements         sentenceElements = textElem.GetChildElements("SENT");

            for (int crtsent = 0; crtsent < sentenceElements.Size(); crtsent++)
            {
                Element  sentElem = sentenceElements.Get(crtsent);
                ICoreMap sentence = new ArrayCoreMap();
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
                Tree tree = Tree.ValueOf(sentElem.GetChild(0).GetValue());
                // XXX ms: is this the same as sentElem.getText() in JDOM?
                IList <CoreLabel> tokens       = new List <CoreLabel>();
                IList <Tree>      preTerminals = PreTerminals(tree);
                foreach (Tree preTerminal in preTerminals)
                {
                    string posTag = preTerminal.Value();
                    foreach (Tree wordTree in preTerminal.Children())
                    {
                        string    word  = wordTree.Value();
                        CoreLabel token = new CoreLabel();
                        token.Set(typeof(CoreAnnotations.TextAnnotation), word);
                        token.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), posTag);
                        token.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
                        offset += word.Length;
                        token.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset);
                        // Every word is followed by one separator character in the text.
                        text.Append(word);
                        text.Append(' ');
                        offset += 1;
                        tokens.Add(token);
                    }
                }
                if (preTerminals.Count > 0)
                {
                    // Turn the separator after the sentence's last word into a newline.
                    Sharpen.Runtime.SetCharAt(text, text.Length - 1, '\n');
                }
                // Exclude the trailing separator from the sentence span.
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset - 1);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
                sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);
                sentences.Add(sentence);
            }
            string  docID   = docElem.GetAttributeValue("id");
            Matcher matcher = datePattern.Matcher(docID);

            // NOTE(review): the result of Find() is not checked; presumably every
            // document id is expected to match datePattern - confirm with callers.
            matcher.Find();
            Calendar   docDate  = new Timex("DATE", matcher.Group(1)).GetDate();
            Annotation document = new Annotation(text.ToString());

            document.Set(typeof(CoreAnnotations.DocIDAnnotation), docID);
            document.Set(typeof(CoreAnnotations.CalendarAnnotation), docDate);
            document.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
            return(document);
        }