/// <summary>
/// Extracts time expressions from <paramref name="annotation"/>, taking the shared time index and the
/// document date from <paramref name="docAnnotation"/> when one is supplied.
/// </summary>
/// <param name="annotation">Annotation to extract from. Its section date, when set, is used as the reference date.</param>
/// <param name="docAnnotation">
/// Enclosing document annotation, or null. When non-null, its time index is reused (and created and stored
/// on it if absent) and its document date or calendar provides the fallback date.
/// </param>
/// <returns>The result of the (annotation, reference date string, time index) overload.</returns>
/// <exception cref="System.Exception">Thrown when the chosen date string cannot be parsed.</exception>
public virtual IList <ICoreMap> ExtractTimeExpressionCoreMaps(ICoreMap annotation, ICoreMap docAnnotation)
        {
            SUTime.TimeIndex timeIndex;

            if (docAnnotation != null)
            {
                // Reuse the index stored on the document; create and store one on first use.
                timeIndex = docAnnotation.Get(typeof(TimeExpression.TimeIndexAnnotation));
                if (timeIndex == null)
                {
                    docAnnotation.Set(typeof(TimeExpression.TimeIndexAnnotation), timeIndex = new SUTime.TimeIndex());
                }
            }
            else
            {
                timeIndex = new SUTime.TimeIndex();
            }

            // Read once: the section date drives both the cached document date and the reference date.
            string sectionDate = annotation.Get(typeof(CoreAnnotations.SectionDateAnnotation));
            string docDate     = null;

            if (docAnnotation != null)
            {
                // default look for the sentence's forum post date
                // if it doesn't have one, back off to the document date
                if (sectionDate != null)
                {
                    docDate = sectionDate;
                }
                else
                {
                    docDate = docAnnotation.Get(typeof(CoreAnnotations.DocDateAnnotation));
                }
                if (docDate == null)
                {
                    Calendar cal = docAnnotation.Get(typeof(CoreAnnotations.CalendarAnnotation));
                    if (cal == null)
                    {
                        if (options.verbose)
                        {
                            logger.Warn("WARNING: No document date specified");
                        }
                    }
                    else
                    {
                        // HH (hour of day, 0-23) instead of hh (1-12): the pattern has no AM/PM marker,
                        // so a 12-hour field would render afternoon times as morning ones.
                        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss");
                        docDate = dateFormat.Format(cal.GetTime());
                    }
                }
            }
            if (StringUtils.IsNullOrEmpty(docDate))
            {
                docDate = null;
            }
            // Only fill in the cached document date; never overwrite one that is already set.
            if (timeIndex.docDate == null && docDate != null)
            {
                try
                {
                    // TODO: have more robust parsing of document date?  docDate may not have century....
                    // TODO: if docDate didn't change, we can cache the parsing of the docDate and not repeat it for every sentence
                    timeIndex.docDate = SUTime.ParseDateTime(docDate, true);
                }
                catch (Exception e)
                {
                    throw new Exception("Could not parse date string: [" + docDate + "]", e);
                }
            }
            // The section date wins as reference date even when no document annotation was given.
            string refDate = (sectionDate != null) ? sectionDate : docDate;

            return(ExtractTimeExpressionCoreMaps(annotation, refDate, timeIndex));
        }
 /// <summary>
 /// Parses <paramref name="refDateStr"/> into a reference date and delegates to the overload that takes
 /// an already-parsed date.
 /// </summary>
 /// <param name="annotation">Annotation to extract time expressions from.</param>
 /// <param name="refDateStr">Reference date as text, or null when no reference date is available.</param>
 /// <param name="timeIndex">Time index passed through to the delegate overload.</param>
 /// <returns>The time expressions returned by the delegate overload.</returns>
 /// <exception cref="System.Exception">Thrown when <paramref name="refDateStr"/> cannot be parsed.</exception>
 public virtual IList <TimeExpression> ExtractTimeExpressions(ICoreMap annotation, string refDateStr, SUTime.TimeIndex timeIndex)
 {
     SUTime.Time parsedRefDate;

     if (refDateStr == null)
     {
         // No date text given: the delegate receives a null reference date.
         parsedRefDate = null;
     }
     else
     {
         try
         {
             // TODO: have more robust parsing of document date?  docDate may not have century....
             // TODO: if docDate didn't change, we can cache the parsing of the docDate and not repeat it for every sentence
             parsedRefDate = SUTime.ParseDateTime(refDateStr, true);
         }
         catch (Exception cause)
         {
             throw new Exception("Could not parse date string: [" + refDateStr + "]", cause);
         }
     }
     return(ExtractTimeExpressions(annotation, parsedRefDate, timeIndex));
 }
        /// <summary>
        /// Extracts the time expressions found in <paramref name="annotation"/> and resolves them against a
        /// reference date.
        /// </summary>
        /// <param name="annotation">Annotation to search; a NumerizedTokensAnnotation is set on it if it has none.</param>
        /// <param name="refDate">Reference date, or null to fall back on the document date cached in <paramref name="timeIndex"/>.</param>
        /// <param name="timeIndex">Index whose docDate field caches the document date; dereferenced unconditionally, so it must not be null.</param>
        /// <returns>The extracted time expressions, sorted with ExprTokenOffsetsNestedFirstComparator.</returns>
        public virtual IList <TimeExpression> ExtractTimeExpressions(ICoreMap annotation, SUTime.Time refDate, SUTime.TimeIndex timeIndex)
        {
            // Merge numbers only when the annotation does not carry numerized tokens yet.
            bool hasNumerizedTokens = annotation.ContainsKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));
            if (!hasNumerizedTokens)
            {
                try
                {
                    IList <ICoreMap> numerized = NumberNormalizer.FindAndMergeNumbers(annotation);
                    annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numerized);
                }
                catch (NumberFormatException badNumber)
                {
                    // A bad number is logged and replaced by an empty list so extraction can continue.
                    logger.Warn("Caught bad number: " + badNumber.Message);
                    annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), new List <ICoreMap>());
                }
            }

            IList <MatchedExpression> matches         = expressionExtractor.ExtractExpressions(annotation);
            IList <TimeExpression>    timeExpressions = new List <TimeExpression>(matches.Count);
            foreach (MatchedExpression match in matches)
            {
                // TODO: Fix the extraction pipeline so it creates TimeExpression instead of MatchedExpressions
                // Until then the TimeExpression is taken from the match's annotation, which avoids duplicate copies.
                TimeExpression attached = match.GetAnnotation().Get(typeof(TimeExpression.Annotation));
                if (attached == null)
                {
                    continue;
                }
                timeExpressions.Add(attached);
            }

            // The document date is cached in the time index the first time one becomes available.
            if (timeIndex.docDate == null)
            {
                if (refDate != null)
                {
                    timeIndex.docDate = refDate;
                }
                else if (options.searchForDocDate)
                {
                    // No document date was given, but the options ask to search the expressions for one.
                    timeIndex.docDate = FindReferenceDate(timeExpressions);
                }
            }

            // Without an explicit reference date, fall back on the cached document date (which may be null too).
            SUTime.Time resolveAgainst = (refDate != null) ? refDate : timeIndex.docDate;

            // Some resolving is done even if the reference date is null...
            ResolveTimeExpressions(annotation, timeExpressions, resolveAgainst);

            if (options.restrictToTimex3)
            {
                // Keep only TIMEX3 compatible expressions; an incompatible one may still contribute compatible children.
                IList <TimeExpression> timex3Only = new List <TimeExpression>(timeExpressions.Count);
                foreach (TimeExpression candidate in timeExpressions)
                {
                    if (candidate.GetTemporal() != null && candidate.GetTemporal().GetTimexValue() != null)
                    {
                        timex3Only.Add(candidate);
                        continue;
                    }
                    IList <ICoreMap> candidateChildren = candidate.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
                    if (candidateChildren == null)
                    {
                        continue;
                    }
                    foreach (ICoreMap candidateChild in candidateChildren)
                    {
                        TimeExpression childExpression = candidateChild.Get(typeof(TimeExpression.Annotation));
                        if (childExpression == null)
                        {
                            continue;
                        }
                        ResolveTimeExpression(annotation, childExpression, resolveAgainst);
                        if (childExpression.GetTemporal() != null && childExpression.GetTemporal().GetTimexValue() != null)
                        {
                            timex3Only.Add(childExpression);
                        }
                    }
                }
                timeExpressions = timex3Only;
            }

            // Add back nested time expressions for ranges....
            // For now only one level of nesting...
            if (options.includeNested)
            {
                IList <TimeExpression> nestedExpressions = new List <TimeExpression>();
                foreach (TimeExpression parent in timeExpressions)
                {
                    if (!parent.IsIncludeNested())
                    {
                        continue;
                    }
                    IList <ICoreMap> parentChildren = parent.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
                    if (parentChildren == null)
                    {
                        continue;
                    }
                    foreach (ICoreMap parentChild in parentChildren)
                    {
                        TimeExpression nestedExpression = parentChild.Get(typeof(TimeExpression.Annotation));
                        if (nestedExpression != null)
                        {
                            nestedExpressions.Add(nestedExpression);
                        }
                    }
                }
                ResolveTimeExpressions(annotation, nestedExpressions, resolveAgainst);
                Sharpen.Collections.AddAll(timeExpressions, nestedExpressions);
            }

            timeExpressions.Sort(MatchedExpression.ExprTokenOffsetsNestedFirstComparator);
            // Resolve once more after filtering, nesting and sorting; some resolving is done even without a reference date.
            ResolveTimeExpressions(annotation, timeExpressions, resolveAgainst);
            return(timeExpressions);
        }
        /// <summary>
        /// Converts time expressions into their core maps, attaching a Timex built from each expression's temporal.
        /// </summary>
        /// <param name="annotation">Annotation the expressions came from; when it has text, chunk text is re-annotated from it.</param>
        /// <param name="timeExpressions">Expressions to convert; may be null.</param>
        /// <param name="timeIndex">Index passed to the temporal when computing TIMEX attributes.</param>
        /// <returns>
        /// Null when <paramref name="timeExpressions"/> is null; otherwise the core maps of every expression that has
        /// a temporal and whose Timex could be built. Expressions that fail are skipped (and logged when verbose).
        /// </returns>
        private IList <ICoreMap> ToCoreMaps(ICoreMap annotation, IList <TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex)
        {
            if (timeExpressions == null)
            {
                return(null);
            }

            IList <ICoreMap> result = new List <ICoreMap>(timeExpressions.Count);
            foreach (TimeExpression expression in timeExpressions)
            {
                ICoreMap        expressionMap = expression.GetAnnotation();
                SUTime.Temporal temporal      = expression.GetTemporal();
                if (temporal == null)
                {
                    // Nothing to turn into a Timex.
                    continue;
                }

                string sourceText = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
                if (sourceText != null)
                {
                    // Make sure the text is from original (and not from concatenated tokens)
                    ChunkAnnotationUtils.AnnotateChunkText(expressionMap, annotation);
                }
                string text = expressionMap.Get(typeof(CoreAnnotations.TextAnnotation));

                IDictionary <string, string> attributes;
                try
                {
                    attributes = temporal.GetTimexAttributes(timeIndex);
                    if (options.includeRange)
                    {
                        SUTime.Temporal range = temporal.GetRange();
                        if (range != null)
                        {
                            attributes["range"] = range.ToString();
                        }
                    }
                }
                catch (Exception attributeFailure)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to get attributes from " + text + ", timeIndex " + timeIndex);
                        logger.Warn(attributeFailure);
                    }
                    continue;
                }

                Timex timex;
                try
                {
                    timex = Timex.FromMap(text, attributes);
                }
                catch (Exception timexFailure)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to process timex " + text + " with attributes " + attributes);
                        logger.Warn(timexFailure);
                    }
                    continue;
                }

                // Reaching this point means FromMap returned normally; a failure would have skipped the expression.
                System.Diagnostics.Debug.Assert(timex != null);
                expressionMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
                result.Add(expressionMap);
            }
            return(result);
        }
        /// <summary>
        /// Extracts time expressions from <paramref name="annotation"/> and converts them to core maps.
        /// </summary>
        /// <param name="annotation">Annotation to extract from.</param>
        /// <param name="docDate">Reference date as text, or null.</param>
        /// <param name="timeIndex">Time index used for both extraction and conversion.</param>
        /// <returns>The core maps produced by ToCoreMaps for the extracted expressions.</returns>
        public virtual IList <ICoreMap> ExtractTimeExpressionCoreMaps(ICoreMap annotation, string docDate, SUTime.TimeIndex timeIndex)
        {
            // Extraction first, conversion second; both share the same time index.
            return(ToCoreMaps(annotation, ExtractTimeExpressions(annotation, docDate, timeIndex), timeIndex));
        }
 /// <summary>
 /// Extracts time expressions using a newly created time index that is not shared with any other call.
 /// </summary>
 /// <param name="annotation">Annotation to extract from.</param>
 /// <param name="docDate">Reference date as text, or null.</param>
 /// <returns>The result of the three-argument overload.</returns>
 public virtual IList <ICoreMap> ExtractTimeExpressionCoreMaps(ICoreMap annotation, string docDate)
 {
     return(ExtractTimeExpressionCoreMaps(annotation, docDate, new SUTime.TimeIndex()));
 }
Пример #7
0
        /// <summary>Helper method for callers that have a single sentence rather than a complete Annotation.</summary>
        /// <param name="sentence">Sentence to process; an aligned copy of it is what gets passed to the extractor.</param>
        /// <param name="docDate">Document date as text; null or empty means no document date.</param>
        /// <param name="timeIndex">Time index handed to the extractor.</param>
        /// <returns>A list of CoreMap.  Each CoreMap represents a detected temporal expression.</returns>
        public virtual IList <ICoreMap> AnnotateSingleSentence(ICoreMap sentence, string docDate, SUTime.TimeIndex timeIndex)
        {
            ICoreMap alignedCopy = NumberSequenceClassifier.AlignSentence(sentence);

            // An empty date string is treated the same as a missing one.
            string effectiveDocDate = (docDate != null && docDate.IsEmpty()) ? null : docDate;

            return(timexExtractor.ExtractTimeExpressionCoreMaps(alignedCopy, effectiveDocDate, timeIndex));
        }
Пример #8
0
        /// <summary>
        /// Finds time expressions in <paramref name="annotation"/> and stores them on it under
        /// TimeAnnotations.TimexAnnotations.
        /// </summary>
        /// <remarks>
        /// When the annotation has sentences, each sentence is processed separately: its time expressions and
        /// numerized tokens are stored on the sentence, and the combined lists are stored on the annotation.
        /// Otherwise the annotation itself is processed as a single sentence. The document date comes from
        /// DocDateAnnotation, falling back on a date formatted from CalendarAnnotation.
        /// </remarks>
        /// <param name="annotation">Annotation to process and update.</param>
        public virtual void Annotate(Annotation annotation)
        {
            // One index for the whole call, shared by all sentences.
            SUTime.TimeIndex timeIndex = new SUTime.TimeIndex();
            string           docDate   = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));

            if (docDate == null)
            {
                Calendar cal = annotation.Get(typeof(CoreAnnotations.CalendarAnnotation));
                if (cal == null)
                {
                    if (!quiet)
                    {
                        log.Warn("No document date specified");
                    }
                }
                else
                {
                    // HH (hour of day, 0-23) instead of hh (1-12): the pattern has no AM/PM marker,
                    // so a 12-hour field would render afternoon times as morning ones.
                    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss");
                    docDate = dateFormat.Format(cal.GetTime());
                }
            }
            IList <ICoreMap> allTimeExpressions;
            // assigned in both branches below
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            if (sentences != null)
            {
                allTimeExpressions = new List <ICoreMap>();
                IList <ICoreMap> allNumerics = new List <ICoreMap>();
                foreach (ICoreMap sentence in sentences)
                {
                    // make sure that token character offsets align with the actual sentence text
                    // They may not align due to token normalizations, such as "(" to "-LRB-".
                    ICoreMap alignedSentence = NumberSequenceClassifier.AlignSentence(sentence);
                    // uncomment the next line for verbose dumping of tokens....
                    // log.info("SENTENCE: " + ((ArrayCoreMap) sentence).toShorterString());
                    IList <ICoreMap> timeExpressions = timexExtractor.ExtractTimeExpressionCoreMaps(alignedSentence, docDate, timeIndex);
                    if (timeExpressions != null)
                    {
                        Sharpen.Collections.AddAll(allTimeExpressions, timeExpressions);
                        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timeExpressions);
                        // Tag each expression with the index of the sentence it was found in.
                        foreach (ICoreMap timeExpression in timeExpressions)
                        {
                            timeExpression.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                        }
                    }
                    // Copy the numerized tokens from the aligned sentence onto the original sentence.
                    IList <ICoreMap> numbers = alignedSentence.Get(typeof(CoreAnnotations.NumerizedTokensAnnotation));
                    if (numbers != null)
                    {
                        sentence.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numbers);
                        Sharpen.Collections.AddAll(allNumerics, numbers);
                    }
                }
                annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), allNumerics);
            }
            else
            {
                // No sentence split available: treat the whole annotation as one sentence.
                allTimeExpressions = AnnotateSingleSentence(annotation, docDate, timeIndex);
            }
            annotation.Set(typeof(TimeAnnotations.TimexAnnotations), allTimeExpressions);
        }