Ejemplo n.º 1
0
        /// <summary>
        /// Convenience entry point for callers that hold a single sentence rather than a
        /// complete Annotation.
        /// </summary>
        /// <param name="sentence">The sentence to scan; it is first passed through <c>NumberSequenceClassifier.AlignSentence</c>.</param>
        /// <param name="docDate">Document date string. <c>null</c> and the empty string are both forwarded as <c>null</c>.</param>
        /// <param name="timeIndex">Time index handed unchanged to the extractor.</param>
        /// <returns>A list of CoreMap.  Each CoreMap represents a detected temporal expression.</returns>
        public virtual IList <ICoreMap> AnnotateSingleSentence(ICoreMap sentence, string docDate, SUTime.TimeIndex timeIndex)
        {
            ICoreMap aligned = NumberSequenceClassifier.AlignSentence(sentence);

            // An empty document date carries no information, so it is treated the same as a missing one.
            string effectiveDocDate = string.IsNullOrEmpty(docDate) ? null : docDate;

            return timexExtractor.ExtractTimeExpressionCoreMaps(aligned, effectiveDocDate, timeIndex);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Extracts temporal expressions from <paramref name="annotation"/> and stores them on it
        /// under <c>TimeAnnotations.TimexAnnotations</c>.
        /// </summary>
        /// <remarks>
        /// The document date is read from <c>CoreAnnotations.DocDateAnnotation</c>; when that is absent
        /// it is derived from <c>CoreAnnotations.CalendarAnnotation</c>. If neither is present a warning
        /// is logged (unless <c>quiet</c> is set) and extraction proceeds with a null document date.
        /// When the annotation has sentences, each sentence is processed separately and also receives
        /// its own <c>TimexAnnotations</c> and <c>NumerizedTokensAnnotation</c>; otherwise the whole
        /// annotation is processed as one sentence via <c>AnnotateSingleSentence</c>.
        /// </remarks>
        /// <param name="annotation">The annotation to read from and write results to.</param>
        public virtual void Annotate(Annotation annotation)
        {
            SUTime.TimeIndex timeIndex = new SUTime.TimeIndex();
            string           docDate   = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));

            if (docDate == null)
            {
                Calendar cal = annotation.Get(typeof(CoreAnnotations.CalendarAnnotation));
                if (cal == null)
                {
                    if (!quiet)
                    {
                        log.Warn("No document date specified");
                    }
                }
                else
                {
                    // Use the 24-hour field "HH" (00-23). The 12-hour field "hh" without an AM/PM
                    // marker would render afternoon times identically to morning times.
                    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss");
                    docDate = dateFormat.Format(cal.GetTime());
                }
            }
            // Assigned in exactly one of the two branches below.
            IList <ICoreMap> allTimeExpressions;
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            if (sentences != null)
            {
                allTimeExpressions = new List <ICoreMap>();
                IList <ICoreMap> allNumerics = new List <ICoreMap>();
                foreach (ICoreMap sentence in sentences)
                {
                    // make sure that token character offsets align with the actual sentence text
                    // They may not align due to token normalizations, such as "(" to "-LRB-".
                    ICoreMap alignedSentence = NumberSequenceClassifier.AlignSentence(sentence);
                    IList <ICoreMap> timeExpressions = timexExtractor.ExtractTimeExpressionCoreMaps(alignedSentence, docDate, timeIndex);
                    if (timeExpressions != null)
                    {
                        Sharpen.Collections.AddAll(allTimeExpressions, timeExpressions);
                        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timeExpressions);
                        // Tag every expression with the index of the sentence it was found in.
                        foreach (ICoreMap timeExpression in timeExpressions)
                        {
                            timeExpression.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                        }
                    }
                    // Numerized tokens are read from the aligned copy, so copy them back onto the
                    // original sentence and collect them for the document-level annotation.
                    IList <ICoreMap> numbers = alignedSentence.Get(typeof(CoreAnnotations.NumerizedTokensAnnotation));
                    if (numbers != null)
                    {
                        sentence.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numbers);
                        Sharpen.Collections.AddAll(allNumerics, numbers);
                    }
                }
                annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), allNumerics);
            }
            else
            {
                // No sentence split available: treat the whole annotation as a single sentence.
                allTimeExpressions = AnnotateSingleSentence(annotation, docDate, timeIndex);
            }
            annotation.Set(typeof(TimeAnnotations.TimexAnnotations), allTimeExpressions);
        }