/// <summary>
/// Runs <c>extractor</c> over <paramref name="annotation"/>, first making sure the
/// annotation carries a <c>NumerizedTokensAnnotation</c> entry.
/// </summary>
/// <param name="annotation">
/// Annotation to extract from. When it has no <c>NumerizedTokensAnnotation</c> key, the
/// result of <c>NumberNormalizer.FindAndMergeNumbers</c> is stored under that key.
/// </param>
/// <returns>The value returned by <c>extractor.ExtractExpressions</c> for the annotation.</returns>
public virtual IList<MatchedExpression> Extract(ICoreMap annotation)
{
    bool alreadyNumerized = annotation.ContainsKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));
    if (!alreadyNumerized)
    {
        // Merge number tokens and store them on the annotation before extraction runs.
        IList<ICoreMap> numerizedTokens = NumberNormalizer.FindAndMergeNumbers(annotation);
        annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numerizedTokens);
    }

    return extractor.ExtractExpressions(annotation);
}
/// <summary>
/// Extracts the time expressions found in <paramref name="annotation"/> and resolves them
/// against a reference date.
/// </summary>
/// <remarks>
/// Steps, in order: make sure the annotation has numerized tokens; run
/// <c>expressionExtractor</c>; collect the <c>TimeExpression</c> attached to each match;
/// settle the reference date (reading and possibly filling <c>timeIndex.docDate</c>);
/// resolve; optionally keep only TIMEX3-compatible expressions; optionally append nested
/// children; sort; resolve once more.
/// </remarks>
/// <param name="annotation">
/// Annotation to extract from. A <c>NumerizedTokensAnnotation</c> entry is set on it when
/// it has none (an empty list if number merging throws <c>NumberFormatException</c>).
/// </param>
/// <param name="refDate">
/// Reference date used for resolution. May be null, in which case
/// <c>timeIndex.docDate</c> is used instead (which may itself still be null).
/// </param>
/// <param name="timeIndex">
/// Holder of the document date. When its <c>docDate</c> is null it is set to
/// <paramref name="refDate"/>, or, if that is null and <c>options.searchForDocDate</c> is
/// set, to the result of <c>FindReferenceDate</c>.
/// </param>
/// <returns>The sorted list of resolved time expressions.</returns>
public virtual IList<TimeExpression> ExtractTimeExpressions(ICoreMap annotation, SUTime.Time refDate, SUTime.TimeIndex timeIndex)
{
    if (!annotation.ContainsKey(typeof(CoreAnnotations.NumerizedTokensAnnotation)))
    {
        try
        {
            IList<ICoreMap> numerizedTokens = NumberNormalizer.FindAndMergeNumbers(annotation);
            annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numerizedTokens);
        }
        catch (NumberFormatException badNumber)
        {
            // A bad number is logged and extraction continues with an empty numerized-token list.
            logger.Warn("Caught bad number: " + badNumber.Message);
            annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), new List<ICoreMap>());
        }
    }

    IList<MatchedExpression> matches = expressionExtractor.ExtractExpressions(annotation);
    IList<TimeExpression> results = new List<TimeExpression>(matches.Count);
    foreach (MatchedExpression match in matches)
    {
        // TODO: Fix the extraction pipeline so it creates TimeExpression instead of MatchedExpression.
        // Until then, reuse the TimeExpression already stored on the match's annotation,
        // which also avoids holding duplicate copies.
        TimeExpression attached = match.GetAnnotation().Get(typeof(TimeExpression.Annotation));
        if (attached == null)
        {
            continue;
        }
        results.Add(attached);
    }

    // The document date is cached in the time index.
    if (timeIndex.docDate == null)
    {
        if (refDate != null)
        {
            timeIndex.docDate = refDate;
        }
        else if (options.searchForDocDate)
        {
            // No document date was supplied, but the options ask us to look for one.
            timeIndex.docDate = FindReferenceDate(results);
        }
    }

    // No explicit reference date: fall back to the cached document date.
    if (refDate == null)
    {
        refDate = timeIndex.docDate;
    }

    // Some resolving is done even when refDate is null.
    ResolveTimeExpressions(annotation, results, refDate);

    if (options.restrictToTimex3)
    {
        // Keep only expressions that have a temporal with a TIMEX value; for the others,
        // resolve their children and keep those that qualify.
        IList<TimeExpression> timexCompatible = new List<TimeExpression>(results.Count);
        foreach (TimeExpression candidate in results)
        {
            if (candidate.GetTemporal() != null && candidate.GetTemporal().GetTimexValue() != null)
            {
                timexCompatible.Add(candidate);
                continue;
            }

            IList<ICoreMap> childMaps = candidate.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
            if (childMaps == null)
            {
                continue;
            }

            foreach (ICoreMap childMap in childMaps)
            {
                TimeExpression childExpr = childMap.Get(typeof(TimeExpression.Annotation));
                if (childExpr == null)
                {
                    continue;
                }

                ResolveTimeExpression(annotation, childExpr, refDate);
                if (childExpr.GetTemporal() != null && childExpr.GetTemporal().GetTimexValue() != null)
                {
                    timexCompatible.Add(childExpr);
                }
            }
        }
        results = timexCompatible;
    }

    // Add back nested time expressions (e.g. for ranges). Only one level of nesting for now.
    if (options.includeNested)
    {
        IList<TimeExpression> nested = new List<TimeExpression>();
        foreach (TimeExpression parent in results)
        {
            if (!parent.IsIncludeNested())
            {
                continue;
            }

            IList<ICoreMap> parentChildren = parent.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
            if (parentChildren == null)
            {
                continue;
            }

            foreach (ICoreMap childMap in parentChildren)
            {
                TimeExpression nestedExpr = childMap.Get(typeof(TimeExpression.Annotation));
                if (nestedExpr != null)
                {
                    nested.Add(nestedExpr);
                }
            }
        }

        ResolveTimeExpressions(annotation, nested, refDate);
        Sharpen.Collections.AddAll(results, nested);
    }

    results.Sort(MatchedExpression.ExprTokenOffsetsNestedFirstComparator);

    // Some resolving is done even when refDate is null.
    ResolveTimeExpressions(annotation, results, refDate);
    return results;
}