/// <summary>
/// Runs every configured date/time sub-extractor over <paramref name="text"/>, merges
/// their results, applies the post-filters, and returns the results ordered by start position.
/// </summary>
/// <param name="text">The input text to scan.</param>
/// <param name="reference">Reference date handed to each sub-extractor.</param>
/// <returns>The merged extraction results, sorted by <c>Start</c>.</returns>
public List<ExtractResult> Extract(string text, DateObject reference)
{
    // Tests a single option flag against the current configuration.
    bool IsEnabled(DateTimeOptions flag) => (this.config.Options & flag) != 0;

    var results = new List<ExtractResult>();
    var originalText = text;
    List<MatchResult<string>> removedWords = null;

    // In preview mode, superfluous words are removed before extraction and recovered at the end.
    if (IsEnabled(DateTimeOptions.EnablePreview))
    {
        text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(
            text,
            this.config.SuperfluousWordMatcher,
            out removedWords);
    }

    // Merge order matters: conflicting results are resolved while merging.
    AddTo(results, this.config.DateExtractor.Extract(text, reference), text);
    AddTo(results, this.config.TimeExtractor.Extract(text, reference), text);
    AddTo(results, this.config.DatePeriodExtractor.Extract(text, reference), text);
    AddTo(results, this.config.DurationExtractor.Extract(text, reference), text);
    AddTo(results, this.config.TimePeriodExtractor.Extract(text, reference), text);
    AddTo(results, this.config.DateTimePeriodExtractor.Extract(text, reference), text);
    AddTo(results, this.config.DateTimeExtractor.Extract(text, reference), text);
    AddTo(results, this.config.SetExtractor.Extract(text, reference), text);
    AddTo(results, this.config.HolidayExtractor.Extract(text, reference), text);

    if (IsEnabled(DateTimeOptions.EnablePreview))
    {
        AddTo(results, this.config.TimeZoneExtractor.Extract(text, reference), text);
        results = this.config.TimeZoneExtractor.RemoveAmbiguousTimezone(results);
    }

    // Must run after all extractors above: it takes the results gathered so far as input.
    AddTo(results, NumberEndingRegexMatch(text, results), text);

    // Let the alternative-expression extractor rework the merged results.
    if (IsEnabled(DateTimeOptions.ExtendedTypes))
    {
        results = this.config.DateTimeAltExtractor.Extract(results, text, reference);
    }

    results = FilterUnspecificDatePeriod(results);
    results = FilterAmbiguity(results, text);
    results = AddMod(results, text);

    // Calendar mode applies an additional filter list.
    if (IsEnabled(DateTimeOptions.CalendarMode))
    {
        results = CheckCalendarFilterList(results, text);
    }

    results = results.OrderBy(item => item.Start).ToList();

    // Recover the words removed during pre-processing, using the original text.
    if (IsEnabled(DateTimeOptions.EnablePreview))
    {
        results = MatchingUtil.PosProcessExtractionRecoverSuperfluousWords(results, removedWords, originalText);
    }

    return results;
}
/// <summary>
/// Parses a single extracted date/time mention. A modifier found at the start of the text
/// (or, when <c>CheckBothBeforeAfter</c> applies, at the end), or a trailing <c>SuffixAfter</c>
/// match, is stripped from <paramref name="er"/> before the mention is passed to
/// <c>ParseResult</c>; afterwards the modifier text is re-attached to the parse result and
/// the matching Mod value is combined into its resolution.
/// </summary>
/// <param name="er">
/// The extraction to parse. Its <c>Start</c>, <c>Length</c> and <c>Text</c> are modified in place
/// while superfluous words and modifiers are stripped.
/// </param>
/// <param name="refTime">Reference date passed through to <c>ParseResult</c>.</param>
/// <returns>The parse result, or <c>null</c> when <c>ParseResult</c> yields no result.</returns>
public DateTimeParseResult Parse(ExtractResult er, DateObject refTime)
{
    var referenceTime = refTime;
    DateTimeParseResult pr = null;

    var originText = er.Text;
    if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
    {
        er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _);
        er.Length += er.Text.Length - originText.Length;
    }

    bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasEqual = false, hasDateAfter = false;

    // "InclusiveModifier" means MOD should include the start/end time.
    // For example, cases like "on or later than", "earlier than or in" have an inclusive modifier.
    var hasInclusiveModifier = false;

    // True when the modifier that was actually stripped sat at the END of the text.
    var matchIsAfter = false;
    var modStr = string.Empty;

    // Push: removes the selected modifier from er and remembers its text and position.
    void StripModifier(int modLength, string modValue, bool isSuffix)
    {
        matchIsAfter = isSuffix;
        er.Start += isSuffix ? 0 : modLength;
        er.Length -= modLength;
        er.Text = isSuffix ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(modLength);
        modStr = modValue;
    }

    // Pop: puts the remembered modifier text back on the side it was taken from.
    void ReattachModifier()
    {
        pr.Length += modStr.Length;
        pr.Start -= matchIsAfter ? 0 : modStr.Length;
        pr.Text = matchIsAfter ? pr.Text + modStr : modStr + pr.Text;
    }

    // Analyze and process modifiers
    if (er.Metadata != null && er.Metadata.HasMod)
    {
        var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true);
        var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true);
        var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true);
        var aroundMatch = Config.AroundRegex.MatchBegin(er.Text, trim: true);
        var equalMatch = Config.EqualRegex.MatchBegin(er.Text, trim: true);

        // The suffix state is tracked per modifier. A single shared flag would mark a modifier
        // that matched at the START as a suffix whenever a different, lower-priority modifier
        // happened to match at the end, and the wrong end of the text would then be cut off.
        bool beforeIsSuffix = false, afterIsSuffix = false, sinceIsSuffix = false, aroundIsSuffix = false, equalIsSuffix = false;

        // Retry each modifier that did not match at the start against the end of the text.
        if (this.Config.CheckBothBeforeAfter && er.Data != null && er.Data.Equals(Constants.HAS_MOD))
        {
            if (!beforeMatch.Success)
            {
                beforeMatch = Config.BeforeRegex.MatchEnd(er.Text, trim: true);
                beforeIsSuffix = beforeMatch.Success;
            }

            if (!afterMatch.Success)
            {
                afterMatch = Config.AfterRegex.MatchEnd(er.Text, trim: true);
                afterIsSuffix = afterMatch.Success;
            }

            if (!sinceMatch.Success)
            {
                sinceMatch = Config.SinceRegex.MatchEnd(er.Text, trim: true);
                sinceIsSuffix = sinceMatch.Success;
            }

            if (!aroundMatch.Success)
            {
                aroundMatch = Config.AroundRegex.MatchEnd(er.Text, trim: true);
                aroundIsSuffix = aroundMatch.Success;
            }

            if (!equalMatch.Success)
            {
                equalMatch = Config.EqualRegex.MatchEnd(er.Text, trim: true);
                equalIsSuffix = equalMatch.Success;
            }
        }

        // Only the first successful modifier, in this priority order, is processed.
        if (beforeMatch.Success)
        {
            hasBefore = true;
            StripModifier(beforeMatch.Length, beforeMatch.Value, beforeIsSuffix);
            hasInclusiveModifier = !string.IsNullOrEmpty(beforeMatch.Groups[Constants.IncludeGroupName].Value);
        }
        else if (afterMatch.Success)
        {
            hasAfter = true;
            StripModifier(afterMatch.Length, afterMatch.Value, afterIsSuffix);
            hasInclusiveModifier = !string.IsNullOrEmpty(afterMatch.Groups[Constants.IncludeGroupName].Value);
        }
        else if (sinceMatch.Success)
        {
            hasSince = true;
            StripModifier(sinceMatch.Length, sinceMatch.Value, sinceIsSuffix);
        }
        else if (aroundMatch.Success)
        {
            hasAround = true;
            StripModifier(aroundMatch.Length, aroundMatch.Value, aroundIsSuffix);
        }
        else if (equalMatch.Success)
        {
            hasEqual = true;
            StripModifier(equalMatch.Length, equalMatch.Value, equalIsSuffix);
        }
        else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && Config.YearRegex.Match(er.Text).Success) ||
                 er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) ||
                 er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal))
        {
            // This has to stay at the end of the chain, or cases like "before 2012" and
            // "after 2012" would fall into it.
            // 2012 or after/above
            // 3 pm or later
            var match = Config.SuffixAfter.MatchEnd(er.Text, trim: true);
            if (match.Success)
            {
                hasDateAfter = true;
                er.Length -= match.Length;
                er.Text = er.Text.Substring(0, er.Length ?? 0);
                modStr = match.Value;
            }
        }
    }

    // Parse extracted datetime mention
    pr = ParseResult(er, referenceTime);
    if (pr == null)
    {
        return null;
    }

    // Apply processed modifiers: restore the MOD string and record the Mod value.
    if (hasBefore && pr.Value != null)
    {
        ReattachModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD);
        pr.Value = val;
    }

    if (hasAfter && pr.Value != null)
    {
        ReattachModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, !hasInclusiveModifier ? Constants.AFTER_MOD : Constants.SINCE_MOD);
        pr.Value = val;
    }

    if (hasSince && pr.Value != null)
    {
        ReattachModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
        pr.Value = val;
    }

    if (hasAround && pr.Value != null)
    {
        ReattachModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, Constants.APPROX_MOD);
        pr.Value = val;
    }

    if (hasEqual && pr.Value != null)
    {
        // "Equal" only restores the text; no Mod value is recorded for it.
        ReattachModifier();
    }

    if (hasDateAfter && pr.Value != null)
    {
        // The SuffixAfter modifier is always a suffix, so only Length and Text change.
        pr.Length += modStr.Length;
        pr.Text = pr.Text + modStr;
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
        pr.Value = val;

        // Set after the hasSince block above on purpose: it only feeds hasRangeChangingMod below.
        hasSince = true;
    }

    // For cases like "3 pm or later on monday".
    // NOTE(review): assuming SuffixAfter is a System.Text.RegularExpressions.Regex, a failed
    // match reports Index 0, so this condition holds only when the suffix occurs somewhere
    // other than the very start of the text - confirm.
    if (pr.Value != null && Config.SuffixAfter.Match(pr.Text)?.Index != 0 &&
        pr.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal) && !this.Config.CheckBothBeforeAfter)
    {
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
        pr.Value = val;
        hasSince = true;
    }

    if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0 &&
        ((DateTimeResolutionResult)pr?.Value)?.SubDateTimeEntities != null)
    {
        pr.Value = DateTimeResolutionForSplit(pr);
    }
    else
    {
        var hasRangeChangingMod = hasBefore || hasAfter || hasSince;
        if (pr.Value != null)
        {
            ((DateTimeResolutionResult)pr.Value).HasRangeChangingMod = hasRangeChangingMod;
        }

        pr = SetParseResult(pr, hasRangeChangingMod);
    }

    // In this version, ExperimentalMode only copes with the "IncludePeriodEnd" case
    if ((this.Config.Options & DateTimeOptions.ExperimentalMode) != 0)
    {
        if (pr?.Metadata != null && pr.Metadata.PossiblyIncludePeriodEnd)
        {
            pr = SetInclusivePeriodEnd(pr);
        }
    }

    // In preview mode, report the original (un-preprocessed) text and a matching length.
    if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
    {
        if (pr != null)
        {
            pr.Length += originText.Length - pr.Text.Length;
            pr.Text = originText;
        }
    }

    return pr;
}
/// <summary>
/// Parses a single extracted date/time mention. A leading modifier (before / after / since /
/// around) or a trailing <c>DateAfter</c> match is stripped from <paramref name="er"/>, the
/// remaining text is dispatched to the sub-parser selected by <c>er.Type</c>, and the modifier
/// text and Mod value are then restored on the parse result.
/// </summary>
/// <param name="er">
/// The extraction to parse. Its <c>Start</c>, <c>Length</c> and <c>Text</c> are modified in place
/// while superfluous words and modifiers are stripped.
/// </param>
/// <param name="refTime">Reference date passed through to the sub-parsers.</param>
/// <returns>
/// The parse result; <c>null</c> when <c>er.Type</c> is not one of the handled types.
/// </returns>
public DateTimeParseResult Parse(ExtractResult er, DateObject refTime)
{
    var referenceTime = refTime;
    DateTimeParseResult pr = null;

    var originText = er.Text;
    if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
    {
        er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _);
        er.Length += er.Text.Length - originText.Length;
    }

    bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasDateAfter = false;

    // "InclusiveModifier" means MOD should include the start/end time.
    // For example, cases like "on or later than", "earlier than or in" have an inclusive modifier.
    bool hasInclusiveModifier = false;
    var modStr = string.Empty;

    // Push: removes a leading modifier from er and remembers its text.
    void StripLeadingModifier(int modLength, string modValue)
    {
        er.Start += modLength;
        er.Length -= modLength;
        er.Text = er.Text.Substring(modLength);
        modStr = modValue;
    }

    // Pop: puts the remembered leading modifier back in front of the parsed text.
    void RestoreLeadingModifier()
    {
        pr.Length += modStr.Length;
        pr.Start -= modStr.Length;
        pr.Text = modStr + pr.Text;
    }

    var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true);
    var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true);
    var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true);
    var aroundMatch = Config.AroundRegex.MatchBegin(er.Text, trim: true);

    // Only the first successful modifier, in this priority order, is processed.
    if (beforeMatch.Success)
    {
        hasBefore = true;
        StripLeadingModifier(beforeMatch.Length, beforeMatch.Value);
        hasInclusiveModifier = !string.IsNullOrEmpty(beforeMatch.Groups["include"].Value);
    }
    else if (afterMatch.Success)
    {
        hasAfter = true;
        StripLeadingModifier(afterMatch.Length, afterMatch.Value);
        hasInclusiveModifier = !string.IsNullOrEmpty(afterMatch.Groups["include"].Value);
    }
    else if (sinceMatch.Success)
    {
        hasSince = true;
        StripLeadingModifier(sinceMatch.Length, sinceMatch.Value);
    }
    else if (aroundMatch.Success)
    {
        hasAround = true;
        StripLeadingModifier(aroundMatch.Length, aroundMatch.Value);
    }
    else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD) && Config.YearRegex.Match(er.Text).Success) ||
             er.Type.Equals(Constants.SYS_DATETIME_DATE))
    {
        // This has to stay at the end of the chain, or cases like "before 2012" and
        // "after 2012" would fall into it.
        // 2012 or after/above
        var match = Config.DateAfter.MatchEnd(er.Text, trim: true);
        if (match.Success)
        {
            hasDateAfter = true;
            er.Length -= match.Length;
            er.Text = er.Text.Substring(0, er.Length ?? 0);
            modStr = match.Value;
        }
    }

    // Dispatch to the sub-parser that matches the extraction type.
    if (er.Type.Equals(Constants.SYS_DATETIME_DATE))
    {
        pr = this.Config.DateParser.Parse(er, referenceTime);

        // Fall back to the holiday parser when the date parser produced no value.
        if (pr.Value == null)
        {
            pr = Config.HolidayParser.Parse(er, referenceTime);
        }
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_TIME))
    {
        pr = this.Config.TimeParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIME))
    {
        pr = this.Config.DateTimeParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD))
    {
        pr = this.Config.DatePeriodParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD))
    {
        pr = this.Config.TimePeriodParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD))
    {
        pr = this.Config.DateTimePeriodParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_DURATION))
    {
        pr = this.Config.DurationParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_SET))
    {
        pr = this.Config.SetParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEALT))
    {
        pr = this.Config.DateTimeAltParser.Parse(er, referenceTime);
    }
    else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEZONE))
    {
        // Time zones are only parsed in preview mode; otherwise pr stays null.
        if ((Config.Options & DateTimeOptions.EnablePreview) != 0)
        {
            pr = this.Config.TimeZoneParser.Parse(er, referenceTime);
        }
    }
    else
    {
        return null;
    }

    // Pop, restore the MOD string
    if (hasBefore && pr != null && pr.Value != null)
    {
        RestoreLeadingModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD;
        pr.Value = val;
    }

    if (hasAfter && pr != null && pr.Value != null)
    {
        RestoreLeadingModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = !hasInclusiveModifier ? Constants.AFTER_MOD : Constants.SINCE_MOD;
        pr.Value = val;
    }

    if (hasSince && pr != null && pr.Value != null)
    {
        RestoreLeadingModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = Constants.SINCE_MOD;
        pr.Value = val;
    }

    if (hasAround && pr != null && pr.Value != null)
    {
        RestoreLeadingModifier();
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = Constants.APPROX_MOD;
        pr.Value = val;
    }

    if (hasDateAfter && pr != null && pr.Value != null)
    {
        // The DateAfter modifier is a suffix, so only Length and Text change.
        pr.Length += modStr.Length;
        pr.Text = pr.Text + modStr;
        var val = (DateTimeResolutionResult)pr.Value;
        val.Mod = Constants.SINCE_MOD;
        pr.Value = val;

        // Set after the hasSince block above on purpose: it only feeds hasModifier below.
        hasSince = true;
    }

    if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0 &&
        ((DateTimeResolutionResult)pr?.Value)?.SubDateTimeEntities != null)
    {
        // A non-null SubDateTimeEntities implies pr itself is non-null here.
        pr.Value = DateTimeResolutionForSplit(pr);
    }
    else
    {
        var hasModifier = hasBefore || hasAfter || hasSince;
        pr = SetParseResult(pr, hasModifier);
    }

    // In this version, ExperimentalMode only copes with the "IncludePeriodEnd" case
    if ((this.Config.Options & DateTimeOptions.ExperimentalMode) != 0)
    {
        if (pr?.Metadata != null && pr.Metadata.PossiblyIncludePeriodEnd)
        {
            pr = SetInclusivePeriodEnd(pr);
        }
    }

    // In preview mode, report the original (un-preprocessed) text and a matching length.
    // pr is treated as possibly null everywhere above, so guard it here as well rather than
    // throwing a NullReferenceException.
    if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0 && pr != null)
    {
        pr.Length += originText.Length - pr.Text.Length;
        pr.Text = originText;
    }

    return pr;
}