// Cases like "3 days from today", "5 weeks before yesterday", "2 months after tomorrow"
// Note that these cases are of type "date"
//
// text:      the input string being scanned.
// tokens:    date spans found earlier; they are copied into a local candidate list and never modified.
// reference: reference date forwarded to the duration and holiday extractors.
// Returns the spans (as tokens over text) recognized as relative-duration dates.
private List<Token> ExtractRelativeDurationDate(string text, List<Token> tokens, DateObject reference)
{
    var ret = new List<Token>();
    var tempTokens = new List<Token>(tokens);
    var durationEr = Config.DurationExtractor.Extract(text, reference);

    // Holiday spans depend only on text and reference, so they are added to the candidates at most once.
    // Re-adding them for every duration would pile up duplicate candidates and, in turn, duplicate results.
    var holidaysAdded = false;

    foreach (var er in durationEr)
    {
        // if it is a multiple duration but its type is not equal to Date, skip it here
        if (IsMultipleDuration(er) && !IsMultipleDurationDate(er))
        {
            continue;
        }

        // Some types of duration can be compounded with a "before", "after" or "from" suffix to create a "date",
        // while other types of duration, compounded with such a suffix, create a "dateperiod" instead.
        // For example, durations like "3 days", "2 weeks", "1 week and 2 days" combine with the suffix into a "date".
        // But "more than 3 days" or "less than 2 weeks" become cases like "more than 3 days from today",
        // which is a "dateperiod", not a "date".
        // As this method is aimed at extracting RelativeDurationDate, for cases with "more than" or "less than"
        // we remove the prefix so as to extract the expected RelativeDurationDate.
        if (IsInequalityDuration(er))
        {
            StripInequalityDuration(er);
        }

        var match = Config.DateUnitRegex.Match(er.Text);
        if (!match.Success)
        {
            continue;
        }

        // NOTE(review): ret is passed to the helper and the helper's return value is appended to ret.
        // If the helper also adds to (or returns) the list it was given, entries get duplicated - confirm.
        ret.AddRange(AgoLaterUtil.ExtractorDurationWithBeforeAndAfter(text, er, ret, Config.UtilityConfiguration));

        // Take into account also holiday dates
        if (ret.Count < 1 && !holidaysAdded)
        {
            holidaysAdded = true;
            foreach (var holiday in Config.HolidayExtractor.Extract(text, reference))
            {
                tempTokens.Add(new Token((int)holiday.Start, (int)(holiday.Start + holiday.Length)));
            }
        }

        // Check for combined patterns Duration + Date, e.g. '3 days before Monday', '4 weeks after January 15th'
        if (ret.Count < 1 && tempTokens.Count > 0 && er.Text != match.Value)
        {
            var durationEnd = (int)er.Start + (int)er.Length;
            var afterStr = text.Substring(durationEnd);
            var connector = Config.BeforeAfterRegex.MatchBegin(afterStr, trim: true);

            if (connector.Success)
            {
                // Position right after the connector; it is the same for every candidate token.
                var start = durationEnd + connector.Index + connector.Length;

                foreach (var token in tempTokens)
                {
                    // Only whitespace may sit between the connector and the candidate date.
                    var length = token.Start - start;
                    if (length > 0 && start + length < text.Length && string.IsNullOrWhiteSpace(text.Substring(start, length)))
                    {
                        ret.Add(new Token((int)er.Start, token.End));
                    }
                }
            }
        }
    }

    // Extract cases like "in 3 weeks", which equals to "3 weeks from today"
    var relativeDurationDateWithInPrefix = ExtractRelativeDurationDateWithInPrefix(text, durationEr, reference);

    // For cases like "in 3 weeks from today", we should choose "3 weeks from today" as the extract result
    // rather than "in 3 weeks" or "in 3 weeks from today"
    foreach (var extractResultWithInPrefix in relativeDurationDateWithInPrefix)
    {
        if (!IsOverlapWithExistExtractions(extractResultWithInPrefix, ret))
        {
            ret.Add(extractResultWithInPrefix);
        }
    }

    return ret;
}
// Used in ExtractRelativeDurationDateWithInPrefix to extract the connector "in" in cases like "In 3 days/weeks/months/years"
//
// text:      the full input; the duration's own text is sliced out of it.
// firstStr:  the string searched for the "in" connector (at its end when inPrefix is true, at its beginning otherwise).
// secondStr: the string searched for a "since year" suffix.
// duration:  span of the duration inside text.
// success:   set to whether the connector was found, regardless of whether a token is produced.
// inPrefix:  selects which side the connector is expected on and how the resulting span is computed.
// Returns a list holding at most one token; it is empty when the connector or the range unit is missing.
private List<Token> ExtractInConnector(string text, string firstStr, string secondStr, Token duration, out bool success, bool inPrefix)
{
    var result = new List<Token>();

    var connector = inPrefix
        ? Config.InConnectorRegex.MatchEnd(firstStr, trim: true)
        : Config.InConnectorRegex.MatchBegin(firstStr, trim: true);

    success = connector.Success;
    if (!connector.Success)
    {
        return result;
    }

    // The duration itself must contain a range unit, otherwise nothing is extracted.
    var durationText = text.Substring(duration.Start, duration.Length);
    if (!Config.RangeUnitRegex.Match(durationText).Success)
    {
        return result;
    }

    var sinceYear = Config.SinceYearSuffixRegex.Match(secondStr);

    int start;
    int end;
    if (inPrefix)
    {
        // Connector precedes the duration: begin at the connector, optionally stretch over the "since year" suffix.
        start = connector.Index;
        end = sinceYear.Success ? duration.End + sinceYear.Length : duration.End;
    }
    else
    {
        // Connector follows the duration: end after the connector; begin at the "since year" match when there is one.
        start = sinceYear.Success ? sinceYear.Index : duration.Start;
        end = duration.End + connector.Index + connector.Length;
    }

    result.Add(new Token(start, end));
    return result;
}
// Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon"
//
// text:    the input string the extractions refer to.
// dateErs: date extractions; read only, not modified by this method.
// timeErs: time extractions; read only, not modified by this method.
// Returns one token per date that is followed by a time through a valid connector,
// extended over a trailing suffix such as "in the afternoon" when one matches.
private IEnumerable<Token> MergeDateWithTimePeriodSuffix(string text, List<ExtractResult> dateErs, List<ExtractResult> timeErs)
{
    var ret = new List<Token>();

    if (!dateErs.Any() || !timeErs.Any())
    {
        return ret;
    }

    // Merge into a fresh list instead of appending timeErs to the caller's dateErs.
    // Concat keeps dates ahead of times and OrderBy is stable, so the resulting order
    // is the same as appending in place and then sorting.
    var ers = dateErs.Concat(timeErs).OrderBy(o => o.Start).ToList();

    var i = 0;
    while (i < ers.Count - 1)
    {
        // Find the next extraction that does not overlap the current one.
        var j = i + 1;
        while (j < ers.Count && ers[i].IsOverlap(ers[j]))
        {
            j++;
        }

        if (j >= ers.Count)
        {
            break;
        }

        var isDateThenTime =
            ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
            ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal);

        if (!isDateThenTime)
        {
            i = j;
            continue;
        }

        var middleBegin = ers[i].Start + ers[i].Length ?? 0;
        var middleEnd = ers[j].Start ?? 0;

        if (middleBegin <= middleEnd)
        {
            // The text between the date and the time decides whether the two belong together.
            var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();

            if (IsValidConnectorForDateAndTimePeriod(middleStr))
            {
                var begin = ers[i].Start ?? 0;
                var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 0);
                ret.Add(new Token(begin, end));
            }
        }

        // Both members of the pair have been consumed, merged or not.
        i = j + 1;
    }

    // Handle "in the afternoon" at the end of entity
    for (var idx = 0; idx < ret.Count; idx++)
    {
        var afterStr = text.Substring(ret[idx].End);
        var match = this.config.SuffixRegex.Match(afterStr);
        if (match.Success)
        {
            // NOTE(review): the token grows by match.Length without looking at match.Index;
            // this presumably relies on SuffixRegex only matching at the start of afterStr - confirm.
            ret[idx] = new Token(ret[idx].Start, ret[idx].End + match.Length);
        }
    }

    return ret;
}