// Handle cases like "三天前" ("three days ago"): a duration followed by a relative
// marker, resolved to a concrete date relative to referenceDate.
//
// text:          the text to resolve.
// referenceDate: the date the duration is shifted from.
//
// Returns a result whose Success is true (with Timex, FutureValue and PastValue set
// to the shifted date) when a date could be resolved; otherwise Success stays false.
// NOTE: when the unit is known but no before/after marker follows, Timex is left set
// to the duration timex although Success stays false (behavior kept from the original).
private DateTimeResolutionResult ParserDurationWithAgoAndLater(string text, DateObject referenceDate)
{
    var ret = new DateTimeResolutionResult();

    var durationRes = this.config.DurationExtractor.Extract(text, referenceDate);
    if (durationRes.Count == 0)
    {
        return ret;
    }

    var match = this.config.UnitRegex.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    var durationStart = (int)durationRes[0].Start;
    var durationEnd = durationStart + (int)durationRes[0].Length;

    // The unit regex runs over the whole text, so its first match may sit before the
    // extracted duration. In that case there is no number span to read, and the
    // Substring call below would throw on a negative length.
    if (match.Index < durationStart)
    {
        return ret;
    }

    var suffix = text.Substring(durationEnd).Trim();
    var srcUnit = match.Groups["unit"].Value;

    // Everything between the start of the duration and the unit is the number.
    var numberStr = text.Substring(durationStart, match.Index - durationStart).Trim();

    var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text);
    var isFew = numberStr.Equals(unitMatch.Groups["few"].Value, StringComparison.Ordinal);

    // set the inexact number "数" (few) to 3 for now
    var number = isFew ? 3 : ConvertCJKToNum(numberStr);

    // Exact number whose suffix is exactly the relative-duration match:
    // delegate to the duration parser and shift the reference date by its timex.
    if (!isFew && suffix.Equals(unitMatch.Value, StringComparison.Ordinal))
    {
        var pr = this.config.DurationParser.Parse(durationRes[0], referenceDate);
        var future = suffix.Equals(unitMatch.Groups["later"].Value, StringComparison.Ordinal);

        if (pr != null)
        {
            var resultDateTime = DurationParsingUtil.ShiftDateTime(pr.TimexStr, referenceDate, future);
            ret.Timex = $"{DateTimeFormatUtil.LuisDate(resultDateTime)}";
            ret.FutureValue = ret.PastValue = resultDateTime;
            ret.Success = true;
            return ret;
        }
    }

    // Fallback: build the duration timex from number + unit, then shift backwards or
    // forwards depending on which marker the suffix starts with.
    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(number, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        DateObject date = Constants.InvalidDate;

        var beforeMatch = this.config.BeforeRegex.Match(suffix);
        if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value, StringComparison.Ordinal))
        {
            date = DurationParsingUtil.ShiftDateTime(ret.Timex, referenceDate, future: false);
        }

        // Checked after "before", so an "after" marker wins if both match at the start.
        var afterMatch = this.config.AfterRegex.Match(suffix);
        if (afterMatch.Success && suffix.StartsWith(afterMatch.Value, StringComparison.Ordinal))
        {
            date = DurationParsingUtil.ShiftDateTime(ret.Timex, referenceDate, future: true);
        }

        if (date != Constants.InvalidDate)
        {
            ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}";
            ret.FutureValue = ret.PastValue = date;
            ret.Success = true;
            return ret;
        }
    }

    return ret;
}
// Parses an extracted duration into a DateTimeParseResult.
//
// Resolution is attempted in order: merged durations, inexact number + unit, and
// finally the internal number-with-unit parser. Returns null when the internal
// parser's value is not a UnitValue.
//
// er:      the extraction result to parse.
// refDate: the reference date passed to the merged-duration parser.
public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
{
    var referenceTime = refDate;

    var dateTimeParseResult = ParseMergedDuration(er.Text, referenceTime);

    if (!dateTimeParseResult.Success)
    {
        dateTimeParseResult = DurationParsingUtil.ParseInexactNumberUnit(er.Text, this.config);
    }

    if (!dateTimeParseResult.Success)
    {
        var parseResult = this.config.InternalParser.Parse(er);
        var unitResult = parseResult.Value as UnitValue;

        if (unitResult == null)
        {
            return null;
        }

        var unitStr = unitResult.Unit;

        // A missing number means a single unit.
        var number = string.IsNullOrEmpty(unitResult.Number) ? 1 : double.Parse(unitResult.Number, CultureInfo.InvariantCulture);

        dateTimeParseResult.Timex = TimexUtility.GenerateDurationTimex(number, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        dateTimeParseResult.FutureValue = dateTimeParseResult.PastValue = number * this.config.UnitValueMap[unitStr];
        dateTimeParseResult.Success = true;
    }

    if (dateTimeParseResult.Success)
    {
        // Format with the invariant culture so the resolution strings do not depend on
        // the current thread culture (a plain ToString() can emit "1,5" for 1.5).
        dateTimeParseResult.FutureResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, string.Format(CultureInfo.InvariantCulture, "{0}", dateTimeParseResult.FutureValue) },
        };

        dateTimeParseResult.PastResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, string.Format(CultureInfo.InvariantCulture, "{0}", dateTimeParseResult.PastValue) },
        };

        // Attach a "less than" / "more than" modifier when the text carries one.
        var moreOrLessMatch = config.MoreOrLessRegex.Match(er.Text);
        if (moreOrLessMatch.Success)
        {
            if (moreOrLessMatch.Groups["less"].Success)
            {
                dateTimeParseResult.Mod = Constants.LESS_THAN_MOD;
            }
            else if (moreOrLessMatch.Groups["more"].Success)
            {
                dateTimeParseResult.Mod = Constants.MORE_THAN_MOD;
            }
        }
    }

    var ret = new DateTimeParseResult
    {
        Text = er.Text,
        Start = er.Start,
        Length = er.Length,
        Type = er.Type,
        Data = er.Data,
        Value = dateTimeParseResult,
        TimexStr = dateTimeParseResult.Timex,
        ResolutionStr = string.Empty,
    };

    return ret;
}
// Matches regex against text and, when the captured "unit" group is a known unit,
// fills ret with a duration of numStr units.
//
// regex:  pattern exposing a "unit" group.
// text:   the text to match.
// numStr: the amount, as an invariant-culture number string (e.g. "1", "0.5").
// ret:    always assigned; ret.Success is true only when the unit was resolved.
//
// Returns whether the regex matched. This is NOT the same as ret.Success: a match
// on an unknown unit returns true with an unsuccessful ret.
private bool TryGetResultFromRegex(Regex regex, string text, string numStr, out DateTimeResolutionResult ret)
{
    ret = new DateTimeResolutionResult();

    var match = regex.Match(text);
    if (!match.Success)
    {
        return false;
    }

    var srcUnit = match.Groups["unit"].Value;

    // Require the unit in BOTH maps: checking one map and indexing the other throws
    // KeyNotFoundException whenever the two maps disagree.
    if (this.config.UnitValueMap.TryGetValue(srcUnit, out var unitValue) &&
        this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        var numVal = double.Parse(numStr, CultureInfo.InvariantCulture);
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * unitValue;
        ret.Success = true;
    }

    return true;
}
// Handle cases that don't contain numbers ("all day", "half year", "the hour", ...).
//
// text:          the text to resolve.
// referenceTime: not used by this method.
//
// Each pattern is tried in turn and a later match overwrites an earlier one, even
// when the later match did not resolve a unit (TryGetResultFromRegex reports the
// regex match, not the resolution success).
private DateTimeResolutionResult ParseImplicitDuration(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();

    // handle "all day" "all year"
    if (TryGetResultFromRegex(config.AllDateUnitRegex, text, "1", out var result))
    {
        ret = result;
    }

    // handle "half day", "half year"
    if (TryGetResultFromRegex(config.HalfDateUnitRegex, text, "0.5", out result))
    {
        ret = result;
    }

    // handle single duration unit, it is filtered in the extraction that there is a relative word in advance
    if (TryGetResultFromRegex(config.FollowedUnit, text, "1", out result))
    {
        ret = result;
    }

    var calendarMode = (config.Options & DateTimeOptions.CalendarMode) != 0;
    if (calendarMode)
    {
        // handle "during/for the day/week/month/year"
        if (TryGetResultFromRegex(config.DuringRegex, text, "1", out result))
        {
            ret = result;
        }

        return ret;
    }

    // handle cases like "the hour", which are special durations always not in CalendarMode
    var regex = this.config.PrefixArticleRegex;
    if (regex == null)
    {
        return ret;
    }

    var match = RegExpUtility.MatchBegin(regex, text, false);
    if (!match.Success)
    {
        return ret;
    }

    // Whatever follows the leading article is taken as the unit.
    var srcUnit = text.Substring(match.Length);

    // Require the unit in BOTH maps: checking one map and indexing the other throws
    // KeyNotFoundException whenever the two maps disagree.
    if (this.config.UnitValueMap.TryGetValue(srcUnit, out var unitValue) &&
        this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        // An article stands for exactly one unit.
        const double numVal = 1;
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * unitValue;
        ret.Success = true;
    }

    return ret;
}
// Resolves durations introduced by AnUnitRegex or, failing that, HalfDateUnitRegex.
// The base amount is 1, or 0.5 / 0.25 / 0.75 when the "half" / "quarter" /
// "threequarter" group matched, plus whatever ParseNumberWithUnitAndSuffix
// returns for the whole text.
private DateTimeResolutionResult ParseAnUnit(string text)
{
    var resolution = new DateTimeResolutionResult();

    var unitMatch = this.config.AnUnitRegex.Match(text);
    if (!unitMatch.Success)
    {
        unitMatch = this.config.HalfDateUnitRegex.Match(text);
    }

    if (!unitMatch.Success)
    {
        return resolution;
    }

    // Later fraction groups take precedence over earlier ones.
    double amount = 1;
    if (unitMatch.Groups["half"].Success)
    {
        amount = 0.5;
    }

    if (unitMatch.Groups["quarter"].Success)
    {
        amount = 0.25;
    }

    if (unitMatch.Groups["threequarter"].Success)
    {
        amount = 0.75;
    }

    amount += ParseNumberWithUnitAndSuffix(text);

    var sourceUnit = unitMatch.Groups["unit"].Value;

    if (this.config.UnitMap.TryGetValue(sourceUnit, out var timexUnit))
    {
        resolution.Timex = TimexUtility.GenerateDurationTimex(amount, timexUnit, DurationParsingUtil.IsLessThanDay(timexUnit));
        resolution.FutureValue = resolution.PastValue = amount * this.config.UnitValueMap[sourceUnit];
        resolution.Success = true;
        return resolution;
    }

    if (unitMatch.Groups[Constants.BusinessDayGroupName].Success)
    {
        resolution.Timex = TimexUtility.GenerateDurationTimex(amount, Constants.TimexBusinessDay, false);

        // Look the value up by the last whitespace-separated word of the unit, so both
        // two-word and single-word "business day" expressions work.
        var unitWords = sourceUnit.Split();
        var lastWord = unitWords[unitWords.Length - 1];
        resolution.FutureValue = resolution.PastValue = amount * this.config.UnitValueMap[lastWord];
        resolution.Success = true;
    }

    return resolution;
}
// Resolves durations where the number and the unit are written with NO space
// between them, as matched by NumberCombinedWithUnit.
private DateTimeResolutionResult ParseNumberCombinedUnit(string text)
{
    var resolution = new DateTimeResolutionResult();

    var combined = this.config.NumberCombinedWithUnit.Match(text);
    if (!combined.Success)
    {
        return resolution;
    }

    // The amount is the captured number plus whatever the suffix parser returns for the whole text.
    var parsedNumber = double.Parse(combined.Groups["num"].Value, CultureInfo.InvariantCulture);
    var amount = parsedNumber + ParseNumberWithUnitAndSuffix(text);

    var sourceUnit = combined.Groups["unit"].Value;
    if (!this.config.UnitMap.TryGetValue(sourceUnit, out var timexUnit))
    {
        return resolution;
    }

    // Amounts above 1000 are rejected for year, month and week units.
    if (amount > 1000 &&
        (timexUnit.Equals(Constants.TimexYear, StringComparison.Ordinal) ||
         timexUnit.Equals(Constants.TimexMonthFull, StringComparison.Ordinal) ||
         timexUnit.Equals(Constants.TimexWeek, StringComparison.Ordinal)))
    {
        return resolution;
    }

    resolution.Timex = TimexUtility.GenerateDurationTimex(amount, timexUnit, DurationParsingUtil.IsLessThanDay(timexUnit));
    resolution.FutureValue = resolution.PastValue = amount * this.config.UnitValueMap[sourceUnit];
    resolution.Success = true;

    return resolution;
}
// Resolves durations where the number and the unit are separated by spaces.
// Only handled when exactly one number is extracted before the unit; otherwise,
// or when the number cannot be parsed or the unit is unknown, an unsuccessful
// result is returned.
private DateTimeResolutionResult ParseNumberSpaceUnit(string text)
{
    var ret = new DateTimeResolutionResult();
    var suffixStr = text;

    // if there are spaces between number and unit
    var ers = ExtractNumbersBeforeUnit(text);
    if (ers.Count != 1)
    {
        return ret;
    }

    var pr = this.config.NumberParser.Parse(ers[0]);
    if (pr?.Value == null)
    {
        // No numeric value to work with; report failure instead of dereferencing null below.
        return ret;
    }

    // Format the parsed value with the invariant culture: it is re-parsed with the
    // invariant culture below, and a culture-sensitive ToString() could emit "1,5",
    // which would then be misread or rejected.
    var numberText = string.Format(CultureInfo.InvariantCulture, "{0}", pr.Value);

    // followed unit: {num} (<followed unit>and a half hours)
    var srcUnit = string.Empty;

    // Text after the number; the parentheses make explicit that ?? applies to the sum.
    var noNum = text.Substring((ers[0].Start + ers[0].Length) ?? 0).Trim();

    var match = this.config.FollowedUnit.Match(noNum);
    if (match.Success)
    {
        srcUnit = match.Groups["unit"].Value;
        suffixStr = match.Groups[Constants.SuffixGroupName].Value;

        // check also beforeStr for "and an half"
        if (this.config.CheckBothBeforeAfter && string.IsNullOrEmpty(suffixStr))
        {
            noNum = text.Substring(0, (int)ers[0].Start).Trim();
            var prefixMatch = this.config.SuffixAndRegex.Match(noNum);
            if (prefixMatch.Success)
            {
                suffixStr = prefixMatch.Groups[Constants.SuffixGroupName].Value;
            }
        }
    }

    if (match.Success && match.Groups[Constants.BusinessDayGroupName].Success)
    {
        var numVal = int.Parse(numberText, CultureInfo.InvariantCulture);
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false);

        // Look the value up by the last whitespace-separated word of the unit, so both
        // two-word and single-word "business day" expressions work.
        var unitWords = srcUnit.Split();
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        ret.Success = true;

        return ret;
    }

    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        // First try to parse combined expression 'num + suffix'
        double numVal;
        var combStr = pr.Text + " " + suffixStr;
        if (this.config.DoubleNumbers.ContainsKey(combStr))
        {
            numVal = ParseNumberWithUnitAndSuffix(combStr);
        }
        else
        {
            numVal = double.Parse(numberText, CultureInfo.InvariantCulture) + ParseNumberWithUnitAndSuffix(suffixStr);
        }

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
        ret.Success = true;

        return ret;
    }

    return ret;
}