/// <summary>
/// Parses a duration whose number and unit are written with no space between them.
/// </summary>
/// <param name="text">The text matched against <c>config.NumberCombinedWithUnit</c>.</param>
/// <returns>
/// A result with <c>Success</c> set when the matched unit is a key of <c>config.UnitMap</c>;
/// otherwise an unsuccessful result. Amounts above 1000 are rejected for the
/// <c>TimexYear</c>, <c>TimexMonthFull</c> and <c>TimexWeek</c> units.
/// </returns>
private DateTimeResolutionResult ParseNumberCombinedUnit(string text)
{
    var ret = new DateTimeResolutionResult();

    // if there are NO spaces between number and unit
    var match = this.config.NumberCombinedWithUnit.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    // Parse with the invariant culture and keep working with the double itself: formatting the value
    // with one culture and re-parsing it with the thread culture changes it in comma-decimal cultures.
    var numVal = double.Parse(match.Groups["num"].Value, CultureInfo.InvariantCulture)
                 + ParseNumberWithUnitAndSuffix(text);

    var srcUnit = match.Groups["unit"].Value.ToLower();
    if (!this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        return ret;
    }

    if (numVal > 1000 &&
        (unitStr.Equals(Constants.TimexYear) ||
         unitStr.Equals(Constants.TimexMonthFull) ||
         unitStr.Equals(Constants.TimexWeek)))
    {
        return ret;
    }

    ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));
    ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
    ret.Success = true;

    return ret;
}
/// <summary>
/// Parses a duration whose number and unit are separated by whitespace.
/// </summary>
/// <param name="text">The text to parse; exactly one number must be extracted from it.</param>
/// <returns>
/// A result with <c>Success</c> set when the text after the number matches <c>config.FollowedUnit</c>
/// and the unit is either a business-day unit or a key of <c>config.UnitMap</c>; otherwise an
/// unsuccessful result.
/// </returns>
private DateTimeResolutionResult ParseNumberSpaceUnit(string text)
{
    var ret = new DateTimeResolutionResult();
    var suffixStr = text;

    // if there are spaces between number and unit
    var ers = ExtractNumbersBeforeUnit(text);
    if (ers.Count != 1)
    {
        return ret;
    }

    var pr = this.config.NumberParser.Parse(ers[0]);

    // followed unit: {num} (<followed unit>and a half hours)
    var srcUnit = string.Empty;
    var noNum = text.Substring((ers[0].Start + ers[0].Length) ?? 0).Trim();
    var match = this.config.FollowedUnit.Match(noNum);
    if (match.Success)
    {
        srcUnit = match.Groups["unit"].Value;
        suffixStr = match.Groups[Constants.SuffixGroupName].Value;

        // check also beforeStr for "and an half"
        if (this.config.CheckBothBeforeAfter && string.IsNullOrEmpty(suffixStr))
        {
            noNum = text.Substring(0, (int)ers[0].Start).Trim();
            var prefixMatch = this.config.SuffixAndRegex.Match(noNum);
            if (prefixMatch.Success)
            {
                suffixStr = prefixMatch.Groups[Constants.SuffixGroupName].Value;
            }
        }
    }

    if (match.Success && match.Groups[Constants.BusinessDayGroupName].Success)
    {
        // Format the parsed value with the same (invariant) culture it is re-parsed with, so the
        // thread culture's decimal separator cannot alter the number.
        var numVal = int.Parse(
            string.Format(CultureInfo.InvariantCulture, "{0}", pr.Value),
            CultureInfo.InvariantCulture);

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false);

        // The line below was containing this.config.UnitValueMap[srcUnit.Split()[1]]
        // it was updated to accommodate single word "business day" expressions.
        var unitWords = srcUnit.Split();
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        ret.Success = true;

        return ret;
    }

    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        var numVal = double.Parse(
                         string.Format(CultureInfo.InvariantCulture, "{0}", pr.Value),
                         CultureInfo.InvariantCulture)
                     + ParseNumberWithUnitAndSuffix(suffixStr);

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
        ret.Success = true;

        return ret;
    }

    return ret;
}
/// <summary>
/// Handles durations that contain no number, e.g. "all day", "half year" or a bare unit.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="referenceTime">Not read by this method.</param>
/// <returns>
/// The result of the last pattern that matched; an unsuccessful result when none matched
/// or when the matched unit is unknown.
/// </returns>
private DateTimeResolutionResult ParseImplicitDuration(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();

    // Each later match replaces the result of an earlier one.

    // handle "all day" "all year"
    if (TryGetResultFromRegex(this.config.AllDateUnitRegex, text, "1", out var result))
    {
        ret = result;
    }

    // handle "half day", "half year"
    if (TryGetResultFromRegex(this.config.HalfDateUnitRegex, text, "0.5", out result))
    {
        ret = result;
    }

    // handle single duration unit, it is filtered in the extraction that there is a relative word in advance
    if (TryGetResultFromRegex(this.config.FollowedUnit, text, "1", out result))
    {
        ret = result;
    }

    if ((this.config.Options & DateTimeOptions.CalendarMode) != 0)
    {
        // handle "during/for the day/week/month/year"
        if (TryGetResultFromRegex(this.config.DuringRegex, text, "1", out result))
        {
            ret = result;
        }

        return ret;
    }

    // handle cases like "the hour", which are special durations always not in CalendarMode
    var regex = this.config.PrefixArticleRegex;
    if (regex == null)
    {
        return ret;
    }

    var match = RegExpUtility.MatchBegin(regex, text, false);
    if (!match.Success)
    {
        return ret;
    }

    var srcUnit = text.Substring(match.Length);

    // Look the unit up in BOTH maps: guarding on one map and indexing the other throws
    // KeyNotFoundException if their key sets ever differ.
    if (this.config.UnitValueMap.TryGetValue(srcUnit, out var unitValue) &&
        this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        const double numVal = 1;
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * unitValue;
        ret.Success = true;
    }

    return ret;
}
/// <summary>
/// Parses an extracted duration into a <see cref="DateTimeParseResult"/>.
/// </summary>
/// <param name="er">The extraction result to parse.</param>
/// <param name="refDate">The reference date passed on to <c>ParseMergedDuration</c>.</param>
/// <returns>
/// The parse result, or <c>null</c> when the merged-duration parse fails and the internal parser
/// does not produce a <c>UnitValue</c>.
/// </returns>
public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
{
    var referenceTime = refDate;
    var dateTimeParseResult = ParseMergedDuration(er.Text, referenceTime);

    if (!dateTimeParseResult.Success)
    {
        var parseResult = this.config.InternalParser.Parse(er);
        var unitResult = parseResult.Value as UnitValue;
        if (unitResult == null)
        {
            return null;
        }

        var unitStr = unitResult.Unit;

        // Parse once, with the invariant culture, so the amount does not depend on the thread culture.
        // A missing number is treated as a single unit instead of throwing.
        var number = string.IsNullOrEmpty(unitResult.Number)
            ? 1
            : double.Parse(unitResult.Number, System.Globalization.CultureInfo.InvariantCulture);

        dateTimeParseResult.Timex = TimexUtility.GenerateDurationTimex(number, unitStr, BaseDurationParser.IsLessThanDay(unitStr));
        dateTimeParseResult.FutureValue = dateTimeParseResult.PastValue = number * this.config.UnitValueMap[unitStr];
        dateTimeParseResult.Success = true;
    }

    if (dateTimeParseResult.Success)
    {
        dateTimeParseResult.FutureResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, dateTimeParseResult.FutureValue.ToString() },
        };

        dateTimeParseResult.PastResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, dateTimeParseResult.PastValue.ToString() },
        };
    }

    var ret = new DateTimeParseResult
    {
        Text = er.Text,
        Start = er.Start,
        Length = er.Length,
        Type = er.Type,
        Data = er.Data,
        Value = dateTimeParseResult,
        TimexStr = dateTimeParseResult.Timex,
        ResolutionStr = string.Empty,
    };

    return ret;
}
/// <summary>
/// Parses a duration whose number and unit are separated by whitespace.
/// </summary>
/// <param name="text">The text to parse; exactly one number must be extracted from it.</param>
/// <returns>
/// A result with <c>Success</c> set when the text after the number matches <c>config.FollowedUnit</c>
/// and the unit is either a business-day unit or a key of <c>config.UnitMap</c>; otherwise an
/// unsuccessful result.
/// </returns>
private DateTimeResolutionResult ParseNumberSpaceUnit(string text)
{
    var ret = new DateTimeResolutionResult();
    var suffixStr = text;

    // if there are spaces between number and unit
    var ers = ExtractNumbersBeforeUnit(text);
    if (ers.Count != 1)
    {
        return ret;
    }

    var pr = this.config.NumberParser.Parse(ers[0]);

    // followed unit: {num} (<followed unit>and a half hours)
    var srcUnit = string.Empty;
    var noNum = text.Substring((ers[0].Start + ers[0].Length) ?? 0).Trim();
    var match = this.config.FollowedUnit.Match(noNum);
    if (match.Success)
    {
        srcUnit = match.Groups["unit"].Value;
        suffixStr = match.Groups[Constants.SuffixGroupName].Value;
    }

    if (match.Success && match.Groups[Constants.BusinessDayGroupName].Success)
    {
        // Format the parsed value with the same (invariant) culture it is re-parsed with, so the
        // thread culture's decimal separator cannot alter the number.
        var numVal = int.Parse(
            string.Format(CultureInfo.InvariantCulture, "{0}", pr.Value),
            CultureInfo.InvariantCulture);

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false);

        // Use the LAST word of the matched unit: indexing [1] throws IndexOutOfRangeException
        // when the business-day unit is a single word.
        var unitWords = srcUnit.Split();
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        ret.Success = true;

        return ret;
    }

    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        var numVal = double.Parse(
                         string.Format(CultureInfo.InvariantCulture, "{0}", pr.Value),
                         CultureInfo.InvariantCulture)
                     + ParseNumberWithUnitAndSuffix(suffixStr);

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
        ret.Success = true;

        return ret;
    }

    return ret;
}
/// <summary>
/// Parses a duration with an inexact amount (e.g. "few", "some") using the supplied regex and unit maps.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="inexactNumberUnitRegex">Regex providing the <c>unit</c> and <c>NumTwoTerm</c> groups.</param>
/// <param name="unitMap">Maps the matched unit text to its Timex unit.</param>
/// <param name="unitValueMap">Maps a unit to its numeric value.</param>
/// <param name="isCJK">When true, <paramref name="unitValueMap"/> is keyed by the Timex unit instead of the matched text.</param>
/// <returns>A successful result for a known or business-day unit; otherwise an unsuccessful one.</returns>
private static DateTimeResolutionResult ParseInexactNumberUnit(string text, Regex inexactNumberUnitRegex, IImmutableDictionary<string, string> unitMap, IImmutableDictionary<string, long> unitValueMap, bool isCJK = false)
{
    var result = new DateTimeResolutionResult();

    var found = inexactNumberUnitRegex.Match(text);
    if (!found.Success)
    {
        return result;
    }

    // set the inexact number "few", "some" to 3 for now
    double amount = 3;
    if (found.Groups["NumTwoTerm"].Success)
    {
        amount = 2;
    }

    var unitText = found.Groups["unit"].Value;

    if (unitMap.TryGetValue(unitText, out var timexUnit))
    {
        if (amount > 1000 &&
            (timexUnit.Equals(Constants.TimexYear, StringComparison.Ordinal) ||
             timexUnit.Equals(Constants.TimexMonthFull, StringComparison.Ordinal) ||
             timexUnit.Equals(Constants.TimexWeek, StringComparison.Ordinal)))
        {
            return result;
        }

        result.Timex = TimexUtility.GenerateDurationTimex(amount, timexUnit, IsLessThanDay(timexUnit));

        // In CJK implementation unitValueMap uses the unitMap values as keys while
        // in standard implementation unitMap and unitValueMap have the same keys.
        long perUnitValue;
        if (isCJK)
        {
            perUnitValue = unitValueMap[timexUnit];
        }
        else
        {
            perUnitValue = unitValueMap[unitText];
        }

        result.FutureValue = result.PastValue = amount * perUnitValue;
        result.Success = true;
        return result;
    }

    if (found.Groups[Constants.BusinessDayGroupName].Success)
    {
        result.Timex = TimexUtility.GenerateDurationTimex(amount, Constants.TimexBusinessDay, false);

        // The last word of the matched unit is the lookup key, which also covers
        // single word "business day" expressions.
        var unitWords = unitText.Split();
        result.FutureValue = result.PastValue = amount * unitValueMap[unitWords[unitWords.Length - 1]];
        result.Success = true;
    }

    return result;
}
/// <summary>
/// Parses a duration with an implied amount: one unit by default, or the fraction indicated by the
/// <c>half</c>, <c>quarter</c> or <c>threequarter</c> group of the match.
/// </summary>
/// <param name="text">The text matched against <c>config.AnUnitRegex</c>, then <c>config.HalfDateUnitRegex</c>.</param>
/// <returns>A successful result for a known or business-day unit; otherwise an unsuccessful one.</returns>
private DateTimeResolutionResult ParseAnUnit(string text)
{
    var result = new DateTimeResolutionResult();

    var unitMatch = this.config.AnUnitRegex.Match(text);
    if (!unitMatch.Success)
    {
        unitMatch = this.config.HalfDateUnitRegex.Match(text);
    }

    if (!unitMatch.Success)
    {
        return result;
    }

    // Later groups take precedence over earlier ones when several are present.
    double amount = 1;
    if (unitMatch.Groups["half"].Success)
    {
        amount = 0.5;
    }

    if (unitMatch.Groups["quarter"].Success)
    {
        amount = 0.25;
    }

    if (unitMatch.Groups["threequarter"].Success)
    {
        amount = 0.75;
    }

    amount += ParseNumberWithUnitAndSuffix(text);

    var unitText = unitMatch.Groups["unit"].Value;

    if (this.config.UnitMap.TryGetValue(unitText, out var timexUnit))
    {
        result.Timex = TimexUtility.GenerateDurationTimex(amount, timexUnit, IsLessThanDay(timexUnit));
        result.FutureValue = result.PastValue = amount * this.config.UnitValueMap[unitText];
        result.Success = true;
        return result;
    }

    if (unitMatch.Groups[Constants.BusinessDayGroupName].Success)
    {
        result.Timex = TimexUtility.GenerateDurationTimex(amount, Constants.TimexBusinessDay, false);

        // The last word of the matched unit is the lookup key, which also covers
        // single word "business day" expressions.
        var unitWords = unitText.Split();
        result.FutureValue = result.PastValue = amount * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        result.Success = true;
    }

    return result;
}
/// <summary>
/// Matches <paramref name="text"/> against <paramref name="regex"/> and, when the captured
/// <c>unit</c> is known, builds a duration of <paramref name="numStr"/> units.
/// </summary>
/// <param name="regex">Regex providing a <c>unit</c> group.</param>
/// <param name="text">The text to match.</param>
/// <param name="numStr">The amount as an invariant-culture numeric string (e.g. "1", "0.5").</param>
/// <param name="ret">Always assigned; <c>Success</c> is set only when the unit is known.</param>
/// <returns>
/// True whenever the regex matches, even if the unit is unknown and <paramref name="ret"/> is unsuccessful.
/// </returns>
private bool TryGetResultFromRegex(Regex regex, string text, string numStr, out DateTimeResolutionResult ret)
{
    ret = new DateTimeResolutionResult();

    var match = regex.Match(text);
    if (!match.Success)
    {
        return false;
    }

    var srcUnit = match.Groups["unit"].Value;

    // Look the unit up in BOTH maps: guarding on one map and indexing the other throws
    // KeyNotFoundException if their key sets ever differ.
    if (this.config.UnitValueMap.TryGetValue(srcUnit, out var unitValue) &&
        this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        // The amount is a literal such as "0.5"; parsing it with the thread culture would read
        // it as 5 where '.' is a group separator.
        var numVal = double.Parse(numStr, System.Globalization.CultureInfo.InvariantCulture);

        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * unitValue;
        ret.Success = true;
    }

    return true;
}
/// <summary>
/// Parses a duration with an inexact amount (e.g. "few", "some") using <c>config.InexactNumberUnitRegex</c>.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <returns>A successful result for a known or business-day unit; otherwise an unsuccessful one.</returns>
private DateTimeResolutionResult ParseInexactNumberUnit(string text)
{
    var outcome = new DateTimeResolutionResult();

    var inexactMatch = config.InexactNumberUnitRegex.Match(text);
    if (!inexactMatch.Success)
    {
        return outcome;
    }

    // set the inexact number "few", "some" to 3 for now
    double quantity = inexactMatch.Groups["NumTwoTerm"].Success ? 2 : 3;
    var unitText = inexactMatch.Groups["unit"].Value;

    if (this.config.UnitMap.TryGetValue(unitText, out var timexUnit))
    {
        if (quantity > 1000 &&
            (timexUnit.Equals(Constants.TimexYear, StringComparison.Ordinal) ||
             timexUnit.Equals(Constants.TimexMonthFull, StringComparison.Ordinal) ||
             timexUnit.Equals(Constants.TimexWeek, StringComparison.Ordinal)))
        {
            return outcome;
        }

        outcome.Timex = TimexUtility.GenerateDurationTimex(quantity, timexUnit, IsLessThanDay(timexUnit));
        outcome.FutureValue = outcome.PastValue = quantity * this.config.UnitValueMap[unitText];
        outcome.Success = true;
    }
    else if (inexactMatch.Groups[Constants.BusinessDayGroupName].Success)
    {
        outcome.Timex = TimexUtility.GenerateDurationTimex(quantity, Constants.TimexBusinessDay, false);

        // The last word of the matched unit is the lookup key, which also covers
        // single word "business day" expressions.
        var unitWords = unitText.Split();
        var lastWord = unitWords[unitWords.Length - 1];
        outcome.FutureValue = outcome.PastValue = quantity * this.config.UnitValueMap[lastWord];
        outcome.Success = true;
    }

    return outcome;
}
/// <summary>
/// Parses a duration with an inexact amount (e.g. "few", "some") using <c>config.InexactNumberUnitRegex</c>.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <returns>A successful result for a known or business-day unit; otherwise an unsuccessful one.</returns>
private DateTimeResolutionResult ParseInexactNumberUnit(string text)
{
    var ret = new DateTimeResolutionResult();

    var match = config.InexactNumberUnitRegex.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    // set the inexact number "few", "some" to 3 for now
    // The amount is used directly as a double; it is only ever 2 or 3, so no upper-bound check is needed.
    double numVal = match.Groups["NumTwoTerm"].Success ? 2 : 3;
    var srcUnit = match.Groups["unit"].Value.ToLower();

    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
        ret.Success = true;
    }
    else if (match.Groups[Constants.BusinessDayGroupName].Success)
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false);

        // Use the LAST word of the matched unit: indexing [1] throws IndexOutOfRangeException
        // when the business-day unit is a single word.
        var unitWords = srcUnit.Split();
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        ret.Success = true;
    }

    return ret;
}
/// <summary>
/// Parses a duration with an implied amount: one unit by default, or half a unit when the
/// <c>half</c> group of the match succeeded.
/// </summary>
/// <param name="text">The text matched against <c>config.AnUnitRegex</c>, then <c>config.HalfDateUnitRegex</c>.</param>
/// <returns>A successful result for a known or business-day unit; otherwise an unsuccessful one.</returns>
private DateTimeResolutionResult ParseAnUnit(string text)
{
    var ret = new DateTimeResolutionResult();

    var match = this.config.AnUnitRegex.Match(text);
    if (!match.Success)
    {
        match = this.config.HalfDateUnitRegex.Match(text);
    }

    if (!match.Success)
    {
        return ret;
    }

    var numVal = match.Groups["half"].Success ? 0.5 : 1;
    numVal += ParseNumberWithUnitAndSuffix(text);

    var srcUnit = match.Groups["unit"].Value.ToLower();

    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr));

        // Multiply the double itself: formatting it with the invariant culture and re-parsing with the
        // thread culture turns 0.5 into 5 where '.' is a group separator, contradicting the Timex.
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit];
        ret.Success = true;
    }
    else if (match.Groups[Constants.BusinessDayGroupName].Success)
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false);

        // Use the LAST word of the matched unit: indexing [1] throws IndexOutOfRangeException
        // when the business-day unit is a single word.
        var unitWords = srcUnit.Split();
        ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitWords[unitWords.Length - 1]];
        ret.Success = true;
    }

    return ret;
}
/// <summary>
/// Handle cases like "Monday 7-9", where "7-9" can't be extracted by the TimePeriodExtractor.
/// </summary>
/// <param name="text">The text to parse; it is trimmed and lower-cased before matching.</param>
/// <param name="referenceTime">Reference time passed to the date extractor and date parser.</param>
/// <returns>
/// A successful result holding the begin/end date-time pair when an hour range sits at the start
/// or end of the text and the remainder parses as a date; otherwise an unsuccessful result.
/// </returns>
private DateTimeResolutionResult ParsePureNumberCases(string text, DateObject referenceTime)
{
    var result = new DateTimeResolutionResult();
    var normalized = text.Trim().ToLower();

    var rangeMatch = this.Config.PureNumberFromToRegex.Match(normalized);
    if (!rangeMatch.Success)
    {
        rangeMatch = this.Config.PureNumberBetweenAndRegex.Match(normalized);
    }

    // The hour range must sit at the very start or the very end of the text.
    if (!rangeMatch.Success ||
        !(rangeMatch.Index == 0 || rangeMatch.Index + rangeMatch.Length == normalized.Length))
    {
        return result;
    }

    int beginHour, endHour;
    result.Comment = ParseTimePeriod(rangeMatch, out beginHour, out endHour);

    // Parse following date
    var remainder = normalized.Replace(rangeMatch.Value, "");
    var dateCandidates = this.Config.DateExtractor.Extract(remainder, referenceTime);
    if (dateCandidates.Count <= 0)
    {
        return result;
    }

    var parsedDate = this.Config.DateParser.Parse(dateCandidates[0], referenceTime);
    if (parsedDate.Value == null)
    {
        return result;
    }

    var dateResolution = (DateTimeResolutionResult)parsedDate.Value;
    var futureDate = (DateObject)dateResolution.FutureValue;
    var pastDate = (DateObject)dateResolution.PastValue;
    var dateTimex = parsedDate.TimexStr;

    if (dateResolution.TimeZoneResolution != null)
    {
        result.TimeZoneResolution = dateResolution.TimeZoneResolution;
    }

    var hourSpan = endHour - beginHour;
    var beginTimex = TimexUtility.CombineDateAndTimeTimex(dateTimex, DateTimeFormatUtil.ShortTime(beginHour));
    var endTimex = TimexUtility.CombineDateAndTimeTimex(dateTimex, DateTimeFormatUtil.ShortTime(endHour));
    var durationTimex = TimexUtility.GenerateDurationTimex(hourSpan, Constants.TimexHour, isLessThanDay: true);

    result.Timex = TimexUtility.GenerateDateTimePeriodTimex(beginTimex, endTimex, durationTimex);

    var futureBegin = DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginHour, 0, 0);
    var futureEnd = DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endHour, 0, 0);
    result.FutureValue = new Tuple<DateObject, DateObject>(futureBegin, futureEnd);

    var pastBegin = DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginHour, 0, 0);
    var pastEnd = DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endHour, 0, 0);
    result.PastValue = new Tuple<DateObject, DateObject>(pastBegin, pastEnd);

    result.Success = true;
    return result;
}
/// <summary>
/// Parses an extracted duration into a <see cref="DateTimeParseResult"/>, trying the merged-duration
/// parser, then the inexact-number parser, then the internal number-with-unit parser.
/// </summary>
/// <param name="er">The extraction result to parse.</param>
/// <param name="refDate">The reference date passed on to <c>ParseMergedDuration</c>.</param>
/// <returns>
/// The parse result, or <c>null</c> when the first two parsers fail and the internal parser
/// does not produce a <c>UnitValue</c>.
/// </returns>
public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
{
    var resolution = ParseMergedDuration(er.Text, refDate);

    if (!resolution.Success)
    {
        resolution = DurationParsingUtil.ParseInexactNumberUnit(er.Text, this.config);
    }

    if (!resolution.Success)
    {
        var unitValue = this.config.InternalParser.Parse(er).Value as UnitValue;
        if (unitValue == null)
        {
            return null;
        }

        var unit = unitValue.Unit;

        // A missing number means a single unit.
        double amount;
        if (string.IsNullOrEmpty(unitValue.Number))
        {
            amount = 1;
        }
        else
        {
            amount = double.Parse(unitValue.Number, CultureInfo.InvariantCulture);
        }

        resolution.Timex = TimexUtility.GenerateDurationTimex(amount, unit, DurationParsingUtil.IsLessThanDay(unit));
        resolution.FutureValue = resolution.PastValue = amount * this.config.UnitValueMap[unit];
        resolution.Success = true;
    }

    if (resolution.Success)
    {
        resolution.FutureResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, resolution.FutureValue.ToString() },
        };

        resolution.PastResolution = new Dictionary<string, string>
        {
            { TimeTypeConstants.DURATION, resolution.PastValue.ToString() },
        };

        // Tag the result with a "less than" / "more than" modifier when the text contains one.
        var modifierMatch = config.MoreOrLessRegex.Match(er.Text);
        if (modifierMatch.Success)
        {
            if (modifierMatch.Groups["less"].Success)
            {
                resolution.Mod = Constants.LESS_THAN_MOD;
            }
            else if (modifierMatch.Groups["more"].Success)
            {
                resolution.Mod = Constants.MORE_THAN_MOD;
            }
        }
    }

    return new DateTimeParseResult
    {
        Text = er.Text,
        Start = er.Start,
        Length = er.Length,
        Type = er.Type,
        Data = er.Data,
        Value = resolution,
        TimexStr = resolution.Timex,
        ResolutionStr = string.Empty,
    };
}
/// <summary>
/// Handle cases like "三天前" ("Three days ago"): a duration followed by a before/after marker,
/// resolved to a date relative to <paramref name="referenceDate"/>.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="referenceDate">The date the duration is shifted from.</param>
/// <returns>
/// A successful result holding the shifted date; otherwise an unsuccessful result (whose
/// <c>Timex</c> may still hold the duration Timex when the unit was known but no marker matched).
/// </returns>
private DateTimeResolutionResult ParserDurationWithAgoAndLater(string text, DateObject referenceDate)
{
    var result = new DateTimeResolutionResult();

    var durations = this.config.DurationExtractor.Extract(text, referenceDate);
    if (durations.Count <= 0)
    {
        return result;
    }

    var unitHit = this.config.UnitRegex.Match(text);
    if (!unitHit.Success)
    {
        return result;
    }

    var duration = durations[0];
    var durationStart = (int)duration.Start;

    // Text after the duration, and the number text between the duration start and the unit.
    var trailing = text.Substring(durationStart + (int)duration.Length).Trim();
    var sourceUnit = unitHit.Groups["unit"].Value;
    var countText = text.Substring(durationStart, unitHit.Index - durationStart).Trim();

    var relativeHit = this.config.DurationRelativeDurationUnitRegex.Match(text);
    var isFew = countText.Equals(relativeHit.Groups["few"].Value, StringComparison.Ordinal);

    // set the inexact number "数" (few) to 3 for now
    var count = isFew ? 3 : ConvertCJKToNum(countText);

    if (!isFew && trailing.Equals(relativeHit.Value, StringComparison.Ordinal))
    {
        var parsedDuration = this.config.DurationParser.Parse(duration, referenceDate);
        var future = trailing.Equals(relativeHit.Groups["later"].Value, StringComparison.Ordinal);
        const int dayShift = 0;

        if (parsedDuration != null)
        {
            var shifted = DurationParsingUtil.ShiftDateTime(parsedDuration.TimexStr, referenceDate.AddDays(dayShift), future);
            result.Timex = $"{DateTimeFormatUtil.LuisDate(shifted)}";
            result.FutureValue = result.PastValue = shifted;
            result.Success = true;
            return result;
        }
    }

    string timexUnit;
    if (!this.config.UnitMap.TryGetValue(sourceUnit, out timexUnit))
    {
        return result;
    }

    result.Timex = TimexUtility.GenerateDurationTimex(count, timexUnit, DurationParsingUtil.IsLessThanDay(timexUnit));

    DateObject resolved = Constants.InvalidDate;

    var beforeHit = this.config.BeforeRegex.Match(trailing);
    if (beforeHit.Success && trailing.StartsWith(beforeHit.Value, StringComparison.Ordinal))
    {
        resolved = DurationParsingUtil.ShiftDateTime(result.Timex, referenceDate, future: false);
    }

    // Checked second, so an "after" marker wins if both markers match.
    var afterHit = this.config.AfterRegex.Match(trailing);
    if (afterHit.Success && trailing.StartsWith(afterHit.Value, StringComparison.Ordinal))
    {
        resolved = DurationParsingUtil.ShiftDateTime(result.Timex, referenceDate, future: true);
    }

    if (resolved != Constants.InvalidDate)
    {
        result.Timex = $"{DateTimeFormatUtil.LuisDate(resolved)}";
        result.FutureValue = result.PastValue = resolved;
        result.Success = true;
    }

    return result;
}