/// <summary>
/// Finds places where a date mention is followed by a time mention and reports
/// each such pair as one token, e.g. "明天早上七点" ("tomorrow morning at seven").
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="referenceTime">Reference time forwarded to the underlying extractors.</param>
/// <returns>One token per merged date + time span; empty when nothing can be merged.</returns>
public static List<Token> MergeDateAndTime(string text, DateObject referenceTime)
{
    var merged = new List<Token>();

    // Without at least one date there is nothing to attach a time to.
    var candidates = DatePointExtractor.Extract(text, referenceTime);
    if (candidates.Count == 0)
    {
        return merged;
    }

    candidates.AddRange(TimePointExtractor.Extract(text, referenceTime));
    if (candidates.Count < 2)
    {
        return merged;
    }

    candidates = candidates.OrderBy(c => c.Start).ToList();

    var current = 0;
    while (current < candidates.Count - 1)
    {
        // Advance to the first later candidate that does not overlap the current one.
        var next = current + 1;
        while (next < candidates.Count && candidates[current].IsOverlap(candidates[next]))
        {
            next++;
        }

        if (next >= candidates.Count)
        {
            break;
        }

        var first = candidates[current];
        var second = candidates[next];

        var isDateThenTime =
            first.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
            second.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal);
        if (!isDateThenTime)
        {
            current = next;
            continue;
        }

        var gapStart = first.Start + first.Length ?? 0;
        var gapEnd = second.Start ?? 0;
        if (gapStart > gapEnd)
        {
            // Inconsistent offsets: stop scanning altogether.
            break;
        }

        // The pair is merged only when the text between the two mentions is empty,
        // a single comma, or matches the preposition pattern.
        var gap = text.Substring(gapStart, gapEnd - gapStart).Trim();
        var isMergeable = string.IsNullOrEmpty(gap)
                          || gap.Equals(",", StringComparison.Ordinal)
                          || PrepositionRegex.IsMatch(gap);
        if (isMergeable)
        {
            var tokenStart = first.Start ?? 0;
            var tokenEnd = (second.Start ?? 0) + (second.Length ?? 0);
            merged.Add(new Token(tokenStart, tokenEnd));
        }

        // The time mention has been examined with this date either way; continue after it.
        current = next + 1;
    }

    return merged;
}
/// <summary>
/// Merges two consecutive date mentions into one period token when the text
/// between them is an exact match of <c>TillRegex</c>,
/// i.e. the pattern '{TimePoint} 到 {TimePoint}' ("{TimePoint} to {TimePoint}").
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="referenceTime">Reference time forwarded to the date extractor.</param>
/// <returns>One token per merged period; empty when fewer than two dates are found.</returns>
private static List<Token> MergeTwoTimePoints(string text, DateObject referenceTime)
{
    var ret = new List<Token>();
    var er = DatePointExtractor.Extract(text, referenceTime);
    if (er.Count <= 1)
    {
        return ret;
    }

    // merge '{TimePoint} 到 {TimePoint}'
    var idx = 0;
    while (idx < er.Count - 1)
    {
        var middleBegin = er[idx].Start + er[idx].Length ?? 0;
        var middleEnd = er[idx + 1].Start ?? 0;

        // No text between the two mentions (adjacent or overlapping): nothing to test.
        if (middleBegin >= middleEnd)
        {
            idx++;
            continue;
        }

        var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
        if (TillRegex.IsExactMatch(middleStr, trim: true))
        {
            var periodBegin = er[idx].Start ?? 0;
            var periodEnd = (er[idx + 1].Start ?? 0) + (er[idx + 1].Length ?? 0);

            // Handle a leading "从" ("from"): extend the period to include it.
            // Ordinal comparison keeps this check consistent with the ordinal
            // LastIndexOf below, so the index found always belongs to the suffix
            // that was detected.
            var beforeStr = text.Substring(0, periodBegin);
            if (beforeStr.Trim().EndsWith("从", StringComparison.Ordinal))
            {
                periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal);
            }

            ret.Add(new Token(periodBegin, periodEnd));

            // Both mentions are consumed by the merged period.
            idx += 2;
            continue;
        }

        idx++;
    }

    return ret;
}
/// <summary>
/// For every extracted date, checks whether the text before it matches
/// <c>EachPrefixRegex</c> and, if so, emits a token that starts at the prefix match.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="referenceTime">Reference time forwarded to the date extractor.</param>
/// <returns>One token per date whose preceding text matches the prefix pattern.</returns>
public static List<Token> MatchEachDate(string text, DateObject referenceTime)
{
    var tokens = new List<Token>();

    foreach (var date in DateExtractor.Extract(text, referenceTime))
    {
        var precedingText = text.Substring(0, date.Start ?? 0);
        var prefixMatch = EachPrefixRegex.Match(precedingText);
        if (!prefixMatch.Success)
        {
            continue;
        }

        // Token end = end of the prefix match plus the date's length.
        // NOTE(review): this equals the date's end only if the prefix match ends
        // exactly where the date starts — presumably the regex is end-anchored; confirm.
        var tokenStart = prefixMatch.Index;
        var tokenEnd = tokenStart + prefixMatch.Length + (date.Length ?? 0);
        tokens.Add(new Token(tokenStart, tokenEnd));
    }

    return tokens;
}
/// <summary>
/// Runs every individual date/time extractor over the text, combines their
/// results, applies the black-list and modifier passes, and returns the
/// combined results ordered by start position.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="referenceTime">Reference time forwarded to every extractor.</param>
/// <returns>The combined extraction results sorted by <c>Start</c>.</returns>
public List<ExtractResult> Extract(string text, DateObject referenceTime)
{
    var results = DateExtractor.Extract(text, referenceTime);

    // The order of the following steps is significant: merging is order-dependent,
    // so the extractors must be combined in exactly this sequence.
    var timeResults = TimeExtractor.Extract(text, referenceTime);
    AddTo(results, timeResults);

    var durationResults = DurationExtractor.Extract(text, referenceTime);
    AddTo(results, durationResults);

    var datePeriodResults = DatePeriodExtractor.Extract(text, referenceTime);
    AddTo(results, datePeriodResults);

    var dateTimeResults = DateTimeExtractor.Extract(text, referenceTime);
    AddTo(results, dateTimeResults);

    var timePeriodResults = TimePeriodExtractor.Extract(text, referenceTime);
    AddTo(results, timePeriodResults);

    var dateTimePeriodResults = DateTimePeriodExtractor.Extract(text, referenceTime);
    AddTo(results, dateTimePeriodResults);

    var setResults = SetExtractor.Extract(text, referenceTime);
    AddTo(results, setResults);

    var holidayResults = HolidayExtractor.Extract(text, referenceTime);
    AddTo(results, holidayResults);

    // Post-processing passes; the black-list pass takes the list by reference
    // and may therefore replace it.
    CheckBlackList(ref results, text);
    AddMod(results, text);

    return results.OrderBy(r => r.Start).ToList();
}
/// <summary>
/// Merges a date mention with the time-period mention that follows it into one
/// token when the text between them is blank or matches the preposition pattern.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="referenceTime">Reference time forwarded to the underlying extractors.</param>
/// <returns>One token per merged {Date} {TimePeriod} span.</returns>
private List<Token> MergeDateAndTimePeriod(string text, DateObject referenceTime)
{
    var ret = new List<Token>();
    var er1 = SingleDateExtractor.Extract(text, referenceTime);
    var er2 = TimePeriodExtractor.Extract(text, referenceTime);
    var timePoints = new List<ExtractResult>();

    // Handle the overlap problem: keep every date, keep the time periods that end
    // at or before a date's start, and drop the time periods that overlap a date.
    var j = 0;
    for (var i = 0; i < er1.Count; i++)
    {
        timePoints.Add(er1[i]);

        while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start)
        {
            timePoints.Add(er2[j]);
            j++;
        }

        while (j < er2.Count && er2[j].IsOverlap(er1[i]))
        {
            j++;
        }
    }

    // Time periods left over after the last date are kept as they are.
    for (; j < er2.Count; j++)
    {
        timePoints.Add(er2[j]);
    }

    timePoints = timePoints.OrderBy(o => o.Start).ToList();

    // merge {Date} {TimePeriod}
    var idx = 0;
    while (idx < timePoints.Count - 1)
    {
        var current = timePoints[idx];
        var next = timePoints[idx + 1];

        if (current.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
            next.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal))
        {
            var middleBegin = current.Start + current.Length ?? 0;
            var middleEnd = next.Start ?? 0;

            // Only slice the text when the offsets are consistent; a negative
            // length would make Substring throw ArgumentOutOfRangeException.
            if (middleBegin <= middleEnd)
            {
                var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
                if (string.IsNullOrWhiteSpace(middleStr) || PrepositionRegex.IsMatch(middleStr))
                {
                    var periodBegin = current.Start ?? 0;
                    var periodEnd = (next.Start ?? 0) + (next.Length ?? 0);
                    ret.Add(new Token(periodBegin, periodEnd));

                    // Both mentions are consumed by the merged token.
                    idx += 2;
                    continue;
                }
            }
        }

        // Not mergeable here. Advancing by one is enough: if the next item is a
        // time period it simply fails the type check on the following iteration.
        idx++;
    }

    return ret;
}