// Merge a Date entity and a Time entity, like "明天早上七点" ("tomorrow morning at seven o'clock").
// text: the input string to scan.
// referenceTime: passed through unchanged to DatePointExtractor and TimePointExtractor.
// Returns: one token (start offset, end offset) for every date entity that is followed by a
// non-overlapping time entity with nothing, a comma, or a PrepositionRegex match in between.
public static List<Token> MergeDateAndTime(string text, DateObject referenceTime)
{
    var tokens = new List<Token>();

    // Without at least one date entity there is nothing to merge.
    var entities = DatePointExtractor.Extract(text, referenceTime);
    if (entities.Count == 0)
    {
        return tokens;
    }

    // A merge needs two entities in total (date results plus time results).
    entities.AddRange(TimePointExtractor.Extract(text, referenceTime));
    if (entities.Count < 2)
    {
        return tokens;
    }

    entities = entities.OrderBy(e => e.Start).ToList();

    var current = 0;
    while (current < entities.Count - 1)
    {
        // Advance to the first later entity that does not overlap the current one.
        var next = current + 1;
        while (next < entities.Count && entities[current].IsOverlap(entities[next]))
        {
            next++;
        }

        if (next >= entities.Count)
        {
            break;
        }

        var first = entities[current];
        var second = entities[next];

        var isDateThenTime =
            first.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
            second.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal);

        if (!isDateThenTime)
        {
            // Not a date followed by a time: retry with the later entity as the left side.
            current = next;
            continue;
        }

        // The gap is the text between the end of the date and the start of the time.
        var gapStart = first.Start + first.Length ?? 0;
        var gapEnd = second.Start ?? 0;
        if (gapStart > gapEnd)
        {
            // Stops the whole scan, not just this pair.
            break;
        }

        var gapText = text.Substring(gapStart, gapEnd - gapStart).Trim();
        var isMergeable =
            string.IsNullOrEmpty(gapText) ||
            gapText.Equals(",", StringComparison.Ordinal) ||
            PrepositionRegex.IsMatch(gapText);

        if (isMergeable)
        {
            var begin = first.Start ?? 0;
            var end = (second.Start ?? 0) + (second.Length ?? 0);
            tokens.Add(new Token(begin, end));
        }

        // Both entities of the pair are consumed whether or not they were merged.
        current = next + 1;
    }

    return tokens;
}
// Builds a token for each time entity whose preceding text matches EachDayRegex.
// text: the input string to scan.
// referenceTime: passed through unchanged to TimeExtractor.
// Returns: one token per time entity for which the match succeeded; empty when none did.
public static List<Token> TimeEveryday(string text, DateObject referenceTime)
{
    var tokens = new List<Token>();

    foreach (var timeEntity in TimeExtractor.Extract(text, referenceTime))
    {
        // Only the text in front of the time entity is searched.
        var prefix = text.Substring(0, timeEntity.Start ?? 0);
        var everyDay = EachDayRegex.Match(prefix);
        if (!everyDay.Success)
        {
            continue;
        }

        // NOTE(review): the end offset is the end of the match plus the time entity's length,
        // which lands on the end of the time entity only when the match runs right up to it -
        // presumably EachDayRegex is anchored at the end of its input; confirm.
        var tokenStart = everyDay.Index;
        var tokenEnd = tokenStart + everyDay.Length + (timeEntity.Length ?? 0);
        tokens.Add(new Token(tokenStart, tokenEnd));
    }

    return tokens;
}
// Runs each sub-extractor over the text, combines their results through AddTo, applies
// CheckBlackList and AddMod, and returns the results ordered by start offset.
// text: the input string to scan.
// referenceTime: passed through unchanged to every sub-extractor.
// Returns: a new list of the combined results sorted by Start.
public List<ExtractResult> Extract(string text, DateObject referenceTime)
{
    var results = DateExtractor.Extract(text, referenceTime);

    // The order is important, since there is a problem in merging.
    AddTo(results, TimeExtractor.Extract(text, referenceTime));
    AddTo(results, DurationExtractor.Extract(text, referenceTime));
    AddTo(results, DatePeriodExtractor.Extract(text, referenceTime));
    AddTo(results, DateTimeExtractor.Extract(text, referenceTime));
    AddTo(results, TimePeriodExtractor.Extract(text, referenceTime));
    AddTo(results, DateTimePeriodExtractor.Extract(text, referenceTime));
    AddTo(results, SetExtractor.Extract(text, referenceTime));
    AddTo(results, HolidayExtractor.Extract(text, referenceTime));

    // The list is passed by ref, so CheckBlackList may replace it as well as modify it.
    CheckBlackList(ref results, text);
    AddMod(results, text);

    return results.OrderBy(r => r.Start).ToList();
}
// Finds periods made of two time points: "{TimePoint} to {TimePoint}" and
// "between {TimePoint} and {TimePoint}".
// text: the input string to scan.
// referenceTime: passed through unchanged to TimeWithDateExtractor and SingleTimeExtractor.
// Returns: one token (start offset, end offset) per merged pair; empty when nothing merges.
private List<Token> MergeTwoTimePoints(string text, DateObject referenceTime)
{
    var ret = new List<Token>();
    var er1 = TimeWithDateExtractor.Extract(text, referenceTime);
    var er2 = SingleTimeExtractor.Extract(text, referenceTime);
    var timePoints = new List<ExtractResult>();

    // Handle the overlap problem: keep every er1 result, and keep an er2 result only when it
    // does not overlap the er1 result it is compared against.
    var j = 0;
    for (var i = 0; i < er1.Count; i++)
    {
        timePoints.Add(er1[i]);

        // er2 results that end at or before the start of this er1 result are kept.
        while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start)
        {
            timePoints.Add(er2[j]);
            j++;
        }

        // er2 results overlapping this er1 result are dropped.
        while (j < er2.Count && er2[j].IsOverlap(er1[i]))
        {
            j++;
        }
    }

    // Whatever is left in er2 was not consumed by the loop above.
    for (; j < er2.Count; j++)
    {
        timePoints.Add(er2[j]);
    }

    timePoints = timePoints.OrderBy(o => o.Start).ToList();

    // Merge "{TimePoint} to {TimePoint}", "between {TimePoint} and {TimePoint}".
    var idx = 0;
    while (idx < timePoints.Count - 1)
    {
        var left = timePoints[idx];
        var right = timePoints[idx + 1];

        // If both ends are Time, then this is a TimePeriod, not a DateTimePeriod.
        if (left.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal) &&
            right.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal))
        {
            idx++;
            continue;
        }

        var middleBegin = left.Start + left.Length ?? 0;
        var middleEnd = right.Start ?? 0;

        // Neighbouring results can still overlap after sorting; a negative length would make
        // Substring throw, so such a pair is skipped instead of aborting the extraction.
        if (middleBegin > middleEnd)
        {
            idx++;
            continue;
        }

        var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();

        // Handle "{TimePoint} to {TimePoint}".
        if (TillRegex.IsExactMatch(middleStr, trim: true))
        {
            var periodBegin = left.Start ?? 0;
            var periodEnd = (right.Start ?? 0) + (right.Length ?? 0);

            // Handle "from": extend the period back to a "从" that directly precedes it.
            // The comparison is ordinal to agree with the ordinal LastIndexOf below.
            var beforeStr = text.Substring(0, periodBegin);
            if (beforeStr.Trim().EndsWith("从", StringComparison.Ordinal))
            {
                periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal);
            }

            ret.Add(new Token(periodBegin, periodEnd));
            idx += 2;
            continue;
        }

        // Handle "between {TimePoint} and {TimePoint}": the connector ("和", "与" or "到")
        // only counts when ZhijianRegex matches the text after the second time point.
        if (middleStr.Equals("和", StringComparison.Ordinal) ||
            middleStr.Equals("与", StringComparison.Ordinal) ||
            middleStr.Equals("到", StringComparison.Ordinal))
        {
            var periodBegin = left.Start ?? 0;
            var periodEnd = (right.Start ?? 0) + (right.Length ?? 0);

            // Handle "between".
            // NOTE(review): adding match.Length to periodEnd assumes the match starts at the
            // beginning of afterStr - presumably ZhijianRegex is start-anchored; confirm.
            var afterStr = text.Substring(periodEnd);
            var match = ZhijianRegex.Match(afterStr);
            if (match.Success)
            {
                ret.Add(new Token(periodBegin, periodEnd + match.Length));
                idx += 2;
                continue;
            }
        }

        idx++;
    }

    return ret;
}