/// <summary>
/// Finds spans made of a location immediately followed by a location-time suffix.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>
/// One token per suffix match that starts exactly where a location match ends, spanning from
/// the location's start to the suffix's end. Empty when no suffix regex is configured or when
/// the regex matches nothing.
/// </returns>
private IEnumerable<Token> MatchLocationTimes(string text)
{
    var found = new List<Token>();

    if (config.LocationTimeSuffixRegex == null)
    {
        return found;
    }

    var suffixMatches = config.LocationTimeSuffixRegex.Matches(text);
    if (suffixMatches.Count == 0)
    {
        return found;
    }

    // Locations are only searched for in the text that precedes the last suffix match.
    var searchLimit = suffixMatches[suffixMatches.Count - 1].Index;
    var loweredPrefix = text.Substring(0, searchLimit).ToLowerInvariant();
    var locations = MatchingUtil.RemoveSubMatches(config.LocationMatcher.Find(loweredPrefix));

    // Single forward cursor shared by all suffixes.
    // NOTE(review): this presumes the location matches are ordered by position - confirm.
    var cursor = 0;
    foreach (Match suffix in suffixMatches)
    {
        var cursorBefore = cursor;

        // Step past every location that ends at or before this suffix.
        while (cursor < locations.Count && locations[cursor].End <= suffix.Index)
        {
            cursor++;
        }

        // Only a location consumed in this pass that ends exactly at the suffix counts.
        var advanced = cursor > cursorBefore;
        if (advanced && locations[cursor - 1].End == suffix.Index)
        {
            found.Add(new Token(locations[cursor - 1].Start, suffix.Index + suffix.Length));
        }

        // No locations left, so no later suffix can be paired.
        if (cursor == locations.Count)
        {
            break;
        }
    }

    return found;
}
/// <summary>
/// Finds spans made of a location immediately followed by a location-time suffix, skipping
/// the location lookup when every suffix is already covered by a previously extracted token.
/// </summary>
/// <param name="text">Text the suffix regex and the location matcher are run against.</param>
/// <param name="tokens">Tokens already extracted; a suffix lying fully inside one of them is treated as covered.</param>
/// <param name="originalText">Text used to recompute offsets when <paramref name="reIndex"/> is true.</param>
/// <param name="reIndex">
/// When true, location starts and suffix positions are looked up again in
/// <paramref name="originalText"/> (ordinal search starting at their offset in <paramref name="text"/>).
/// </param>
/// <returns>
/// One token per suffix that starts exactly where a location match ends, spanning from the
/// location's start to the suffix's end. Empty when no suffix regex is configured, when the
/// regex matches nothing, or when all suffixes are covered by <paramref name="tokens"/>.
/// </returns>
private IEnumerable<Token> MatchLocationTimes(string text, List<Token> tokens, string originalText, bool reIndex)
{
    var result = new List<Token>();

    if (config.LocationTimeSuffixRegex == null)
    {
        return result;
    }

    var suffixMatches = config.LocationTimeSuffixRegex.Matches(text);

    // Before calling Find() on the location matcher, check whether every suffix matched by
    // LocationTimeSuffixRegex already lies inside one of the supplied tokens. If so, skip
    // Find() entirely; a single uncovered suffix is enough to require it.
    var hasUncoveredSuffix = false;
    foreach (Match suffix in suffixMatches)
    {
        var suffixStart = suffix.Index;
        var suffixEnd = suffix.Index + suffix.Length;
        if (!tokens.Any(t => t.Start <= suffixStart && t.End >= suffixEnd))
        {
            hasUncoveredSuffix = true;
            break;
        }
    }

    // Also covers the "no suffix matched at all" case: the loop above never ran.
    if (!hasUncoveredSuffix)
    {
        return result;
    }

    // Locations are only searched for in the text that precedes the last suffix match.
    var searchLimit = suffixMatches[suffixMatches.Count - 1].Index;
    var locations = MatchingUtil.RemoveSubMatches(config.LocationMatcher.Find(text.Substring(0, searchLimit)));

    if (reIndex)
    {
        foreach (var location in locations)
        {
            var canonical = location.CanonicalValues.FirstOrDefault();
            if (canonical == null)
            {
                // Nothing to look up; IndexOf would throw on a null value.
                continue;
            }

            var relocated = originalText.IndexOf(canonical, location.Start, StringComparison.Ordinal);

            // IndexOf returns -1 when the value is not found. Keep the existing start in that
            // case rather than letting -1 become the start offset of an emitted token.
            if (relocated >= 0)
            {
                location.Start = relocated;
            }
        }
    }

    // Single forward cursor shared by all suffixes.
    // NOTE(review): this presumes the location matches are ordered by position - confirm.
    var cursor = 0;
    foreach (Match suffix in suffixMatches)
    {
        var suffixIndex = reIndex
            ? originalText.IndexOf(suffix.Value, suffix.Index, StringComparison.Ordinal)
            : suffix.Index;

        // Suffix could not be located in the original text: nothing can be adjacent to it.
        if (suffixIndex < 0)
        {
            continue;
        }

        var cursorBefore = cursor;

        // Step past every location that ends at or before this suffix.
        while (cursor < locations.Count && locations[cursor].End <= suffixIndex)
        {
            cursor++;
        }

        // Only a location consumed in this pass that ends exactly at the suffix counts.
        if (cursor > cursorBefore && locations[cursor - 1].End == suffixIndex)
        {
            result.Add(new Token(locations[cursor - 1].Start, suffixIndex + suffix.Length));
        }

        // No locations left, so no later suffix can be paired.
        if (cursor == locations.Count)
        {
            break;
        }
    }

    return result;
}