/// <summary>
/// Collects spans made of a location match that is immediately followed by a
/// match of the configured location-time suffix regex.
/// </summary>
/// <param name="text">Text to scan. The portion handed to the location matcher is lower-cased (invariant) first.</param>
/// <returns>
/// One token per suffix match that begins exactly where a location match ends, running from the
/// location's start to the suffix's end. Empty when no suffix regex is configured or nothing pairs up.
/// </returns>
private IEnumerable<Token> MatchLocationTimes(string text)
        {
            var tokens = new List<Token>();
            var suffixRegex = config.LocationTimeSuffixRegex;

            if (suffixRegex == null)
            {
                return tokens;
            }

            var suffixMatches = suffixRegex.Matches(text);

            if (suffixMatches.Count == 0)
            {
                return tokens;
            }

            // Only the text before the start of the last suffix is searched for locations.
            var searchLimit = suffixMatches[suffixMatches.Count - 1].Index;
            var searchArea = text.Substring(0, searchLimit).ToLowerInvariant();
            var locations = MatchingUtil.RemoveSubMatches(config.LocationMatcher.Find(searchArea));

            // The cursor is shared across suffixes, so each location is visited at most once.
            // NOTE(review): this assumes locations come back ordered by position - confirm.
            var cursor = 0;

            foreach (Match suffix in suffixMatches)
            {
                var consumedAny = false;

                // Skip past every location that ends at or before this suffix.
                for (; cursor < locations.Count && locations[cursor].End <= suffix.Index; cursor++)
                {
                    consumedAny = true;
                }

                if (consumedAny)
                {
                    // The last location skipped is the closest one preceding the suffix.
                    var nearest = locations[cursor - 1];

                    if (nearest.End == suffix.Index)
                    {
                        tokens.Add(new Token(nearest.Start, suffix.Index + suffix.Length));
                    }
                }

                // No locations left to pair with the remaining suffixes.
                if (cursor == locations.Count)
                {
                    break;
                }
            }

            return tokens;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Collects spans made of a location match that is immediately followed by a
        /// match of the configured location-time suffix regex, skipping the location
        /// search entirely when every suffix is already covered by an existing token.
        /// </summary>
        /// <param name="text">Text scanned by the suffix regex and by the location matcher.</param>
        /// <param name="tokens">Previously extracted tokens; a suffix lying fully inside one of them counts as already covered.</param>
        /// <param name="originalText">Text used to recompute positions when <paramref name="reIndex"/> is true.</param>
        /// <param name="reIndex">When true, location starts and suffix starts are re-located inside <paramref name="originalText"/>.</param>
        /// <returns>
        /// One token per suffix whose (possibly re-indexed) start equals the end of a location match,
        /// running from the location's start to the suffix start plus the suffix length. Empty when no
        /// suffix regex is configured, no suffix matches, or all suffixes are already covered.
        /// </returns>
        private IEnumerable<Token> MatchLocationTimes(string text, List<Token> tokens, string originalText, bool reIndex)
        {
            var result = new List<Token>();
            var suffixRegex = config.LocationTimeSuffixRegex;

            if (suffixRegex == null)
            {
                return result;
            }

            var suffixMatches = suffixRegex.Matches(text);

            // The location matcher's Find() is only worth calling when at least one suffix
            // is not already contained in a previously extracted token.
            var everySuffixCovered = true;

            foreach (Match suffix in suffixMatches)
            {
                var suffixEnd = suffix.Index + suffix.Length;

                if (!tokens.Exists(t => t.Start <= suffix.Index && t.End >= suffixEnd))
                {
                    everySuffixCovered = false;
                    break;
                }
            }

            if (suffixMatches.Count == 0 || everySuffixCovered)
            {
                return result;
            }

            // Only the text before the start of the last suffix is searched for locations.
            var searchLimit = suffixMatches[suffixMatches.Count - 1].Index;
            var locations = MatchingUtil.RemoveSubMatches(config.LocationMatcher.Find(text.Substring(0, searchLimit)));

            if (reIndex)
            {
                // Move each location's start to where its first canonical value occurs in
                // originalText, searching from the location's current start.
                // NOTE(review): IndexOf returns -1 when nothing is found and throws on a null
                // needle; neither case is guarded here - confirm callers guarantee a hit.
                foreach (var location in locations)
                {
                    var canonical = location.CanonicalValues.FirstOrDefault();
                    location.Start = originalText.IndexOf(canonical, location.Start, StringComparison.Ordinal);
                }
            }

            // The cursor is shared across suffixes, so each location is visited at most once.
            var cursor = 0;

            foreach (Match suffix in suffixMatches)
            {
                var suffixStart = reIndex
                    ? originalText.IndexOf(suffix.Value, suffix.Index, StringComparison.Ordinal)
                    : suffix.Index;

                var consumedAny = false;

                // Skip past every location that ends at or before this suffix.
                for (; cursor < locations.Count && locations[cursor].End <= suffixStart; cursor++)
                {
                    consumedAny = true;
                }

                if (consumedAny)
                {
                    // The last location skipped is the closest one preceding the suffix.
                    var nearest = locations[cursor - 1];

                    if (nearest.End == suffixStart)
                    {
                        result.Add(new Token(nearest.Start, suffixStart + suffix.Length));
                    }
                }

                // No locations left to pair with the remaining suffixes.
                if (cursor == locations.Count)
                {
                    break;
                }
            }

            return result;
        }