Пример #1
0
        // Cases like "3 days from today", "5 weeks before yesterday", "2 months after tomorrow"
        // Note that these cases are of type "date"
        // Builds the tokens for relative duration dates found in the input.
        //   text      - input text being scanned.
        //   tokens    - tokens already extracted by the caller; copied locally and used as the date part of
        //               combined "duration + before/after + date" patterns.
        //   reference - reference date forwarded to the duration and holiday extractors.
        // Returns the tokens found, including "in <duration>" cases that do not overlap an earlier result.
        private List <Token> ExtractRelativeDurationDate(string text, List <Token> tokens, DateObject reference)
        {
            var ret        = new List <Token>();
            var tempTokens = new List <Token>(tokens);
            var durationEr = Config.DurationExtractor.Extract(text, reference);

            // The holiday extraction depends only on (text, reference), so it is run at most once and its
            // tokens are appended to tempTokens a single time, instead of once per duration (which re-did
            // the same work and filled tempTokens with duplicate entries).
            var holidayTokensAdded = false;

            foreach (var er in durationEr)
            {
                // If it is a multiple duration but its type is not Date, skip it here
                if (IsMultipleDuration(er) && !IsMultipleDurationDate(er))
                {
                    continue;
                }

                // Some durations can be compounded with a "before", "after" or "from" suffix to create a "date":
                // e.g. "3 days", "2 weeks", "1 week and 2 days".
                // Others create a "dateperiod" when compounded with such a suffix:
                // e.g. "more than 3 days from today", "less than 2 weeks from today".
                // This method extracts RelativeDurationDate only, so for "more than"/"less than" durations
                // the inequality prefix is stripped to obtain the expected RelativeDurationDate.
                if (IsInequalityDuration(er))
                {
                    StripInequalityDuration(er);
                }

                var match = Config.DateUnitRegex.Match(er.Text);

                // Only durations containing a date unit are candidates
                if (!match.Success)
                {
                    continue;
                }

                // NOTE(review): ret is both passed to the helper and the target of AddRange; if the helper
                // returns that same list instance its contents are appended twice - confirm against AgoLaterUtil.
                ret.AddRange(AgoLaterUtil.ExtractorDurationWithBeforeAndAfter(text, er, ret, Config.UtilityConfiguration));

                // Take into account also holiday dates (only needed while nothing has been extracted yet)
                if (ret.Count < 1 && !holidayTokensAdded)
                {
                    var holidayEr = Config.HolidayExtractor.Extract(text, reference);
                    foreach (var holiday in holidayEr)
                    {
                        tempTokens.Add(new Token((int)holiday.Start, (int)(holiday.Start + holiday.Length)));
                    }

                    holidayTokensAdded = true;
                }

                // Check for combined patterns Duration + Date, e.g. '3 days before Monday', '4 weeks after January 15th'
                if (ret.Count < 1 && tempTokens.Count > 0 && er.Text != match.Value)
                {
                    var durationEnd = (int)er.Start + (int)er.Length;
                    var afterStr    = text.Substring(durationEnd);
                    var connector   = Config.BeforeAfterRegex.MatchBegin(afterStr, trim: true);
                    if (connector.Success)
                    {
                        // Position right after the connector; identical for every candidate token
                        var start = durationEnd + connector.Index + connector.Length;

                        foreach (var token in tempTokens)
                        {
                            // The date token must follow the connector, separated only by whitespace
                            var length = token.Start - start;
                            if (length > 0 && start + length < text.Length && string.IsNullOrWhiteSpace(text.Substring(start, length)))
                            {
                                // Span from the start of the duration to the end of the date
                                ret.Add(new Token((int)er.Start, token.End));
                            }
                        }
                    }
                }
            }

            // Extract cases like "in 3 weeks", which equals to "3 weeks from today"
            var relativeDurationDateWithInPrefix = ExtractRelativeDurationDateWithInPrefix(text, durationEr, reference);

            // For cases like "in 3 weeks from today", we should choose "3 weeks from today" as the extract result rather than "in 3 weeks" or "in 3 weeks from today"
            foreach (var extractResultWithInPrefix in relativeDurationDateWithInPrefix)
            {
                if (!IsOverlapWithExistExtractions(extractResultWithInPrefix, ret))
                {
                    ret.Add(extractResultWithInPrefix);
                }
            }

            return(ret);
        }
Пример #2
0
        // Used in ExtractRelativeDurationDateWithInPrefix to extract the connector "in" in cases like "In 3 days/weeks/months/years"
        // Looks for the "in" connector next to a duration and, when found on a range-unit duration,
        // returns a single token for the combined expression.
        //   text      - full input text; the duration's own text is read from it.
        //   firstStr  - string searched for the connector: at its end when inPrefix is true,
        //               at its beginning otherwise.
        //   secondStr - string searched for a "since year" suffix.
        //   duration  - token of the duration being examined.
        //   success   - set to whether the connector matched, even when no token is produced.
        //   inPrefix  - true when the connector is expected before the duration.
        // Returns a list holding zero or one token.
        private List <Token> ExtractInConnector(string text, string firstStr, string secondStr, Token duration, out bool success, bool inPrefix)
        {
            var tokens = new List <Token>();

            var connector = inPrefix
                ? Config.InConnectorRegex.MatchEnd(firstStr, trim: true)
                : Config.InConnectorRegex.MatchBegin(firstStr, trim: true);

            success = connector.Success;

            if (!success)
            {
                return(tokens);
            }

            // Only durations whose text contains a range unit can produce a token
            var durationText = text.Substring(duration.Start, duration.Length);
            if (!Config.RangeUnitRegex.Match(durationText).Success)
            {
                return(tokens);
            }

            var sinceYear = Config.SinceYearSuffixRegex.Match(secondStr);

            int tokenStart;
            int tokenEnd;

            if (inPrefix)
            {
                // Connector precedes the duration; a "since year" suffix extends the end
                tokenStart = connector.Index;
                tokenEnd   = sinceYear.Success ? duration.End + sinceYear.Length : duration.End;
            }
            else
            {
                // Connector follows the duration; the end always covers the connector
                tokenStart = sinceYear.Success ? sinceYear.Index : duration.Start;
                tokenEnd   = duration.End + connector.Index + connector.Length;
            }

            tokens.Add(new Token(tokenStart, tokenEnd));

            return(tokens);
        }
Пример #3
0
        // Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon"
        // Merges a date result with the next non-overlapping time result into one token when the text
        // between them is accepted by IsValidConnectorForDateAndTimePeriod.
        //   text    - full input text the results were extracted from.
        //   dateErs - date extraction results (not modified).
        //   timeErs - time extraction results (not modified).
        // Returns the merged tokens; empty when either input list is empty.
        private IEnumerable <Token> MergeDateWithTimePeriodSuffix(string text, List <ExtractResult> dateErs, List <ExtractResult> timeErs)
        {
            var ret = new List <Token>();

            if (!dateErs.Any() || !timeErs.Any())
            {
                return(ret);
            }

            // Build a fresh list instead of appending timeErs to dateErs, so the caller's list is left untouched.
            // OrderBy is a stable sort, so results with equal Start keep dates ahead of times.
            var ers = dateErs.Concat(timeErs).OrderBy(o => o.Start).ToList();

            var i = 0;

            while (i < ers.Count - 1)
            {
                // Find the next result that does not overlap ers[i]
                var j = i + 1;
                while (j < ers.Count && ers[i].IsOverlap(ers[j]))
                {
                    j++;
                }

                if (j >= ers.Count)
                {
                    break;
                }

                if (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
                    ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal))
                {
                    // '+' binds tighter than '??': this is (Start + Length) ?? 0
                    var middleBegin = ers[i].Start + ers[i].Length ?? 0;
                    var middleEnd   = ers[j].Start ?? 0;
                    if (middleBegin > middleEnd)
                    {
                        i = j + 1;
                        continue;
                    }

                    // Text between the end of the date and the start of the time
                    var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();

                    if (IsValidConnectorForDateAndTimePeriod(middleStr))
                    {
                        var begin = ers[i].Start ?? 0;
                        var end   = (ers[j].Start ?? 0) + (ers[j].Length ?? 0);
                        ret.Add(new Token(begin, end));
                    }

                    // The date/time pair has been handled (merged or not); move past both
                    i = j + 1;
                    continue;
                }

                i = j;
            }

            // Handle "in the afternoon" at the end of entity
            // NOTE(review): the token is extended by match.Length only, ignoring match.Index - this presumably
            // relies on SuffixRegex being anchored at the start of the remaining text; confirm.
            for (var idx = 0; idx < ret.Count; idx++)
            {
                var afterStr = text.Substring(ret[idx].End);
                var match    = this.config.SuffixRegex.Match(afterStr);
                if (match.Success)
                {
                    ret[idx] = new Token(ret[idx].Start, ret[idx].End + match.Length);
                }
            }

            return(ret);
        }