Esempio n. 1
0
        /// <summary>
        /// Inspects the text surrounding a duration extraction and, when an "ago"/"later" expression
        /// follows it or an "in"/"within" style connector precedes it, appends one token covering the
        /// duration together with that neighbouring expression.
        /// </summary>
        /// <param name="text">Text that the Start/Length offsets of <paramref name="er"/> index into.</param>
        /// <param name="er">The duration extraction; its Start, Length and Text are read. Start and Length must be non-null.</param>
        /// <param name="ret">List that receives at most one new token.</param>
        /// <param name="utilityConfiguration">Supplies the regexes used to examine the surrounding text.</param>
        /// <returns>The same <paramref name="ret"/> instance that was passed in.</returns>
        public static List <Token> ExtractorDurationWithBeforeAndAfter(string text, ExtractResult er, List <Token> ret,
                                                                       IDateTimeUtilityConfiguration utilityConfiguration)
        {
            int durationStart = (int)er.Start;
            int durationEnd   = durationStart + (int)er.Length;

            // An extraction that claims to end beyond the text cannot be extended.
            if (durationEnd > text.Length)
            {
                return ret;
            }

            string trailing    = text.Substring(durationEnd);
            string leading     = text.Substring(0, durationStart);
            bool   hasTimeUnit = utilityConfiguration.TimeUnitRegex.Match(er.Text).Success;
            int    offset;

            // Suffix checks take precedence over prefix checks; the first helper that reports a
            // match decides the outcome, even when its inner filter ends up adding nothing.
            // NOTE(review): offset is presumably the length of the suffix consumed by the match - confirm in MatchingUtil.
            if (MatchingUtil.GetAgoLaterIndex(trailing, utilityConfiguration.AgoRegex, out offset))
            {
                // "5 minutes ago" / "5 minutes from now" and "2 days before today" are supported,
                // but a time-unit duration combined with a "day" group (e.g. "5 minutes from today") is not.
                bool dayInSuffix = utilityConfiguration.AgoRegex.Match(trailing).Groups["day"].Success;

                if (!hasTimeUnit || !dayInSuffix)
                {
                    ret.Add(new Token(durationStart, durationEnd + offset));
                }

                return ret;
            }

            if (MatchingUtil.GetAgoLaterIndex(trailing, utilityConfiguration.LaterRegex, out offset))
            {
                // Same restriction as above, applied to the "later" expression.
                bool dayInSuffix = utilityConfiguration.LaterRegex.Match(trailing).Groups["day"].Success;

                if (!hasTimeUnit || !dayInSuffix)
                {
                    ret.Add(new Token(durationStart, durationEnd + offset));
                }

                return ret;
            }

            // NOTE(review): for GetTermIndex, offset is presumably the length of the matched term
            // at the end of the prefix - confirm in MatchingUtil.
            if (MatchingUtil.GetTermIndex(leading, utilityConfiguration.InConnectorRegex, out offset))
            {
                // Range units such as "week, month, year" are left for the dateRange / datetimeRange output.
                if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text) && durationStart >= offset)
                {
                    ret.Add(new Token(durationStart - offset, durationEnd));
                }

                return ret;
            }

            if (MatchingUtil.GetTermIndex(leading, utilityConfiguration.WithinNextPrefixRegex, out offset))
            {
                // Any date or time unit ("week, month, year, day, second, minute, hour") is left for
                // the dateRange / datetimeRange output.
                if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) &&
                    !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text) &&
                    durationStart >= offset)
                {
                    ret.Add(new Token(durationStart - offset, durationEnd));
                }
            }

            return ret;
        }
Esempio n. 2
0
        /// <summary>
        /// Inspects the text surrounding a duration extraction and appends one token covering the
        /// duration plus a preceding "in"/"within" style connector or a following "ago"/"later"
        /// expression, whichever is found first (prefix checks run before suffix checks).
        /// </summary>
        /// <param name="text">Text that the Start/Length offsets of <paramref name="er"/> index into.</param>
        /// <param name="er">The duration extraction; its Start, Length and Text are read. Start and Length must be non-null.</param>
        /// <param name="ret">List that receives at most one new token.</param>
        /// <param name="utilityConfiguration">Supplies the regexes used to examine the surrounding text.</param>
        /// <returns>The same <paramref name="ret"/> instance that was passed in.</returns>
        public static List <Token> ExtractorDurationWithBeforeAndAfter(string text, ExtractResult er, List <Token> ret,
                                                                       IDateTimeUtilityConfiguration utilityConfiguration)
        {
            int durationStart = (int)er.Start;
            int durationEnd   = durationStart + (int)er.Length;

            // An extraction that claims to end beyond the text cannot be extended.
            if (durationEnd > text.Length)
            {
                return ret;
            }

            string trailing = text.Substring(durationEnd);
            string leading  = text.Substring(0, durationStart);
            int    offset;

            // NOTE(review): for GetTermIndex, offset is presumably the length of the matched term
            // at the end of the prefix - confirm in MatchingUtil.
            if (MatchingUtil.GetTermIndex(leading, utilityConfiguration.InConnectorRegex, out offset))
            {
                // Range units such as "week, month, year" are left for the dateRange / datetimeRange output.
                if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text) && durationStart >= offset)
                {
                    ret.Add(new Token(durationStart - offset, durationEnd));
                }
            }
            else if (MatchingUtil.GetTermIndex(leading, utilityConfiguration.WithinNextPrefixRegex, out offset))
            {
                // Any date or time unit ("week, month, year, day, second, minute, hour") is left for
                // the dateRange / datetimeRange output.
                if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) &&
                    !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text) &&
                    durationStart >= offset)
                {
                    ret.Add(new Token(durationStart - offset, durationEnd));
                }
            }
            else if (MatchingUtil.GetAgoLaterIndex(trailing, utilityConfiguration.AgoRegex, out offset) ||
                     MatchingUtil.GetAgoLaterIndex(trailing, utilityConfiguration.LaterRegex, out offset))
            {
                // "Ago" is tried first; "later" only when it fails. Either way the token is the
                // duration extended by the offset reported by the helper that succeeded.
                ret.Add(new Token(durationStart, durationEnd + offset));
            }

            return ret;
        }
Esempio n. 3
0
        /// <summary>
        /// Inspects the text surrounding a duration extraction for "ago"/"later" expressions and,
        /// failing that, for "in"/"within" style connectors, appending tokens that cover the duration
        /// together with the matched neighbouring text. When
        /// <c>utilityConfiguration.CheckBothBeforeAfter</c> is set, both sides of the duration
        /// (and matches straddling it) are considered.
        /// </summary>
        /// <param name="text">Text that the Start/Length offsets of <paramref name="er"/> index into.</param>
        /// <param name="er">The duration extraction; its Start, Length and Text are read. Start and Length must be non-null.</param>
        /// <param name="ret">List that receives zero or more new tokens.</param>
        /// <param name="utilityConfiguration">Supplies the regexes and the CheckBothBeforeAfter switch.</param>
        /// <returns>The same <paramref name="ret"/> instance that was passed in.</returns>
        public static List <Token> ExtractorDurationWithBeforeAndAfter(string text, ExtractResult er, List <Token> ret,
                                                                       IDateTimeUtilityConfiguration utilityConfiguration)
        {
            var pos = (int)er.Start + (int)er.Length;

            if (pos <= text.Length)
            {
                var  afterString    = text.Substring(pos);
                var  beforeString   = text.Substring(0, (int)er.Start);
                var  isTimeDuration = utilityConfiguration.TimeUnitRegex.Match(er.Text).Success;
                int  index;
                bool isMatch         = false;
                var  agoLaterRegexes = new List <Regex>
                {
                    utilityConfiguration.AgoRegex,
                    utilityConfiguration.LaterRegex,
                };

                // "Ago" is tried first; the loop stops at the first regex that produced a usable match.
                foreach (var regex in agoLaterRegexes)
                {
                    Token tokAfter = null, tokBefore = null;
                    bool  isDayMatch = false;

                    // Check afterString
                    // NOTE(review): index is presumably the length of the suffix consumed by the match - confirm in MatchingUtil.
                    if (MatchingUtil.GetAgoLaterIndex(afterString, regex, out index, inSuffix: true))
                    {
                        // We don't support cases like "5 minutes from today" for now
                        // Cases like "5 minutes ago" or "5 minutes from now" are supported
                        // Cases like "2 days before today" or "2 weeks from today" are also supported
                        isDayMatch = regex.Match(afterString).Groups["day"].Success;

                        if (!(isTimeDuration && isDayMatch))
                        {
                            tokAfter = new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + index);
                            isMatch  = true;
                        }
                    }

                    if (utilityConfiguration.CheckBothBeforeAfter)
                    {
                        // Check if regex match is split between beforeString and afterString.
                        // isMatch can only be true here because tokAfter was just created, so index
                        // still holds the suffix offset from the call above.
                        if (!isDayMatch && isMatch)
                        {
                            string beforeAfterStr = beforeString + afterString.Substring(0, index);
                            var    isRangeMatch   = utilityConfiguration.RangePrefixRegex.MatchBegin(afterString.Substring(index), trim: true).Success;
                            if (!isRangeMatch && MatchingUtil.GetAgoLaterIndex(beforeAfterStr, regex, out var indexStart, inSuffix: false))
                            {
                                isDayMatch = regex.Match(beforeAfterStr).Groups["day"].Success;

                                // Only a "day" match on a non-time duration qualifies.
                                if (isDayMatch && !isTimeDuration)
                                {
                                    ret.Add(new Token(indexStart, (er.Start + er.Length ?? 0) + index));
                                    isMatch = true;
                                }
                            }
                        }

                        // Check also beforeString (this overwrites index with the prefix match position).
                        if (MatchingUtil.GetAgoLaterIndex(beforeString, regex, out index, inSuffix: false))
                        {
                            isDayMatch = regex.Match(beforeString).Groups["day"].Success;
                            if (!(isTimeDuration && isDayMatch))
                            {
                                tokBefore = new Token(index, er.Start + er.Length ?? 0);
                                isMatch   = true;
                            }
                        }
                    }

                    if (tokAfter != null && tokBefore != null && tokBefore.Start + tokBefore.Length > tokAfter.Start)
                    {
                        // Merge overlapping tokens. Token takes (start, end), so the merged span runs
                        // from the start of the prefix match to the end of the suffix match.
                        ret.Add(new Token(tokBefore.Start, tokAfter.Start + tokAfter.Length));
                    }
                    else if (tokAfter != null)
                    {
                        ret.Add(tokAfter);
                    }
                    else if (tokBefore != null)
                    {
                        ret.Add(tokBefore);
                    }

                    if (isMatch)
                    {
                        break;
                    }
                }

                if (!isMatch)
                {
                    // Each connector regex is paired with the unit regexes that veto the extension.
                    var inWithinRegexes = new List <(Regex, List <Regex>)>
                    {
                        (utilityConfiguration.InConnectorRegex, new List <Regex> {
                            utilityConfiguration.RangeUnitRegex
                        }),
                        (utilityConfiguration.WithinNextPrefixRegex, new List <Regex> {
                            utilityConfiguration.DateUnitRegex, utilityConfiguration.TimeUnitRegex
                        }),
                    };

                    foreach (var regex in inWithinRegexes)
                    {
                        bool isMatchAfter = false;
                        if (MatchingUtil.GetTermIndex(beforeString, regex.Item1, out index))
                        {
                            isMatch = true;
                        }
                        else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, regex.Item1, out index, inSuffix: true))
                        {
                            // Check also afterString
                            isMatch = isMatchAfter = true;
                        }

                        if (isMatch)
                        {
                            // For InConnectorRegex and range unit like "week, month, year", it should output dateRange or datetimeRange
                            // For WithinNextPrefixRegex and range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange
                            bool isUnitMatch = false;
                            foreach (var unitRegex in regex.Item2)
                            {
                                isUnitMatch = isUnitMatch || unitRegex.IsMatch(er.Text);
                            }

                            if (!isUnitMatch)
                            {
                                if (er.Start != null && er.Length != null && ((int)er.Start >= index || isMatchAfter))
                                {
                                    // A prefix match extends the token to the left, a suffix match to the right.
                                    int start = (int)er.Start - (!isMatchAfter ? index : 0);
                                    int end   = (int)er.Start + (int)er.Length + (isMatchAfter ? index : 0);
                                    ret.Add(new Token(start, end));
                                }
                            }

                            // The first connector that matches decides the outcome, even if vetoed.
                            break;
                        }
                    }
                }
            }

            return ret;
        }
Esempio n. 4
0
        /// <summary>
        /// Inspects the text surrounding a duration extraction for "ago"/"later" expressions and
        /// "in"/"within" style connectors, appending tokens that cover the duration together with
        /// the matched neighbouring text. When <c>utilityConfiguration.CheckBothBeforeAfter</c> is
        /// set, the opposite side of the duration is examined as well.
        /// </summary>
        /// <param name="text">Text that the Start/Length offsets of <paramref name="er"/> index into.</param>
        /// <param name="er">The duration extraction; its Start, Length and Text are read. Start and Length must be non-null.</param>
        /// <param name="ret">List that receives zero or more new tokens.</param>
        /// <param name="utilityConfiguration">Supplies the regexes and the CheckBothBeforeAfter switch.</param>
        /// <returns>The same <paramref name="ret"/> instance that was passed in.</returns>
        public static List <Token> ExtractorDurationWithBeforeAndAfter(string text, ExtractResult er, List <Token> ret,
                                                                       IDateTimeUtilityConfiguration utilityConfiguration)
        {
            var pos = (int)er.Start + (int)er.Length;

            if (pos <= text.Length)
            {
                var afterString    = text.Substring(pos);
                var beforeString   = text.Substring(0, (int)er.Start);
                var isTimeDuration = utilityConfiguration.TimeUnitRegex.Match(er.Text).Success;

                // NOTE(review): for suffix matches, index is presumably the length of afterString
                // consumed by the match - confirm in MatchingUtil.
                if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.AgoRegex, out var index))
                {
                    // We don't support cases like "5 minutes from today" for now
                    // Cases like "5 minutes ago" or "5 minutes from now" are supported
                    // Cases like "2 days before today" or "2 weeks from today" are also supported
                    var isDayMatchInAfterString = utilityConfiguration.AgoRegex.Match(afterString).Groups["day"].Success;

                    if (!(isTimeDuration && isDayMatchInAfterString))
                    {
                        ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + index));
                    }

                    if (utilityConfiguration.CheckBothBeforeAfter && !isDayMatchInAfterString)
                    {
                        // check if regex match is split between beforeString and afterString
                        string beforeAfterStr = beforeString + afterString.Substring(0, index);
                        if (MatchingUtil.GetAgoLaterIndexInBeforeString(beforeAfterStr, utilityConfiguration.AgoRegex, out var indexStart))
                        {
                            isDayMatchInAfterString = utilityConfiguration.AgoRegex.Match(beforeAfterStr).Groups["day"].Success;

                            // Only a "day" match on a non-time duration qualifies.
                            if (isDayMatchInAfterString && !isTimeDuration)
                            {
                                ret.Add(new Token(indexStart, (er.Start + er.Length ?? 0) + index));
                            }
                        }
                    }
                }
                else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.AgoRegex, out index))
                {
                    // Check also beforeString
                    var isDayMatchInBeforeString = utilityConfiguration.AgoRegex.Match(beforeString).Groups["day"].Success;
                    if (!(isTimeDuration && isDayMatchInBeforeString))
                    {
                        // The match lies entirely before the duration, so the token starts at the
                        // match position and ends where the duration ends (same as the "later" case below).
                        ret.Add(new Token(index, er.Start + er.Length ?? 0));
                    }
                }
                else if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.LaterRegex, out index) || (utilityConfiguration.CheckBothBeforeAfter &&
                                                                                                                    MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.LaterRegex, out index)))
                {
                    // Each side is re-evaluated separately so that both tokens can be built and merged.
                    Token tokAfter = null, tokBefore = null;
                    if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.LaterRegex, out index))
                    {
                        var isDayMatchInAfterString = utilityConfiguration.LaterRegex.Match(afterString).Groups["day"].Success;

                        if (!(isTimeDuration && isDayMatchInAfterString))
                        {
                            tokAfter = new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + index);
                        }
                    }

                    // Check also beforeString
                    if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.LaterRegex, out index))
                    {
                        var isDayMatchInBeforeString = utilityConfiguration.LaterRegex.Match(beforeString).Groups["day"].Success;
                        if (!(isTimeDuration && isDayMatchInBeforeString))
                        {
                            tokBefore = new Token(index, er.Start + er.Length ?? 0);
                        }
                    }

                    if (tokAfter != null && tokBefore != null && tokBefore.Start + tokBefore.Length > tokAfter.Start)
                    {
                        // merge overlapping tokens. Token takes (start, end), so the merged span runs
                        // from the start of the prefix match to the end of the suffix match.
                        ret.Add(new Token(tokBefore.Start, tokAfter.Start + tokAfter.Length));
                    }
                    else if (tokAfter != null)
                    {
                        ret.Add(tokAfter);
                    }
                    else if (tokBefore != null)
                    {
                        ret.Add(tokBefore);
                    }
                }
                else if (MatchingUtil.GetTermIndex(beforeString, utilityConfiguration.InConnectorRegex, out index))
                {
                    // For range unit like "week, month, year", it should output dateRange or datetimeRange
                    if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text))
                    {
                        if (er.Start != null && er.Length != null && (int)er.Start >= index)
                        {
                            ret.Add(new Token((int)er.Start - index, (int)er.Start + (int)er.Length));
                        }
                    }
                }
                else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.InConnectorRegex, out index))
                {
                    // Check also afterString
                    // For range unit like "week, month, year", it should output dateRange or datetimeRange
                    if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text))
                    {
                        if (er.Start != null && er.Length != null)
                        {
                            ret.Add(new Token((int)er.Start, (int)er.Start + (int)er.Length + index));
                        }
                    }
                }
                else if (MatchingUtil.GetTermIndex(beforeString, utilityConfiguration.WithinNextPrefixRegex, out index))
                {
                    // For range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange
                    if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) && !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text))
                    {
                        if (er.Start != null && er.Length != null && (int)er.Start >= index)
                        {
                            ret.Add(new Token((int)er.Start - index, (int)er.Start + (int)er.Length));
                        }
                    }
                }
                else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.WithinNextPrefixRegex, out index))
                {
                    // Check also afterString
                    // For range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange
                    if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) && !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text))
                    {
                        if (er.Start != null && er.Length != null)
                        {
                            ret.Add(new Token((int)er.Start, (int)er.Start + (int)er.Length + index));
                        }
                    }
                }
            }

            return ret;
        }