Exemplo n.º 1
0
        /// <summary>
        /// Extracts date/time mentions from <paramref name="text"/> by running each configured
        /// sub-extractor in a fixed order, merging what they return, and then applying the
        /// option-dependent post-processing steps.
        /// </summary>
        /// <param name="text">Input text to scan.</param>
        /// <param name="reference">Reference date handed to every sub-extractor.</param>
        /// <returns>
        /// The merged results ordered by <c>Start</c>; when <c>EnablePreview</c> is set they are
        /// additionally passed through <c>PosProcessExtractionRecoverSuperfluousWords</c>.
        /// </returns>
        public List <ExtractResult> Extract(string text, DateObject reference)
        {
            // Reads the option mask on every call, just like an inline bit test would.
            bool IsOptionSet(DateTimeOptions option) => (this.config.Options & option) != 0;

            var results = new List<ExtractResult>();
            var rawText = text;
            List<MatchResult<string>> removedWords = null;

            if (IsOptionSet(DateTimeOptions.EnablePreview))
            {
                // Work on a cleaned copy of the text; the removed matches are kept so the
                // final step can map results back onto the raw text.
                text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(
                    text,
                    this.config.SuperfluousWordMatcher,
                    out removedWords);
            }

            // Merge order matters: conflicts between extractors are resolved as results are added.
            AddTo(results, this.config.DateExtractor.Extract(text, reference), text);
            AddTo(results, this.config.TimeExtractor.Extract(text, reference), text);
            AddTo(results, this.config.DatePeriodExtractor.Extract(text, reference), text);
            AddTo(results, this.config.DurationExtractor.Extract(text, reference), text);
            AddTo(results, this.config.TimePeriodExtractor.Extract(text, reference), text);
            AddTo(results, this.config.DateTimePeriodExtractor.Extract(text, reference), text);
            AddTo(results, this.config.DateTimeExtractor.Extract(text, reference), text);
            AddTo(results, this.config.SetExtractor.Extract(text, reference), text);
            AddTo(results, this.config.HolidayExtractor.Extract(text, reference), text);

            if (IsOptionSet(DateTimeOptions.EnablePreview))
            {
                AddTo(results, this.config.TimeZoneExtractor.Extract(text, reference), text);
                results = this.config.TimeZoneExtractor.RemoveAmbiguousTimezone(results);
            }

            // Must come after the extractors above: it is given everything collected so far.
            AddTo(results, NumberEndingRegexMatch(text, results), text);

            // Rewrite a time entity as an alternative DateTime expression when it follows a DateTime entity.
            if (IsOptionSet(DateTimeOptions.ExtendedTypes))
            {
                results = this.config.DateTimeAltExtractor.Extract(results, text, reference);
            }

            results = FilterUnspecificDatePeriod(results);
            results = FilterAmbiguity(results, text);
            results = AddMod(results, text);

            // Calendar-mode filtering
            if (IsOptionSet(DateTimeOptions.CalendarMode))
            {
                results = CheckCalendarFilterList(results, text);
            }

            results = results.OrderBy(item => item.Start).ToList();

            if (IsOptionSet(DateTimeOptions.EnablePreview))
            {
                results = MatchingUtil.PosProcessExtractionRecoverSuperfluousWords(results, removedWords, rawText);
            }

            return results;
        }
        /// <summary>
        /// Parses one extracted date/time mention. A modifier found at the start of the text
        /// (or, when <c>CheckBothBeforeAfter</c> applies, at its end) is stripped before the
        /// mention is handed to <c>ParseResult</c>, then re-attached to the result text and
        /// recorded on the resolution's <c>Mod</c>.
        /// </summary>
        /// <param name="er">
        /// The extraction to parse. Its <c>Text</c>, <c>Start</c> and <c>Length</c> are modified
        /// in place while the modifier is stripped and are not restored afterwards.
        /// </param>
        /// <param name="refTime">Reference time forwarded to <c>ParseResult</c>.</param>
        /// <returns>The parse result, or <c>null</c> when <c>ParseResult</c> returns <c>null</c>.</returns>
        public DateTimeParseResult Parse(ExtractResult er, DateObject refTime)
        {
            var referenceTime = refTime;
            DateTimeParseResult pr = null;

            var originText = er.Text;

            if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
            {
                er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _);
                er.Length += er.Text.Length - originText.Length;
            }

            bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasEqual = false, hasDateAfter = false;

            // "InclusiveModifier" means MOD should include the start/end time
            // For example, cases like "on or later than", "earlier than or in" have inclusive modifier
            var hasInclusiveModifier = false;

            // True when the modifier that was actually stripped sat at the END of the text.
            var matchIsAfter = false;
            var modStr = string.Empty;

            // Push: removes a modifier of the given length from er and remembers its text and position.
            void StripModifier(int length, string value, bool atEnd)
            {
                matchIsAfter = atEnd;
                modStr = value;
                er.Start += atEnd ? 0 : length;
                er.Length -= length;
                er.Text = atEnd ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(length);
            }

            // Analyze and process modifiers
            if (er.Metadata != null && er.Metadata.HasMod)
            {
                var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true);
                var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true);
                var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true);
                var aroundMatch = Config.AroundRegex.MatchBegin(er.Text, trim: true);
                var equalMatch = Config.EqualRegex.MatchBegin(er.Text, trim: true);

                // Position is tracked per match. A single shared flag would mark a leading match as
                // trailing whenever a different, lower-priority modifier happened to match at the end,
                // and the wrong end of the text would then be cut off.
                bool beforeAtEnd = false, afterAtEnd = false, sinceAtEnd = false, aroundAtEnd = false, equalAtEnd = false;

                // A modifier that did not match at the start may still match at the end
                if (this.Config.CheckBothBeforeAfter && er.Data != null && er.Data.Equals(Constants.HAS_MOD))
                {
                    if (!beforeMatch.Success)
                    {
                        beforeMatch = Config.BeforeRegex.MatchEnd(er.Text, trim: true);
                        beforeAtEnd = beforeMatch.Success;
                    }

                    if (!afterMatch.Success)
                    {
                        afterMatch = Config.AfterRegex.MatchEnd(er.Text, trim: true);
                        afterAtEnd = afterMatch.Success;
                    }

                    if (!sinceMatch.Success)
                    {
                        sinceMatch = Config.SinceRegex.MatchEnd(er.Text, trim: true);
                        sinceAtEnd = sinceMatch.Success;
                    }

                    if (!aroundMatch.Success)
                    {
                        aroundMatch = Config.AroundRegex.MatchEnd(er.Text, trim: true);
                        aroundAtEnd = aroundMatch.Success;
                    }

                    if (!equalMatch.Success)
                    {
                        equalMatch = Config.EqualRegex.MatchEnd(er.Text, trim: true);
                        equalAtEnd = equalMatch.Success;
                    }
                }

                // Priority: before > after > since > around > equal > suffix ("or later")
                if (beforeMatch.Success)
                {
                    hasBefore = true;
                    StripModifier(beforeMatch.Length, beforeMatch.Value, beforeAtEnd);
                    hasInclusiveModifier = !string.IsNullOrEmpty(beforeMatch.Groups[Constants.IncludeGroupName].Value);
                }
                else if (afterMatch.Success)
                {
                    hasAfter = true;
                    StripModifier(afterMatch.Length, afterMatch.Value, afterAtEnd);
                    hasInclusiveModifier = !string.IsNullOrEmpty(afterMatch.Groups[Constants.IncludeGroupName].Value);
                }
                else if (sinceMatch.Success)
                {
                    hasSince = true;
                    StripModifier(sinceMatch.Length, sinceMatch.Value, sinceAtEnd);
                }
                else if (aroundMatch.Success)
                {
                    hasAround = true;
                    StripModifier(aroundMatch.Length, aroundMatch.Value, aroundAtEnd);
                }
                else if (equalMatch.Success)
                {
                    hasEqual = true;
                    StripModifier(equalMatch.Length, equalMatch.Value, equalAtEnd);
                }
                else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) &&
                          Config.YearRegex.Match(er.Text).Success) ||
                         er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) ||
                         er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal))
                {
                    // This has to be put at the end of the if, or cases like "before 2012" and "after 2012" would fall into this
                    // 2012 or after/above
                    // 3 pm or later
                    var match = Config.SuffixAfter.MatchEnd(er.Text, trim: true);
                    if (match.Success)
                    {
                        hasDateAfter = true;
                        er.Length -= match.Length;
                        er.Text = er.Text.Substring(0, er.Length ?? 0);
                        modStr = match.Value;
                    }
                }
            }

            // Parse extracted datetime mention
            pr = ParseResult(er, referenceTime);
            if (pr == null)
            {
                return null;
            }

            // Pop: puts the stripped modifier text back on the side it was taken from.
            void RestoreModifier(bool atEnd)
            {
                pr.Length += modStr.Length;
                pr.Start -= atEnd ? 0 : modStr.Length;
                pr.Text = atEnd ? pr.Text + modStr : modStr + pr.Text;
            }

            // Apply processed modifiers. At most one of the has* flags was set above.
            if (pr.Value != null)
            {
                if (hasBefore)
                {
                    RestoreModifier(matchIsAfter);
                    var val = (DateTimeResolutionResult)pr.Value;
                    val.Mod = CombineMod(val.Mod, !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD);
                    pr.Value = val;
                }
                else if (hasAfter)
                {
                    RestoreModifier(matchIsAfter);
                    var val = (DateTimeResolutionResult)pr.Value;
                    val.Mod = CombineMod(val.Mod, !hasInclusiveModifier ? Constants.AFTER_MOD : Constants.SINCE_MOD);
                    pr.Value = val;
                }
                else if (hasSince)
                {
                    RestoreModifier(matchIsAfter);
                    var val = (DateTimeResolutionResult)pr.Value;
                    val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
                    pr.Value = val;
                }
                else if (hasAround)
                {
                    RestoreModifier(matchIsAfter);
                    var val = (DateTimeResolutionResult)pr.Value;
                    val.Mod = CombineMod(val.Mod, Constants.APPROX_MOD);
                    pr.Value = val;
                }
                else if (hasEqual)
                {
                    // Only the text span is restored; no Mod is recorded for "equal".
                    RestoreModifier(matchIsAfter);
                }
                else if (hasDateAfter)
                {
                    // The suffix modifier is always trailing.
                    RestoreModifier(true);
                    var val = (DateTimeResolutionResult)pr.Value;
                    val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
                    pr.Value = val;
                    hasSince = true;
                }
            }

            // For cases like "3 pm or later on monday"
            if (pr.Value != null && Config.SuffixAfter.Match(pr.Text)?.Index != 0 &&
                pr.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal) && !this.Config.CheckBothBeforeAfter)
            {
                var val = (DateTimeResolutionResult)pr.Value;
                val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD);
                pr.Value = val;
                hasSince = true;
            }

            if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0 &&
                ((DateTimeResolutionResult)pr?.Value)?.SubDateTimeEntities != null)
            {
                pr.Value = DateTimeResolutionForSplit(pr);
            }
            else
            {
                var hasRangeChangingMod = hasBefore || hasAfter || hasSince;
                if (pr.Value != null)
                {
                    ((DateTimeResolutionResult)pr.Value).HasRangeChangingMod = hasRangeChangingMod;
                }

                pr = SetParseResult(pr, hasRangeChangingMod);
            }

            // In this version, ExperimentalMode only cope with the "IncludePeriodEnd" case
            if ((this.Config.Options & DateTimeOptions.ExperimentalMode) != 0)
            {
                if (pr?.Metadata != null && pr.Metadata.PossiblyIncludePeriodEnd)
                {
                    pr = SetInclusivePeriodEnd(pr);
                }
            }

            // In preview mode, report the result against the text as it was before preprocessing.
            if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
            {
                if (pr != null)
                {
                    pr.Length += originText.Length - pr.Text.Length;
                    pr.Text = originText;
                }
            }

            return pr;
        }
        /// <summary>
        /// Parses one extracted date/time mention. A leading before / after / since / around
        /// modifier, or a trailing <c>DateAfter</c> suffix on dates and year-bearing date periods,
        /// is stripped before the mention is dispatched to the parser selected by <c>er.Type</c>;
        /// the modifier is then re-attached to the result text and written to the resolution's
        /// <c>Mod</c>.
        /// </summary>
        /// <param name="er">
        /// The extraction to parse. Its <c>Text</c>, <c>Start</c> and <c>Length</c> are modified
        /// in place while the modifier is stripped and are not restored afterwards.
        /// </param>
        /// <param name="refTime">Reference time forwarded to the selected parser.</param>
        /// <returns>
        /// The parse result, or <c>null</c> when <c>er.Type</c> is not one of the handled types
        /// or no result is available at the end of processing.
        /// </returns>
        public DateTimeParseResult Parse(ExtractResult er, DateObject refTime)
        {
            var referenceTime = refTime;
            DateTimeParseResult pr = null;

            var originText = er.Text;

            if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0)
            {
                er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _);
                er.Length += er.Text.Length - originText.Length;
            }

            // Push, save the MOD string
            bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasDateAfter = false;

            // "InclusiveModifier" means MOD should include the start/end time
            // For example, cases like "on or later than", "earlier than or in" have inclusive modifier
            bool hasInclusiveModifier = false;
            var modStr = string.Empty;
            var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true);
            var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true);
            var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true);
            var aroundMatch = Config.AroundRegex.MatchBegin(er.Text, trim: true);

            // Priority: before > after > since > around > trailing "or after" suffix
            if (beforeMatch.Success)
            {
                hasBefore = true;
                er.Start += beforeMatch.Length;
                er.Length -= beforeMatch.Length;
                er.Text = er.Text.Substring(beforeMatch.Length);
                modStr = beforeMatch.Value;

                if (!string.IsNullOrEmpty(beforeMatch.Groups["include"].Value))
                {
                    hasInclusiveModifier = true;
                }
            }
            else if (afterMatch.Success)
            {
                hasAfter = true;
                er.Start += afterMatch.Length;
                er.Length -= afterMatch.Length;
                er.Text = er.Text.Substring(afterMatch.Length);
                modStr = afterMatch.Value;

                if (!string.IsNullOrEmpty(afterMatch.Groups["include"].Value))
                {
                    hasInclusiveModifier = true;
                }
            }
            else if (sinceMatch.Success)
            {
                hasSince = true;
                er.Start += sinceMatch.Length;
                er.Length -= sinceMatch.Length;
                er.Text = er.Text.Substring(sinceMatch.Length);
                modStr = sinceMatch.Value;
            }
            else if (aroundMatch.Success)
            {
                hasAround = true;
                er.Start += aroundMatch.Length;
                er.Length -= aroundMatch.Length;
                er.Text = er.Text.Substring(aroundMatch.Length);
                modStr = aroundMatch.Value;
            }
            else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD) && Config.YearRegex.Match(er.Text).Success) || er.Type.Equals(Constants.SYS_DATETIME_DATE))
            {
                // This has to be put at the end of the if, or cases like "before 2012" and "after 2012" would fall into this
                // 2012 or after/above
                var match = Config.DateAfter.MatchEnd(er.Text, trim: true);
                if (match.Success)
                {
                    hasDateAfter = true;
                    er.Length -= match.Length;
                    er.Text = er.Text.Substring(0, er.Length ?? 0);
                    modStr = match.Value;
                }
            }

            // Dispatch to the parser matching the extraction type
            if (er.Type.Equals(Constants.SYS_DATETIME_DATE))
            {
                pr = this.Config.DateParser.Parse(er, referenceTime);
                if (pr.Value == null)
                {
                    // Nothing resolved by the date parser: try the holiday parser instead
                    pr = Config.HolidayParser.Parse(er, referenceTime);
                }
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_TIME))
            {
                pr = this.Config.TimeParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIME))
            {
                pr = this.Config.DateTimeParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD))
            {
                pr = this.Config.DatePeriodParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD))
            {
                pr = this.Config.TimePeriodParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD))
            {
                pr = this.Config.DateTimePeriodParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_DURATION))
            {
                pr = this.Config.DurationParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_SET))
            {
                pr = this.Config.SetParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEALT))
            {
                pr = this.Config.DateTimeAltParser.Parse(er, referenceTime);
            }
            else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEZONE))
            {
                // Time zones are only parsed in preview mode; otherwise pr stays null
                if ((Config.Options & DateTimeOptions.EnablePreview) != 0)
                {
                    pr = this.Config.TimeZoneParser.Parse(er, referenceTime);
                }
            }
            else
            {
                return null;
            }

            // Pop, restore the MOD string
            if (hasBefore && pr?.Value != null)
            {
                pr.Length += modStr.Length;
                pr.Start -= modStr.Length;
                pr.Text = modStr + pr.Text;
                var val = (DateTimeResolutionResult)pr.Value;

                if (!hasInclusiveModifier)
                {
                    val.Mod = Constants.BEFORE_MOD;
                }
                else
                {
                    val.Mod = Constants.UNTIL_MOD;
                }

                pr.Value = val;
            }

            if (hasAfter && pr?.Value != null)
            {
                pr.Length += modStr.Length;
                pr.Start -= modStr.Length;
                pr.Text = modStr + pr.Text;
                var val = (DateTimeResolutionResult)pr.Value;

                if (!hasInclusiveModifier)
                {
                    val.Mod = Constants.AFTER_MOD;
                }
                else
                {
                    val.Mod = Constants.SINCE_MOD;
                }

                pr.Value = val;
            }

            if (hasSince && pr?.Value != null)
            {
                pr.Length += modStr.Length;
                pr.Start -= modStr.Length;
                pr.Text = modStr + pr.Text;
                var val = (DateTimeResolutionResult)pr.Value;
                val.Mod = Constants.SINCE_MOD;
                pr.Value = val;
            }

            if (hasAround && pr?.Value != null)
            {
                pr.Length += modStr.Length;
                pr.Start -= modStr.Length;
                pr.Text = modStr + pr.Text;
                var val = (DateTimeResolutionResult)pr.Value;
                val.Mod = Constants.APPROX_MOD;
                pr.Value = val;
            }

            if (hasDateAfter && pr?.Value != null)
            {
                // The suffix was cut from the end, so it is appended and Start is left alone
                pr.Length += modStr.Length;
                pr.Text = pr.Text + modStr;
                var val = (DateTimeResolutionResult)pr.Value;
                val.Mod = Constants.SINCE_MOD;
                pr.Value = val;
                hasSince = true;
            }

            if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0 &&
                ((DateTimeResolutionResult)pr?.Value)?.SubDateTimeEntities != null)
            {
                // The condition above can only hold when pr is non-null
                pr.Value = DateTimeResolutionForSplit(pr);
            }
            else
            {
                var hasModifier = hasBefore || hasAfter || hasSince;
                pr = SetParseResult(pr, hasModifier);
            }

            // In this version, ExperimentalMode only cope with the "IncludePeriodEnd" case
            if ((this.Config.Options & DateTimeOptions.ExperimentalMode) != 0)
            {
                if (pr?.Metadata != null && pr.Metadata.PossiblyIncludePeriodEnd)
                {
                    pr = SetInclusivePeriodEnd(pr);
                }
            }

            // In preview mode, report the result against the text as it was before preprocessing.
            // pr is treated as possibly null throughout this method, so guard here as well.
            if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0 && pr != null)
            {
                pr.Length += originText.Length - pr.Text.Length;
                pr.Text = originText;
            }

            return pr;
        }