/// <summary>
/// Extracts results from <paramref name="source"/> using the configured internal
/// extractor, drops every hit whose text matches the configured year regex, appends
/// the implicit-duration matches, and merges everything into a single result list.
/// </summary>
/// <param name="source">The text to scan.</param>
/// <param name="referenceTime">Reference time; not read by this method.</param>
/// <returns>
/// The merged results; additionally passed through <c>MergeMultipleDuration</c>
/// when <c>this.merge</c> is set.
/// </returns>
public List <ExtractResult> Extract(string source, DateObject referenceTime)
        {
            // Keep only the internal extractor's hits whose text does not match the year regex.
            var kept = new List<ExtractResult>();
            foreach (var candidate in this.config.InternalExtractor.Extract(source))
            {
                if (!this.config.YearRegex.Match(candidate.Text).Success)
                {
                    kept.Add(candidate);
                }
            }

            // Implicit durations such as "all day", "more days", "few days".
            kept.AddRange(ImplicitDuration(source));

            List<ExtractResult> merged = ExtractResultExtension.MergeAllResults(kept);

            if (!this.merge)
            {
                return merged;
            }

            return MergeMultipleDuration(source, merged);
        }
        /// <summary>
        /// Collects tokens from the simple-case, complex-case, two-time-point,
        /// number-with-unit and duration matchers, merges them into extract results,
        /// and filters the results through the configured ambiguity filters.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <param name="referenceTime">Reference time forwarded to the matchers that accept one.</param>
        /// <returns>The merged results after ambiguity filtering.</returns>
        public List <ExtractResult> Extract(string text, DateObject referenceTime)
        {
            // Simple cases are gathered and merged first because the complex-case
            // matcher takes the merged simple-case results as an input.
            var candidates = new List<Token>(MatchSimpleCases(text));
            var simpleMerged = Token.MergeAllTokens(candidates, text, ExtractorName);

            candidates.AddRange(MatchComplexCases(text, simpleMerged, referenceTime));
            candidates.AddRange(MergeTwoTimePoints(text, referenceTime));
            candidates.AddRange(MatchNumberWithUnit(text));
            candidates.AddRange(MatchDurations(text, referenceTime));

            var merged = Token.MergeAllTokens(candidates, text, ExtractorName);

            // Remove common ambiguous cases before handing the results back.
            return ExtractResultExtension.FilterAmbiguity(merged, text, this.config.AmbiguityFiltersDict);
        }
Пример #3
0
        /// <summary>
        /// Collects tokens from the basic, "at", before/after and special-case regex
        /// matchers, merges them into extract results, optionally merges in time-zone
        /// results, and filters the outcome through the configured ambiguity filters.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <param name="reference">Reference time forwarded to the special-case matcher and the time-zone extractor.</param>
        /// <returns>The merged results after ambiguity filtering.</returns>
        public virtual List <ExtractResult> ExtractImpl(string text, DateObject reference)
        {
            var candidates = new List<Token>();
            candidates.AddRange(BasicRegexMatch(text));
            candidates.AddRange(AtRegexMatch(text));
            candidates.AddRange(BeforeAfterRegexMatch(text));
            candidates.AddRange(SpecialCasesRegexMatch(text, reference));

            var merged = Token.MergeAllTokens(candidates, text, ExtractorName);

            // Time-zone merging only runs when the EnablePreview option bit is set.
            var previewEnabled = (this.config.Options & DateTimeOptions.EnablePreview) != 0;
            if (previewEnabled)
            {
                var timeZones = this.config.TimeZoneExtractor.Extract(text, reference);
                merged = TimeZoneUtility.MergeTimeZones(merged, timeZones, text);
            }

            // Remove common ambiguous cases before handing the results back.
            return ExtractResultExtension.FilterAmbiguity(merged, text, this.config.AmbiguityFiltersDict);
        }
Пример #4
0
        /// <summary>
        /// Runs every configured sub-extractor over <paramref name="text"/>, folds their
        /// results into one list via <c>AddTo</c>, filters ambiguous results, applies
        /// <c>AddMod</c>, and returns the results ordered by start position.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <param name="referenceTime">Reference time forwarded to every sub-extractor.</param>
        /// <returns>The combined results sorted ascending by <c>Start</c>.</returns>
        public List <ExtractResult> Extract(string text, DateObject referenceTime)
        {
            // Date results seed the list; every other extractor is folded in afterwards.
            var combined = this.config.DateExtractor.Extract(text, referenceTime);

            // Do not reorder the calls below: the order is important, since there is
            // a problem in merging.
            var timeResults = this.config.TimeExtractor.Extract(text, referenceTime);
            AddTo(combined, timeResults);

            var durationResults = this.config.DurationExtractor.Extract(text, referenceTime);
            AddTo(combined, durationResults);

            var datePeriodResults = this.config.DatePeriodExtractor.Extract(text, referenceTime);
            AddTo(combined, datePeriodResults);

            var dateTimeResults = this.config.DateTimeExtractor.Extract(text, referenceTime);
            AddTo(combined, dateTimeResults);

            var timePeriodResults = this.config.TimePeriodExtractor.Extract(text, referenceTime);
            AddTo(combined, timePeriodResults);

            var dateTimePeriodResults = this.config.DateTimePeriodExtractor.Extract(text, referenceTime);
            AddTo(combined, dateTimePeriodResults);

            var setResults = this.config.SetExtractor.Extract(text, referenceTime);
            AddTo(combined, setResults);

            var holidayResults = this.config.HolidayExtractor.Extract(text, referenceTime);
            AddTo(combined, holidayResults);

            combined = ExtractResultExtension.FilterAmbiguity(combined, text, this.config.AmbiguityFiltersDict);

            AddMod(combined, text);

            return combined.OrderBy(p => p.Start).ToList();
        }
Пример #5
0
        /// <summary>
        /// Collects number-with-unit tokens, their suffixed variants and implicit-duration
        /// tokens, merges them into extract results, filters ambiguous results, optionally
        /// merges multiple durations, and finally tags inequality prefixes.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <param name="reference">Reference time; not read by this method.</param>
        /// <returns>The results returned by <c>TagInequalityPrefix</c>.</returns>
        public List <ExtractResult> Extract(string text, DateObject reference)
        {
            // The number-with-unit tokens are both part of the output and the input
            // to the suffix matcher, so they are computed once and reused.
            var unitTokens = NumberWithUnit(text);

            var candidates = new List<Token>(unitTokens);
            candidates.AddRange(NumberWithUnitAndSuffix(text, unitTokens));
            candidates.AddRange(ImplicitDuration(text));

            var results = Token.MergeAllTokens(candidates, text, ExtractorName);

            // Remove common ambiguous cases
            results = ExtractResultExtension.FilterAmbiguity(results, text, this.config.AmbiguityFiltersDict);

            // Merge multiple durations BEFORE tagging inequality prefixes, so cases like
            // "more than 4 days and less than 1 week" will not be merged into one
            // "multipleDuration".
            if (this.merge)
            {
                results = MergeMultipleDuration(text, results);
            }

            return TagInequalityPrefix(text, results);
        }