Example #1
0
        /// <summary>
        /// Runs every regex in <c>Regexes</c> over <paramref name="source"/>, merges the accepted matches
        /// into contiguous covered spans, and emits one <see cref="ExtractResult"/> for each span that
        /// coincides exactly (same index and length) with a recorded match.
        /// </summary>
        /// <param name="source">Text to scan. Null or empty input yields an empty list.</param>
        /// <returns>The extracted results after they have been passed through <c>FilterAmbiguity</c>.</returns>
        public virtual List<ExtractResult> Extract(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return new List<ExtractResult>();
            }

            var result = new List<ExtractResult>();
            var matchSource = new Dictionary<Match, TypeTag>();
            var matched = new bool[source.Length];

            var collections = Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value);

            // The relative-ordinal filter inspects the whole source rather than the individual match,
            // so its outcome is identical for every match. It is evaluated lazily, at most once.
            bool? sourceIsFilteredOut = null;

            foreach (var collection in collections)
            {
                foreach (Match m in collection.Key)
                {
                    // In ExperimentalMode, AmbiguousFraction like "30000 in 2009" needs to be skipped
                    if ((Options & NumberOptions.ExperimentalMode) != 0 && AmbiguousFractionConnectorsRegex.Match(m.Value).Success)
                    {
                        continue;
                    }

                    // In EnablePreview, cases like "last", "next" should not be skipped
                    if ((Options & NumberOptions.EnablePreview) == 0 && IsRelativeOrdinal(m.Value))
                    {
                        continue;
                    }

                    // Mark every character covered by this match.
                    for (var j = 0; j < m.Length; j++)
                    {
                        matched[m.Index + j] = true;
                    }

                    // Filter out cases like "first two", "last one"
                    // only support in English now
                    if (!sourceIsFilteredOut.HasValue)
                    {
                        sourceIsFilteredOut = ExtractType.Contains(Constants.MODEL_ORDINAL)
                                              && RelativeOrdinalFilterRegex != null
                                              && RelativeOrdinalFilterRegex.IsMatch(source);
                    }

                    // The characters stay marked, but the match is not recorded, so it can never
                    // produce a result on its own.
                    if (sourceIsFilteredOut.Value)
                    {
                        continue;
                    }

                    // Keep Source Data for extra information
                    matchSource.Add(m, collection.Value);
                }
            }

            // Index of the most recent character that is NOT covered by any match.
            var last = -1;

            for (var i = 0; i < source.Length; i++)
            {
                if (!matched[i])
                {
                    last = i;
                    continue;
                }

                // Only act on the final character of a covered run.
                var endsRun = i + 1 == source.Length || !matched[i + 1];
                if (!endsRun)
                {
                    continue;
                }

                var start = last + 1;
                var length = i - last;
                var substr = source.Substring(start, length);

                // Collect, in one pass, the recorded matches that span exactly this run.
                var candidates = matchSource
                    .Where(p => p.Key.Index == start && p.Key.Length == length)
                    .Select(p => (p.Value.Priority, p.Value.Name))
                    .ToList();

                if (candidates.Count == 0)
                {
                    continue;
                }

                // Smallest (Priority, Name) pair wins; its Name becomes the result's Data.
                var type = candidates.Min().Item2;

                // Extract negative numbers: when the negative-term regex matches in the text before
                // the run, the result is extended to begin at that match.
                // NOTE(review): presumably the regex is anchored to the end of the prefix - confirm.
                if (NegativeNumberTermsRegex != null)
                {
                    var match = NegativeNumberTermsRegex.Match(source.Substring(0, start));
                    if (match.Success)
                    {
                        start = match.Index;
                        length += match.Length;
                        substr = match.Value + substr;
                    }
                }

                var er = new ExtractResult
                {
                    Start = start,
                    Length = length,
                    Text = substr,
                    Type = ExtractType,
                    Data = type,
                };

                // Add Metadata information for Ordinal
                if (ExtractType.Contains(Constants.MODEL_ORDINAL))
                {
                    er.Metadata = new Metadata();
                    if (IsRelativeOrdinal(substr))
                    {
                        er.Metadata.IsOrdinalRelative = true;
                    }
                }

                result.Add(er);
            }

            result = FilterAmbiguity(result, source);

            return result;
        }
        /// <summary>
        /// Runs every regex in <c>Regexes</c> over <paramref name="source"/>, merges the accepted matches
        /// into contiguous covered spans, and emits one <see cref="ExtractResult"/> for each span that
        /// coincides exactly (same index and length) with a recorded match.
        /// </summary>
        /// <param name="source">Text to scan. Null or empty input yields an empty list.</param>
        /// <returns>The extracted results after they have been passed through <c>FilterAmbiguity</c>.</returns>
        public virtual List<ExtractResult> Extract(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return new List<ExtractResult>();
            }

            var result = new List<ExtractResult>();
            var matchSource = new Dictionary<Match, TypeTag>();
            var matched = new bool[source.Length];

            var collections = Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value);

            foreach (var collection in collections)
            {
                foreach (Match m in collection.Key)
                {
                    // In ExperimentalMode, AmbiguousFraction like "30000 in 2009" needs to be skipped.
                    // Options is tested as a bit set so the skip still applies when ExperimentalMode is
                    // combined with other options (an equality test would only hit when it is the sole option).
                    if ((Options & NumberOptions.ExperimentalMode) != 0 && AmbiguousFractionConnectorsRegex.Match(m.Value).Success)
                    {
                        continue;
                    }

                    // Mark every character covered by this match.
                    for (var j = 0; j < m.Length; j++)
                    {
                        matched[m.Index + j] = true;
                    }

                    // Keep Source Data for extra information
                    matchSource.Add(m, collection.Value);
                }
            }

            // Index of the most recent character that is NOT covered by any match.
            var last = -1;

            for (var i = 0; i < source.Length; i++)
            {
                if (!matched[i])
                {
                    last = i;
                    continue;
                }

                // Only act on the final character of a covered run.
                var endsRun = i + 1 == source.Length || !matched[i + 1];
                if (!endsRun)
                {
                    continue;
                }

                var start = last + 1;
                var length = i - last;
                var substr = source.Substring(start, length);

                // Collect, in one pass, the recorded matches that span exactly this run.
                var candidates = matchSource
                    .Where(p => p.Key.Index == start && p.Key.Length == length)
                    .Select(p => (p.Value.Priority, p.Value.Name))
                    .ToList();

                if (candidates.Count == 0)
                {
                    continue;
                }

                // Smallest (Priority, Name) pair wins; its Name becomes the result's Data.
                var type = candidates.Min().Item2;

                // Extract negative numbers: when the negative-term regex matches in the text before
                // the run, the result is extended to begin at that match.
                // NOTE(review): presumably the regex is anchored to the end of the prefix - confirm.
                if (NegativeNumberTermsRegex != null)
                {
                    var match = NegativeNumberTermsRegex.Match(source.Substring(0, start));
                    if (match.Success)
                    {
                        start = match.Index;
                        length += match.Length;
                        substr = match.Value + substr;
                    }
                }

                var er = new ExtractResult
                {
                    Start = start,
                    Length = length,
                    Text = substr,
                    Type = ExtractType,
                    Data = type
                };
                result.Add(er);
            }

            result = FilterAmbiguity(result, source);

            return result;
        }
Example #3
0
        /// <summary>
        /// Runs every regex in <c>Regexes</c> over <paramref name="source"/>, merges the accepted matches
        /// into contiguous covered spans, and emits one <see cref="ExtractResult"/> for each span that
        /// coincides exactly (same index and length) with a recorded match.
        /// </summary>
        /// <param name="source">Text to scan. Null or empty input yields an empty list.</param>
        /// <returns>The extracted results after they have been passed through <c>FilterAmbiguity</c>.</returns>
        public virtual List<ExtractResult> Extract(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return new List<ExtractResult>();
            }

            var result = new List<ExtractResult>();
            var matchSource = new Dictionary<Match, TypeTag>();
            var matched = new bool[source.Length];

            var collections = Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value);

            foreach (var collection in collections)
            {
                foreach (Match m in collection.Key)
                {
                    // In ExperimentalMode, AmbiguousFraction like "30000 in 2009" needs to be skipped
                    if ((Options & NumberOptions.ExperimentalMode) != 0 && AmbiguousFractionConnectorsRegex.Match(m.Value).Success)
                    {
                        continue;
                    }

                    // If SuppressExtendedTypes is on, cases like "last", "next" should be skipped
                    if ((Options & NumberOptions.SuppressExtendedTypes) != 0 && m.Groups[Constants.RelativeOrdinalGroupName].Success)
                    {
                        continue;
                    }

                    // Matches containing separators 'in', 'out of' are not treated as fractions when the
                    // numerator is greater than the denominator (an equal numerator and denominator is kept).
                    if (m.Groups["ambiguousSeparator"].Success)
                    {
                        var numerator = m.Groups["numerator"];
                        var denominator = m.Groups["denominator"];
                        int num = ParseNumber(numerator);
                        int den = ParseNumber(denominator);

                        if (num > den)
                        {
                            continue;
                        }
                    }

                    // Mark every character covered by this match.
                    for (var j = 0; j < m.Length; j++)
                    {
                        matched[m.Index + j] = true;
                    }

                    // Keep Source Data for extra information
                    matchSource.Add(m, collection.Value);
                }
            }

            // Index of the most recent character that is NOT covered by any match.
            var last = -1;

            for (var i = 0; i < source.Length; i++)
            {
                if (!matched[i])
                {
                    last = i;
                    continue;
                }

                // Only act on the final character of a covered run.
                var endsRun = i + 1 == source.Length || !matched[i + 1];
                if (!endsRun)
                {
                    continue;
                }

                var start = last + 1;
                var length = i - last;
                var substr = source.Substring(start, length);

                // Pick the recorded match spanning exactly this run with the smallest Priority, then the
                // smallest Name. Ordering on those two keys only (instead of taking Min() of a tuple that
                // also contains the Match) avoids comparing Match objects, which are not IComparable and
                // would make the default comparer throw when two candidates tie on Priority and Name.
                // On a tie, the first candidate enumerated is used.
                var best = matchSource
                    .Where(p => p.Key.Index == start && p.Key.Length == length)
                    .OrderBy(p => p.Value.Priority)
                    .ThenBy(p => p.Value.Name)
                    .FirstOrDefault();

                // KeyValuePair is a struct: a null Key means no candidate was found.
                var originalMatch = best.Key;
                if (originalMatch == null)
                {
                    continue;
                }

                var type = best.Value.Name;

                // Extract negative numbers: when the negative-term regex matches in the text before
                // the run, the result is extended to begin at that match.
                // NOTE(review): presumably the regex is anchored to the end of the prefix - confirm.
                if (NegativeNumberTermsRegex != null)
                {
                    var match = NegativeNumberTermsRegex.Match(source.Substring(0, start));
                    if (match.Success)
                    {
                        start = match.Index;
                        length += match.Length;
                        substr = match.Value + substr;
                    }
                }

                var er = new ExtractResult
                {
                    Start = start,
                    Length = length,
                    Text = substr,
                    Type = ExtractType,
                    Data = type,
                };

                // Add Metadata information for Ordinal
                if (ExtractType.Contains(Constants.MODEL_ORDINAL))
                {
                    er.Metadata = new Metadata();
                    if ((Options & NumberOptions.SuppressExtendedTypes) == 0 &&
                        originalMatch.Groups[Constants.RelativeOrdinalGroupName].Success)
                    {
                        er.Metadata.IsOrdinalRelative = true;
                    }
                }

                result.Add(er);
            }

            result = FilterAmbiguity(result, source);

            return result;
        }