示例#1
0
        /// <summary>
        /// Parses an extracted number by dispatching to the parsing routine selected from the
        /// extraction's marker string.
        /// </summary>
        /// <param name="extResult">Extraction to parse; its <c>Type</c>, <c>Text</c> and <c>Data</c> are read.</param>
        /// <returns>
        /// The result produced by the selected routine, or <c>null</c> when the extraction type is not
        /// listed in <c>SupportedTypes</c> or the marker matches none of the known kinds.
        /// </returns>
        public virtual ParseResult Parse(ExtractResult extResult)
        {
            // When a type filter is configured, reject extractions of any unlisted type.
            if (SupportedTypes != null && SupportedTypes.All(t => !extResult.Type.Equals(t)))
            {
                return null;
            }

            // Prefer the marker carried in Data; otherwise derive one from the shape of the text.
            var marker = extResult.Data as string;
            if (marker == null)
            {
                marker = LongFormatRegex.Match(extResult.Text).Success ? "Num" : Config.LangMarker;
            }

            // Branch order matters: the "Frac" + language marker check must run before the
            // plain language-marker check, because the former contains the latter.
            ParseResult parsed = null;
            if (marker.Contains("Num"))
            {
                parsed = DigitNumberParse(extResult);
            }
            else if (marker.Contains($"Frac{Config.LangMarker}"))
            {
                parsed = FracLikeNumberParse(extResult);
            }
            else if (marker.Contains(Config.LangMarker))
            {
                parsed = TextNumberParse(extResult);
            }
            else if (marker.Contains("Pow"))
            {
                parsed = PowerNumberParse(extResult);
            }

            // Nothing to format when no routine ran or it produced no value.
            if (parsed?.Value == null)
            {
                return parsed;
            }

            // Render the numeric value as text, using the configured culture when there is one.
            if (Config.CultureInfo != null)
            {
                parsed.ResolutionStr = ((double)parsed.Value).ToString(Config.CultureInfo);
            }
            else
            {
                parsed.ResolutionStr = parsed.Value.ToString();
            }

            return parsed;
        }
示例#2
0
        /// <summary>
        /// Parses an extracted number, or a list of extracted numbers, into a <see cref="ParseResult"/>,
        /// then fills in the resolution string, ordinal metadata, final type and lower-cased text.
        /// </summary>
        /// <remarks>
        /// When <c>Config.NegativeNumberSignRegex</c> matches, the matched prefix (group 1) is removed
        /// from <paramref name="extResult"/>'s <c>Text</c> in place; the prefix is put back only on the
        /// returned result's <c>Text</c>.
        /// </remarks>
        /// <param name="extResult">Extraction to parse; its <c>Type</c>, <c>Text</c>, <c>Start</c> and <c>Data</c> are read.</param>
        /// <returns>
        /// The parse result, or <c>null</c> when the extraction type is not listed in
        /// <c>SupportedTypes</c> or when no parsing routine produced a result.
        /// </returns>
        public virtual ParseResult Parse(ExtractResult extResult)
        {
            // Check if the parser is configured to support specific types
            if (SupportedTypes != null && !SupportedTypes.Any(t => extResult.Type.Equals(t)))
            {
                return null;
            }

            ParseResult ret = null;

            // Use the marker carried in Data; otherwise derive one from the shape of the text
            if (!(extResult.Data is string extra))
            {
                extra = LongFormatRegex.Match(extResult.Text).Success ? Constants.NUMBER_SUFFIX : Config.LangMarker;
            }

            // Resolve symbol prefix
            var isNegative    = false;
            var matchNegative = Config.NegativeNumberSignRegex.Match(extResult.Text);

            if (matchNegative.Success)
            {
                isNegative     = true;
                extResult.Text = extResult.Text.Substring(matchNegative.Groups[1].Length);
            }

            // Assign resolution value
            if (extResult.Data is List <ExtractResult> ers)
            {
                var innerPrs  = ers.Select(Parse).ToList();
                var mergedPrs = new List <ParseResult>();

                // val accumulates the values of the current run of mergeable results;
                // count is how many earlier items belong to that run.
                double val   = 0;
                var    count = 0;

                for (var idx = 0; idx < innerPrs.Count; idx++)
                {
                    val += (double)innerPrs[idx].Value;

                    // Close the run at the last item or when the next item cannot be merged with this one
                    if (idx + 1 >= innerPrs.Count || !IsMergeable((double)innerPrs[idx].Value, (double)innerPrs[idx + 1].Value))
                    {
                        // The merged span runs from the first item of the run to the end of the current item
                        var start  = (int)ers[idx - count].Start;
                        var length = (int)(ers[idx].Start + ers[idx].Length - start);
                        mergedPrs.Add(new ParseResult
                        {
                            Start  = start,
                            Length = length,
                            Text   = extResult.Text.Substring((int)(start - extResult.Start), length),
                            Type   = extResult.Type,
                            Value  = val,
                            Data   = null,
                        });

                        val   = 0;
                        count = 0;
                    }
                    else
                    {
                        count++;
                    }
                }

                // The last iteration always closes a run and resets val, so Value is 0 here;
                // the meaningful values are on the merged results stored in Data.
                ret = new ParseResult(extResult)
                {
                    Value = val, Data = mergedPrs
                };
            }
            else if (extra.Contains(Constants.NUMBER_SUFFIX))
            {
                ret = DigitNumberParse(extResult);
            }
            else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LangMarker}"))
            {
                // Such fractions are special cases, parse via another method
                ret = FracLikeNumberParse(extResult);
            }
            else if (extra.Contains(Config.LangMarker))
            {
                ret = TextNumberParse(extResult);
            }
            else if (extra.Contains(Constants.POWER_SUFFIX))
            {
                ret = PowerNumberParse(extResult);
            }

            // No routine produced a result: return null here rather than dereferencing
            // ret in the post-processing below.
            if (ret == null)
            {
                return null;
            }

            if (ret.Data is List <ParseResult> prs)
            {
                foreach (var parseResult in prs)
                {
                    parseResult.ResolutionStr = GetResolutionStr(parseResult.Value);
                }
            }
            else if (ret.Value != null)
            {
                if (isNegative)
                {
                    // Recover the original extracted Text
                    ret.Text  = matchNegative.Groups[1].Value + extResult.Text;
                    ret.Value = -(double)ret.Value;
                }

                ret.ResolutionStr = GetResolutionStr(ret.Value);
            }

            // Add "offset" and "relativeTo" for ordinal
            if (!string.IsNullOrEmpty(ret.Type) && ret.Type.Contains(Constants.MODEL_ORDINAL))
            {
                if (Config.RelativeReferenceOffsetMap.ContainsKey(extResult.Text) &&
                    Config.RelativeReferenceRelativeToMap.ContainsKey(extResult.Text))
                {
                    ret.Metadata.Offset     = Config.RelativeReferenceOffsetMap[extResult.Text];
                    ret.Metadata.RelativeTo = Config.RelativeReferenceRelativeToMap[extResult.Text];
                }
                else
                {
                    ret.Metadata.Offset = ret.ResolutionStr;

                    // Every ordinal number is relative to the start
                    ret.Metadata.RelativeTo = Constants.RELATIVE_START;
                }
            }

            ret.Type = DetermineType(extResult);
            ret.Text = ret.Text.ToLowerInvariant();

            return ret;
        }
        /// <summary>
        /// Parses an extracted number, or a list of extracted numbers, into a <see cref="ParseResult"/>,
        /// then fills in the resolution string and the final type.
        /// </summary>
        /// <remarks>
        /// When <c>Config.NegativeNumberSignRegex</c> matches, the matched prefix (group 1) is removed
        /// from <paramref name="extResult"/>'s <c>Text</c> in place; the prefix is put back only on the
        /// returned result's <c>Text</c>.
        /// </remarks>
        /// <param name="extResult">Extraction to parse; its <c>Type</c>, <c>Text</c>, <c>Start</c> and <c>Data</c> are read.</param>
        /// <returns>
        /// The parse result, or <c>null</c> when the extraction type is not listed in
        /// <c>SupportedTypes</c> or when no parsing routine produced a result.
        /// </returns>
        public virtual ParseResult Parse(ExtractResult extResult)
        {
            // Check if the parser is configured to support specific types
            if (SupportedTypes != null && !SupportedTypes.Any(t => extResult.Type.Equals(t)))
            {
                return null;
            }

            ParseResult ret = null;

            // Use the marker carried in Data; otherwise derive one from the shape of the text
            if (!(extResult.Data is string extra))
            {
                extra = LongFormatRegex.Match(extResult.Text).Success ? "Num" : Config.LangMarker;
            }

            // Resolve symbol prefix
            var isNegative    = false;
            var matchNegative = Config.NegativeNumberSignRegex.Match(extResult.Text);

            if (matchNegative.Success)
            {
                isNegative     = true;
                extResult.Text = extResult.Text.Substring(matchNegative.Groups[1].Length);
            }

            if (extResult.Data is List <ExtractResult> ers)
            {
                var innerPrs  = ers.Select(Parse).ToList();
                var mergedPrs = new List <ParseResult>();

                // val accumulates the values of the current run of mergeable results;
                // count is how many earlier items belong to that run.
                double val   = 0;
                var    count = 0;

                for (var idx = 0; idx < innerPrs.Count; idx++)
                {
                    val += (double)innerPrs[idx].Value;

                    // Close the run at the last item or when the next item cannot be merged with this one
                    if (idx + 1 >= innerPrs.Count || !IsMergeable((double)innerPrs[idx].Value, (double)innerPrs[idx + 1].Value))
                    {
                        // The merged span runs from the first item of the run to the end of the current item
                        var start  = (int)ers[idx - count].Start;
                        var length = (int)(ers[idx].Start + ers[idx].Length - start);
                        mergedPrs.Add(new ParseResult
                        {
                            Start  = start,
                            Length = length,
                            Text   = extResult.Text.Substring((int)(start - extResult.Start), length),
                            Type   = extResult.Type,
                            Value  = val,
                            Data   = null
                        });

                        val   = 0;
                        count = 0;
                    }
                    else
                    {
                        count++;
                    }
                }

                // The last iteration always closes a run and resets val, so Value is 0 here;
                // the meaningful values are on the merged results stored in Data.
                ret = new ParseResult(extResult)
                {
                    Value = val, Data = mergedPrs
                };
            }
            else if (extra.Contains(Constants.NUMBER_SUFFIX))
            {
                ret = DigitNumberParse(extResult);
            }
            else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LangMarker}"))
            {
                // Frac is a special number, parse via another method
                ret = FracLikeNumberParse(extResult);
            }
            else if (extra.Contains(Config.LangMarker))
            {
                ret = TextNumberParse(extResult);
            }
            else if (extra.Contains(Constants.POWER_SUFFIX))
            {
                ret = PowerNumberParse(extResult);
            }

            if (ret?.Data is List <ParseResult> prs)
            {
                foreach (var parseResult in prs)
                {
                    parseResult.ResolutionStr = GetResolutionStr(parseResult.Value);
                }
            }
            else if (ret?.Value != null)
            {
                if (isNegative)
                {
                    // Recover to the original extracted Text
                    ret.Text  = matchNegative.Groups[1].Value + extResult.Text;
                    ret.Value = -(double)ret.Value;
                }

                ret.ResolutionStr = GetResolutionStr(ret.Value);
            }

            // ret is still null when no routine matched the marker; only set the type on a real result
            if (ret != null)
            {
                ret.Type = DetermineType(extResult);
            }

            return ret;
        }
        /// <summary>
        /// Parses an extracted number, handling an optional leading negative sign, by dispatching to
        /// the parsing routine selected from the extraction's marker string.
        /// </summary>
        /// <remarks>
        /// When <c>Config.NegativeNumberSignRegex</c> matches, the matched prefix (group 1) is removed
        /// from <paramref name="extResult"/>'s <c>Text</c> in place; the prefix is put back only on the
        /// returned result's <c>Text</c>.
        /// </remarks>
        /// <param name="extResult">Extraction to parse; its <c>Type</c>, <c>Text</c> and <c>Data</c> are read.</param>
        /// <returns>
        /// The result produced by the selected routine, or <c>null</c> when the extraction type is not
        /// listed in <c>SupportedTypes</c> or the marker matches none of the known kinds.
        /// </returns>
        public virtual ParseResult Parse(ExtractResult extResult)
        {
            // When a type filter is configured, reject extractions of any unlisted type.
            if (SupportedTypes != null && SupportedTypes.All(t => !extResult.Type.Equals(t)))
            {
                return null;
            }

            // Prefer the marker carried in Data; otherwise derive one from the shape of the text.
            // This happens before the sign is stripped, so the regex sees the untouched text.
            var marker = extResult.Data as string;
            if (marker == null)
            {
                marker = LongFormatRegex.Match(extResult.Text).Success ? "Num" : Config.LangMarker;
            }

            // Strip a leading negative sign so the routines below only see the magnitude.
            var signMatch = Config.NegativeNumberSignRegex.Match(extResult.Text);
            var negative  = signMatch.Success;
            if (negative)
            {
                extResult.Text = extResult.Text.Substring(signMatch.Groups[1].Length);
            }

            // Branch order matters: the "Frac" + language marker check must run before the
            // plain language-marker check, because the former contains the latter.
            ParseResult parsed = null;
            if (marker.Contains("Num"))
            {
                parsed = DigitNumberParse(extResult);
            }
            else if (marker.Contains($"Frac{Config.LangMarker}"))
            {
                parsed = FracLikeNumberParse(extResult);
            }
            else if (marker.Contains(Config.LangMarker))
            {
                parsed = TextNumberParse(extResult);
            }
            else if (marker.Contains("Pow"))
            {
                parsed = PowerNumberParse(extResult);
            }

            // Nothing to negate or format when no routine ran or it produced no value.
            if (parsed?.Value == null)
            {
                return parsed;
            }

            if (negative)
            {
                // Put the sign back on the reported text and flip the numeric value.
                parsed.Text  = signMatch.Groups[1].Value + extResult.Text;
                parsed.Value = -(double)parsed.Value;
            }

            // Render the numeric value as text, using the configured culture when there is one.
            if (Config.CultureInfo != null)
            {
                parsed.ResolutionStr = ((double)parsed.Value).ToString(Config.CultureInfo);
            }
            else
            {
                parsed.ResolutionStr = parsed.Value.ToString();
            }

            return parsed;
        }