/// <summary>
        /// Builds the regex-to-tag table for integer extraction and stores it, as an immutable
        /// dictionary, in Regexes.
        /// </summary>
        /// <param name="mode">
        /// Chooses which Chinese-numeral pattern is registered last: the allow-list pattern for
        /// Default, the aggressive pattern for ExtractAll. Any other value registers neither.
        /// </param>
        public IntegerExtractor(ChineseNumberExtractorMode mode = ChineseNumberExtractorMode.Default)
        {
            // Entries are registered one at a time and every tag is generated right after its
            // Regex is constructed. NOTE(review): keep this order in case GenerateRegexTag is
            // order-sensitive — confirm against its implementation.
            var tagByPattern = new Dictionary<Regex, TypeTag>();

            // Plain digits, optionally signed: 123456, -123456
            tagByPattern.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits followed by a magnitude suffix: 15k, 16 G
            tagByPattern.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits with group separators: 1,234, 2,332,111
            tagByPattern.Add(
                new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // "Half" of a counting unit: 半百 (half a hundred), 半打 (half a dozen)
            tagByPattern.Add(
                new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

            // Stand-alone "half": 半
            tagByPattern.Add(
                new Regex(NumbersDefinitions.HalfUnitRegex, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

            // Dozens: 一打 (one dozen), 五十打 (fifty dozen)
            tagByPattern.Add(
                new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

            if (mode == ChineseNumberExtractorMode.Default)
            {
                // Chinese numerals such as 一百五十五 or 负一亿三百二十二.
                // The allow list keeps "四" in "四川" from being extracted as a number.
                tagByPattern.Add(
                    new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
            }
            else if (mode == ChineseNumberExtractorMode.ExtractAll)
            {
                // Same numerals as above, but with no allow list: every potential integer is
                // extracted, including "四" from "四川" (useful in Units, for example).
                tagByPattern.Add(
                    new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
            }

            Regexes = tagByPattern.ToImmutableDictionary();
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the cardinal-number table: CardinalExtractor = Int + Double.
        /// The integer patterns are merged first, then the double patterns.
        /// </summary>
        /// <param name="mode">
        /// Passed to the IntegerExtractor only; the DoubleExtractor is created without arguments.
        /// </param>
        public CardinalExtractor(ChineseNumberExtractorMode mode = ChineseNumberExtractorMode.Default)
        {
            var combined = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Integer part: construct, then merge, before the double extractor is created.
            combined.AddRange(new IntegerExtractor(mode).Regexes);

            // Double part.
            combined.AddRange(new DoubleExtractor().Regexes);

            Regexes = combined.ToImmutable();
        }
        /// <summary>
        /// Builds the regex-to-tag table for integer extraction and stores it, as an immutable
        /// dictionary, in Regexes. Digit-based patterns are tagged "IntegerNum"; Chinese-numeral
        /// patterns are tagged "IntegerChs".
        /// </summary>
        /// <param name="mode">
        /// Chooses which Chinese-numeral pattern is registered last: the allow-list pattern for
        /// Default, the aggressive pattern for ExtractAll. Any other value registers neither.
        /// </param>
        public IntegerExtractor(ChineseNumberExtractorMode mode = ChineseNumberExtractorMode.Default)
        {
            const string numericTag = "IntegerNum";
            const string chineseTag = "IntegerChs";

            // Only two of the digit patterns are compiled with IgnoreCase; the rest are case-sensitive.
            const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;
            const RegexOptions singlelineOnly = RegexOptions.Singleline;

            var tagByPattern = new Dictionary<Regex, string>();

            // Plain digits, optionally signed: 123456, -123456
            tagByPattern.Add(new Regex(NumbersDefinitions.NumbersSpecialsChars, ignoreCaseSingleline), numericTag);

            // Digits followed by a magnitude suffix: 15k, 16 G
            tagByPattern.Add(new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, singlelineOnly), numericTag);

            // Digits with group separators: 1,234, 2,332,111
            tagByPattern.Add(new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, ignoreCaseSingleline), numericTag);

            // "Half" of a counting unit: 半百 (half a hundred), 半打 (half a dozen)
            tagByPattern.Add(new Regex(NumbersDefinitions.NumbersWithHalfDozen, singlelineOnly), chineseTag);

            // Dozens: 一打 (one dozen), 五十打 (fifty dozen)
            tagByPattern.Add(new Regex(NumbersDefinitions.NumbersWithDozen, singlelineOnly), chineseTag);

            if (mode == ChineseNumberExtractorMode.Default)
            {
                // Chinese numerals such as 一百五十五 or 负一亿三百二十二.
                // The allow list keeps "四" in "四川" from being extracted as a number.
                tagByPattern.Add(new Regex(NumbersDefinitions.NumbersWithAllowListRegex, singlelineOnly), chineseTag);
            }
            else if (mode == ChineseNumberExtractorMode.ExtractAll)
            {
                // Same numerals as above, but with no allow list: every potential integer is
                // extracted, including "四" from "四川" (useful in Units, for example).
                tagByPattern.Add(new Regex(NumbersDefinitions.NumbersAggressiveRegex, singlelineOnly), chineseTag);
            }

            Regexes = tagByPattern.ToImmutableDictionary();
        }
Esempio n. 4
0
        /// <summary>
        /// Builds the full number table by merging the cardinal patterns first and the fraction
        /// patterns second, then stores the immutable result in Regexes.
        /// </summary>
        /// <param name="mode">
        /// Passed to the CardinalExtractor only; the FractionExtractor is created without arguments.
        /// </param>
        public NumberExtractor(ChineseNumberExtractorMode mode = ChineseNumberExtractorMode.Default)
        {
            var combined = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Cardinal part: construct, then merge, before the fraction extractor is created.
            combined.AddRange(new CardinalExtractor(mode).Regexes);

            // Fraction part.
            combined.AddRange(new FractionExtractor().Regexes);

            Regexes = combined.ToImmutable();
        }