示例#1
0
        /// <summary>
        /// Creates a cardinal-number extractor whose regex table is the combination of
        /// the integer extractor's table and the double extractor's table
        /// (cardinal = integer + double).
        /// </summary>
        /// <param name="mode">
        /// Forwarded to <c>IntegerExtractor</c> only; <c>DoubleExtractor</c> is created
        /// with its parameterless constructor.
        /// </param>
        public CardinalExtractor(ChineseNumberMode mode = ChineseNumberMode.Default)
        {
            // Integer patterns are gathered first, then double patterns, in the same
            // order in which they are merged below.
            var integerRegexes = new IntegerExtractor(mode).Regexes;
            var doubleRegexes = new DoubleExtractor().Regexes;

            var merged = ImmutableDictionary.CreateBuilder<Regex, string>();
            merged.AddRange(integerRegexes);
            merged.AddRange(doubleRegexes);

            Regexes = merged.ToImmutable();
        }
示例#2
0
        /// <summary>
        /// Creates a number extractor whose regex table is the combination of the
        /// cardinal extractor's table and the fraction extractor's table.
        /// </summary>
        /// <param name="mode">
        /// Forwarded to <c>CardinalExtractor</c> only; <c>FractionExtractor</c> is created
        /// with its parameterless constructor.
        /// </param>
        public NumberExtractor(ChineseNumberMode mode = ChineseNumberMode.Default)
        {
            // Cardinal patterns are gathered first, then fraction patterns, in the
            // same order in which they are merged below.
            var cardinalRegexes = new CardinalExtractor(mode).Regexes;
            var fractionRegexes = new FractionExtractor().Regexes;

            var merged = ImmutableDictionary.CreateBuilder<Regex, string>();
            merged.AddRange(cardinalRegexes);
            merged.AddRange(fractionRegexes);

            Regexes = merged.ToImmutable();
        }
示例#3
0
        /// <summary>
        /// Populates <c>Regexes</c> with integer patterns taken from <c>NumbersDefinitions</c>.
        /// Each pattern is tagged either "IntegerNum" or "IntegerChs".
        /// </summary>
        /// <param name="mode">
        /// <c>Default</c> adds <c>NumbersDefinitions.NumbersWithoutPercent</c>;
        /// <c>ExtractAll</c> adds <c>NumbersDefinitions.NumbersWithPercent</c>.
        /// Any other value adds neither pattern.
        /// </param>
        public IntegerExtractor(ChineseNumberMode mode = ChineseNumberMode.Default)
        {
            const RegexOptions plain = RegexOptions.Singleline;
            const RegexOptions ignoreCase = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            var table = new Dictionary<Regex, string>();

            // Plain digit runs, optionally signed (e.g. 123456, -123456).
            table.Add(new Regex(NumbersDefinitions.NumbersSpecialsChars, ignoreCase), "IntegerNum");

            // Digits followed by a unit suffix (e.g. 15k, 16 G); case-sensitive.
            table.Add(new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, plain), "IntegerNum");

            // Digits grouped by separators (e.g. 1,234 and 2,332,111).
            table.Add(new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, ignoreCase), "IntegerNum");

            // "Half" expressions (e.g. 半百 = half a hundred, 半打 = half a dozen).
            table.Add(new Regex(NumbersDefinitions.NumbersWithHalfDozen, plain), "IntegerChs");

            // Dozen expressions (e.g. 一打 = one dozen, 五十打 = fifty dozen).
            table.Add(new Regex(NumbersDefinitions.NumbersWithDozen, plain), "IntegerChs");

            // Chinese-character integers (e.g. 一百五十五, 负一亿三百二十二). The original
            // notes say these are meant to avoid text such as 五十五点五个百分点; the exact
            // difference between the two patterns lives in NumbersDefinitions and is not
            // visible here.
            if (mode == ChineseNumberMode.Default)
            {
                table.Add(new Regex(NumbersDefinitions.NumbersWithoutPercent, plain), "IntegerChs");
            }
            else if (mode == ChineseNumberMode.ExtractAll)
            {
                table.Add(new Regex(NumbersDefinitions.NumbersWithPercent, plain), "IntegerChs");
            }

            Regexes = table.ToImmutableDictionary();
        }
示例#4
0
        /// <summary>
        /// Populates <c>Regexes</c> with the patterns used to recognise integers, tagged
        /// "IntegerNum" for digit-based forms and "IntegerChs" for Chinese-character forms.
        /// </summary>
        /// <param name="mode">
        /// <c>Default</c> registers a Chinese-integer pattern built on
        /// <c>NotSingleRegexChs|SingleRegexChs</c>; <c>ExtractAll</c> registers one built on
        /// <c>AllIntRegexChs</c>. Any other value registers no Chinese-integer pattern.
        /// </param>
        public IntegerExtractor(ChineseNumberMode mode = ChineseNumberMode.Default)
        {
            const RegexOptions caseSensitive = RegexOptions.Compiled | RegexOptions.Singleline;
            const RegexOptions caseInsensitive = caseSensitive | RegexOptions.IgnoreCase;

            var regexes = new Dictionary<Regex, string>()
            {
                // Optionally signed digit run, e.g. 123456, -123456
                {
                    new Regex(
                        $@"({SignSymbolRegexNum}\s*)?{ZeroToNineChsFullHalfRegexChs}+",
                        caseInsensitive),
                    "IntegerNum"
                },
                // Digits with a unit suffix, e.g. 15k, 16 G.
                // The suffix list previously repeated the half-width letters twice; the
                // second set is restored to the full-width forms (U+FF2D M, U+FF2B K,
                // U+FF4B k, U+FF27 G, U+FF34 T), written as regex \u escapes so that
                // width normalization of this file cannot merge them again.
                {
                    new Regex(
                        $@"{SignSymbolRegexNum}?{ZeroToNineChsFullHalfRegexChs}+\s*(K|k|M|G|T|\uFF2D|\uFF2B|\uFF4B|\uFF27|\uFF34)",
                        caseSensitive),
                    "IntegerNum"
                },
                // Thousands-grouped digits, e.g. 1,234 and 2,332,111.
                // The separator class previously held the half-width comma twice; it now
                // accepts the half-width comma and the full-width comma (U+FF0C).
                {
                    new Regex(
                        $@"{SignSymbolRegexNum}?{ZeroToNineChsFullHalfRegexChs}{{1,3}}([,\uFF0C]{ZeroToNineChsFullHalfRegexChs}{{3}})+",
                        caseInsensitive),
                    "IntegerNum"
                },
                // "Half" expressions, e.g. 半百 (half a hundred), 半打 (half a dozen)
                {
                    new Regex($@"半({RoundNumberIntegerRegexChs}|打)", caseSensitive),
                    "IntegerChs"
                },
                // Integer followed by one of the counters 双/雙/对/對/打, e.g. 一打 (one dozen),
                // 五十打 (fifty dozen); must not be directly followed by another integer.
                {
                    new Regex($@"{AllIntRegexChs}[双雙对對打](?!{AllIntRegexChs})", caseSensitive),
                    "IntegerChs"
                }
            };

            // Guards shared by both modes so that numbers belonging to a percentage are skipped:
            // the lookbehind rejects a match preceded by 百分之 (plus optional number text), and
            // the lookahead rejects a match followed by 个百分点 (e.g. 五十五点五个百分点).
            var notAfterPercentOf =
                $@"(?<![百佰]\s*分\s*之\s*({AllIntRegexChs}[点點]*|{DoubleExtractor.AllFloatRegexChs})*)";
            var notBeforePercentPoint =
                $@"(?!({AllIntRegexChs}*([点點]{ZeroToNineIntegerRegexChs}+)*|{DoubleExtractor.AllFloatRegexChs})*\s*[个個]\s*[百佰]\s*分\s*[点點])";

            switch (mode)
            {
                case ChineseNumberMode.Default:
                    // e.g. 一百五十五, 负一亿三百二十二
                    regexes.Add(
                        new Regex(
                            $@"{notAfterPercentOf}{SignSymbolRegexChs}?({NotSingleRegexChs}|{SingleRegexChs}){notBeforePercentPoint}",
                            caseSensitive),
                        "IntegerChs");
                    break;

                case ChineseNumberMode.ExtractAll:
                    // Same guards as Default, but the number itself is matched by AllIntRegexChs.
                    regexes.Add(
                        new Regex(
                            $@"{notAfterPercentOf}{SignSymbolRegexChs}?{AllIntRegexChs}{notBeforePercentPoint}",
                            caseSensitive),
                        "IntegerChs");
                    break;
            }

            Regexes = regexes.ToImmutableDictionary();
        }