Esempio n. 1
0
        /// <summary>
        /// Builds the regex-to-tag table used for integer extraction and publishes it
        /// as an immutable dictionary through <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">
        /// Selects which regex is registered for Japanese-script integers: the
        /// allow-list based one (<c>Default</c>) or the aggressive one (<c>ExtractAll</c>).
        /// Any other value registers no Japanese-script integer regex.
        /// </param>
        public IntegerExtractor(JapaneseNumberExtractorMode mode = JapaneseNumberExtractorMode.Default)
        {
            var table = new Dictionary<Regex, TypeTag>();

            // Each tag is generated by its own call, in registration order, exactly
            // once per entry; the calls are intentionally not shared or reordered.

            // Plain digits: 123456,  -123456
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits followed by a magnitude suffix: 15k,  16 G
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits with group separators: 1,234,  2,332,111
            table.Add(
                new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // "Half" quantities: 半百  半ダース
            table.Add(
                new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

            // Dozens: 一ダース  五十ダース
            table.Add(
                new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

            if (mode == JapaneseNumberExtractorMode.Default)
            {
                // e.g. 一百五十五, 负一亿三百二十二.
                // The allow list keeps the "九" inside a word such as "西九条" from being extracted.
                table.Add(
                    new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
            }
            else if (mode == JapaneseNumberExtractorMode.ExtractAll)
            {
                // e.g. 一百五十五, 负一亿三百二十二, and also the "九" inside "西九条".
                // No allow list: every potential integer is extracted (useful in Units, for example).
                table.Add(
                    new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
            }

            Regexes = table.ToImmutableDictionary();
        }
        /// <summary>
        /// Builds the regex-to-type-name table used for integer extraction and publishes
        /// it as an immutable dictionary through <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">
        /// Selects which regex is registered for Japanese-script integers: the
        /// allow-list based one (<c>Default</c>) or the aggressive one (<c>ExtractAll</c>).
        /// Any other value registers no Japanese-script integer regex.
        /// </param>
        public IntegerExtractor(JapaneseNumberExtractorMode mode = JapaneseNumberExtractorMode.Default)
        {
            // Type names attached to the registered patterns.
            const string numericTag = "IntegerNum";
            const string japaneseTag = "IntegerJpn";

            // The two option sets used below; each pattern keeps the one it was registered with.
            const RegexOptions singleLine = RegexOptions.Singleline;
            const RegexOptions singleLineIgnoreCase = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            var table = new Dictionary<Regex, string>
            {
                // Plain digits: 123456,  -123456
                [new Regex(NumbersDefinitions.NumbersSpecialsChars, singleLineIgnoreCase)] = numericTag,

                // Digits followed by a magnitude suffix: 15k,  16 G
                [new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, singleLine)] = numericTag,

                // Digits with group separators: 1,234,  2,332,111
                [new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, singleLineIgnoreCase)] = numericTag,

                // "Half" quantities: 半百  半ダース
                [new Regex(NumbersDefinitions.NumbersWithHalfDozen, singleLine)] = japaneseTag,

                // Dozens: 一ダース  五十ダース
                [new Regex(NumbersDefinitions.NumbersWithDozen, singleLine)] = japaneseTag,
            };

            if (mode == JapaneseNumberExtractorMode.Default)
            {
                // e.g. 一百五十五, 负一亿三百二十二.
                // The allow list keeps the "九" inside a word such as "西九条" from being extracted.
                table.Add(new Regex(NumbersDefinitions.NumbersWithAllowListRegex, singleLine), japaneseTag);
            }
            else if (mode == JapaneseNumberExtractorMode.ExtractAll)
            {
                // e.g. 一百五十五, 负一亿三百二十二, and also the "九" inside "西九条".
                // No allow list: every potential integer is extracted (useful in Units, for example).
                table.Add(new Regex(NumbersDefinitions.NumbersAggressiveRegex, singleLine), japaneseTag);
            }

            Regexes = table.ToImmutableDictionary();
        }
Esempio n. 3
0
        /// <summary>
        /// Builds the cardinal-number regex table as the union of the integer and
        /// double extractors' tables (CardinalExtractor = Int + Double) and stores it
        /// in <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">Extraction mode forwarded to the <c>IntegerExtractor</c>.</param>
        public CardinalExtractor(JapaneseNumberExtractorMode mode = JapaneseNumberExtractorMode.Default)
        {
            var combined = ImmutableDictionary.CreateBuilder<Regex, string>();

            // Integer patterns are created and merged first ...
            var integerPart = new IntegerExtractor(mode);
            combined.AddRange(integerPart.Regexes);

            // ... followed by the double patterns.
            var doublePart = new DoubleExtractor();
            combined.AddRange(doublePart.Regexes);

            Regexes = combined.ToImmutable();
        }
Esempio n. 4
0
        /// <summary>
        /// Builds the overall number regex table as the union of the cardinal and
        /// fraction extractors' tables and stores it in <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">Extraction mode forwarded to the <c>CardinalExtractor</c>.</param>
        public NumberExtractor(JapaneseNumberExtractorMode mode = JapaneseNumberExtractorMode.Default)
        {
            var combined = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Cardinal patterns are created and merged first ...
            var cardinalPart = new CardinalExtractor(mode);
            combined.AddRange(cardinalPart.Regexes);

            // ... followed by the fraction patterns.
            var fractionPart = new FractionExtractor();
            combined.AddRange(fractionPart.Regexes);

            Regexes = combined.ToImmutable();
        }