// CardinalExtractor = Int + Double
        /// <summary>
        /// Initializes the extractor by merging the regex tables of an
        /// <see cref="IntegerExtractor"/> and a <see cref="DoubleExtractor"/>
        /// into a single immutable dictionary assigned to <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">Extraction mode; it is forwarded to the integer extractor only.</param>
        public CardinalExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
        {
            var merged = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Integer patterns: the mode is passed to this extractor.
            merged.AddRange(new IntegerExtractor(mode).Regexes);

            // Double patterns: constructed without a mode argument.
            merged.AddRange(new DoubleExtractor().Regexes);

            Regexes = merged.ToImmutable();
        }
예제 #2
0
        /// <summary>
        /// Initializes the extractor by merging the regex tables of a
        /// <see cref="CardinalExtractor"/> and a <see cref="FractionExtractor"/>
        /// into a single immutable dictionary assigned to <c>Regexes</c>.
        /// </summary>
        /// <param name="mode">Extraction mode; it is forwarded to the cardinal extractor only.</param>
        public NumberExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
        {
            var merged = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Cardinal patterns: the mode is passed to this extractor.
            merged.AddRange(new CardinalExtractor(mode).Regexes);

            // Fraction patterns: constructed without a mode argument.
            merged.AddRange(new FractionExtractor().Regexes);

            Regexes = merged.ToImmutable();
        }
예제 #3
0
        /// <summary>
        /// Initializes the integer extractor's regex table: four patterns that are always
        /// registered, plus one Korean-tagged pattern selected by <paramref name="mode"/>.
        /// The finished table is stored in <c>Regexes</c> as an immutable dictionary.
        /// </summary>
        /// <param name="mode">
        /// <see cref="KoreanNumberExtractorMode.Default"/> registers the allow-list pattern;
        /// <see cref="KoreanNumberExtractorMode.ExtractAll"/> registers the aggressive pattern.
        /// Any other value registers no additional pattern.
        /// </param>
        public IntegerExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
        {
            var table = new Dictionary<Regex, TypeTag>();

            // Plain digits, e.g. 123456, -123456
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexOptions.Singleline),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits followed by a magnitude suffix, e.g. 15k, 16 G
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexOptions.Singleline),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Digits with group separators, e.g. 1,234, 2,332,111
            table.Add(
                new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexOptions.Singleline),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

            // Spelled-out single digits with a sign word,
            // e.g. 마이너스 일 ("minus one"), 마이너스 오 ("minus five")
            table.Add(
                new Regex(NumbersDefinitions.ZeroToNineIntegerSpecialsChars, RegexOptions.Singleline),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));

            if (mode == KoreanNumberExtractorMode.Default)
            {
                // Spelled-out integers, e.g. 일백오십오 ("one hundred fifty-five")
                table.Add(
                    new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexOptions.Singleline),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));
            }
            else if (mode == KoreanNumberExtractorMode.ExtractAll)
            {
                // Uses no allow lists and extracts every potential integer (useful in Units,
                // for example). Besides 일백오십오 this also matches the "사" inside "사직구장",
                // because "사" is a homonym that can read as four (4).
                table.Add(
                    new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexOptions.Singleline),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));
            }

            Regexes = table.ToImmutableDictionary();
        }
예제 #4
0
        /// <summary>
        /// Initializes the integer extractor's regex table: three digit-based patterns that are
        /// always registered under the "IntegerNum" tag, plus one pattern registered under the
        /// "IntegerKor" tag and selected by <paramref name="mode"/>.
        /// The finished table is stored in <c>Regexes</c> as an immutable dictionary.
        /// </summary>
        /// <param name="mode">
        /// <see cref="KoreanNumberExtractorMode.Default"/> registers the allow-list pattern;
        /// <see cref="KoreanNumberExtractorMode.ExtractAll"/> registers the aggressive pattern.
        /// Any other value registers no additional pattern.
        /// </param>
        public IntegerExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
        {
            const string DigitTag = "IntegerNum";
            const string KoreanTag = "IntegerKor";

            var table = new Dictionary<Regex, string>();

            // Plain digits, e.g. 123456, -123456
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexOptions.IgnoreCase | RegexOptions.Singleline),
                DigitTag);

            // Digits followed by a magnitude suffix, e.g. 15k, 16 G.
            // Note: this pattern is compiled without IgnoreCase, unlike its two neighbours.
            table.Add(
                new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexOptions.Singleline),
                DigitTag);

            // Digits with group separators, e.g. 1,234, 2,332,111
            table.Add(
                new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexOptions.IgnoreCase | RegexOptions.Singleline),
                DigitTag);

            if (mode == KoreanNumberExtractorMode.Default)
            {
                // Spelled-out integers, e.g. 一百五十五 (155), 负一亿三百二十二 (negative 100,000,322).
                table.Add(
                    new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexOptions.Singleline),
                    KoreanTag);
            }
            else if (mode == KoreanNumberExtractorMode.ExtractAll)
            {
                // Uses no allow lists and extracts every potential integer (useful in Units,
                // for example); in addition to the examples above this also picks up the
                // "九" inside "西九条".
                table.Add(
                    new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexOptions.Singleline),
                    KoreanTag);
            }

            Regexes = table.ToImmutableDictionary();
        }