// CardinalExtractor = Int + Double
// Merges the regex table of an IntegerExtractor (built with the requested mode)
// with the regex table of a DoubleExtractor into a single immutable map.
public CardinalExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
{
    var merged = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

    // Integer patterns; this is the only sub-extractor that receives the mode.
    merged.AddRange(new IntegerExtractor(mode).Regexes);

    // Double patterns; constructed without arguments.
    merged.AddRange(new DoubleExtractor().Regexes);

    Regexes = merged.ToImmutable();
}
// Builds the combined regex table from the cardinal extractor (created with the
// requested mode) followed by the fraction extractor, and stores it as an
// immutable map.
public NumberExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
{
    var combined = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

    // Cardinal patterns first; the mode is forwarded to this extractor only.
    var cardinal = new CardinalExtractor(mode);
    combined.AddRange(cardinal.Regexes);

    // Fraction patterns second; constructed without arguments.
    var fraction = new FractionExtractor();
    combined.AddRange(fraction.Regexes);

    Regexes = combined.ToImmutable();
}
// Registers the integer regexes: four patterns that are always present, plus
// one mode-dependent pattern. The result is frozen into an immutable map.
public IntegerExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
{
    var patterns = new Dictionary<Regex, TypeTag>();

    // 123456, -123456
    patterns.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 15k, 16 G
    patterns.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 1,234, 2,332,111
    patterns.Add(
        new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 마이너스 일, 마이너스 오
    patterns.Add(
        new Regex(NumbersDefinitions.ZeroToNineIntegerSpecialsChars, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));

    // Exactly one extra pattern is added for the two known modes; any other
    // mode value adds nothing.
    if (mode == KoreanNumberExtractorMode.Default)
    {
        // 일백오십오
        patterns.Add(
            new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexOptions.Singleline),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));
    }
    else if (mode == KoreanNumberExtractorMode.ExtractAll)
    {
        // 일백오십오, 사직구장 ("사직구장" is picked up because of "사", a homonym that
        // reads like four (4) or another Chinese character).
        // No allow lists are applied, so every potential integer is extracted
        // (useful in Units, for example).
        patterns.Add(
            new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexOptions.Singleline),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN));
    }

    Regexes = patterns.ToImmutableDictionary();
}
// Registers the integer regexes keyed to string tags: three patterns tagged
// "IntegerNum" that are always present, plus one mode-dependent pattern tagged
// "IntegerKor". The result is frozen into an immutable map.
// NOTE(review): the sample inputs in the comments below are written with Chinese
// numerals although this constructor takes a Korean extractor mode — confirm
// whether they should be replaced with Korean examples.
public IntegerExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default)
{
    var patterns = new Dictionary<Regex, string>();

    // 123456, -123456
    patterns.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexOptions.IgnoreCase | RegexOptions.Singleline),
        "IntegerNum");

    // 15k, 16 G (this pattern is compiled without IgnoreCase)
    patterns.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexOptions.Singleline),
        "IntegerNum");

    // 1,234, 2,332,111
    patterns.Add(
        new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexOptions.IgnoreCase | RegexOptions.Singleline),
        "IntegerNum");

    // Exactly one extra pattern is added for the two known modes; any other
    // mode value adds nothing.
    if (mode == KoreanNumberExtractorMode.Default)
    {
        // 一百五十五, 负一亿三百二十二.
        patterns.Add(
            new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexOptions.Singleline),
            "IntegerKor");
    }
    else if (mode == KoreanNumberExtractorMode.ExtractAll)
    {
        // 一百五十五, 负一亿三百二十二, "西九条" from "九"
        // No allow lists are applied, so every potential integer is extracted
        // (useful in Units, for example).
        patterns.Add(
            new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexOptions.Singleline),
            "IntegerKor");
    }

    Regexes = patterns.ToImmutableDictionary();
}