/// <summary>
/// Registers every fraction pattern together with its tag and publishes the
/// resulting table through <c>Regexes</c> as an immutable dictionary.
/// </summary>
private FractionExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    // Notation-based fractions carry the generic number suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded fractions carry the Constants.GERMAN suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN));

    Regexes = tagByPattern.ToImmutableDictionary();
}
} = Constants.SYS_NUM_INTEGER; // "Integer";

/// <summary>
/// Registers the integer patterns with their string tags and stores the table
/// in <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Placeholder handed to <c>NumbersWithPlaceHolder</c> and to the long-format
/// regex generator.
/// </param>
public IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var tagByPattern = new Dictionary<Regex, string>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // This pattern is compiled without IgnoreCase, unlike its neighbours.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithSuffix, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozenSuffix, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded integers carry the Constants.FRENCH suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithLocks, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.FRENCH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.FRENCH));

    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the integer patterns with their tags and stores the table in
/// <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Placeholder handed to <c>NumbersWithPlaceHolder</c> and to the long-format
/// regex generator.
/// </param>
private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.IndianNumberingSystemRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded integers carry the Constants.HINDI suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));

    // Long-format numbers with comma, blank and no-break-space separators.
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    tagByPattern.Add(
        new Regex(NumbersDefinitions.NegativeHinglishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.CompoundEnglishNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DecimalUnitsWithRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Sets the interned key prefix for this extractor and registers the ordinal
/// patterns with their tags.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor, plus the options
/// and culture that make up the key prefix.
/// </param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalGermanRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.GERMAN));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.GERMAN));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Sets the interned key prefix, compiles the two auxiliary regexes
/// (ambiguous fraction connectors and relative ordinals) and registers the
/// ordinal patterns with their tags.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor, plus the options
/// and culture that make up the key prefix.
/// </param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture);

    AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags);
    RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSwedishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.SWEDISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.SWEDISH));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Combines the cardinal and fraction regex tables (plus the currency pattern
/// when requested) into one immutable table stored in <c>Regexes</c>.
/// </summary>
/// <param name="mode">
/// <c>PureNumber</c> builds the cardinal extractor with
/// <c>NumbersDefinitions.PlaceHolderPureNumber</c>; <c>Currency</c> first
/// registers <c>NumbersDefinitions.CurrencyRegex</c>; any other value uses the
/// parameterless cardinal extractor with no extra pattern.
/// </param>
public NumberExtractor(NumberMode mode = NumberMode.Default)
{
    var table = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

    CardinalExtractor cardinal;
    if (mode == NumberMode.PureNumber)
    {
        cardinal = new CardinalExtractor(NumbersDefinitions.PlaceHolderPureNumber);
    }
    else
    {
        if (mode == NumberMode.Currency)
        {
            // The currency pattern is registered before the cardinal extractor is created.
            table.Add(
                new Regex(NumbersDefinitions.CurrencyRegex, RegexOptions.Singleline),
                RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
        }

        cardinal = new CardinalExtractor();
    }

    table.AddRange(cardinal.Regexes);
    table.AddRange(new FractionExtractor().Regexes);

    Regexes = table.ToImmutable();
}
} = Constants.SYS_NUM_FRACTION; // "Fraction";

/// <summary>
/// Registers the fraction patterns with their tags and stores the table in
/// <c>Regexes</c> as an immutable dictionary. Every pattern is compiled with
/// IgnoreCase and Singleline.
/// </summary>
public FractionExtractor()
{
    const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNotationRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded fractions carry the Constants.SPANISH suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNounRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionNounWithArticleRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.FractionPrepositionRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Obtains the relative-ordinal regex and the ordinal patterns from
/// <c>RegexCache</c> and registers the patterns with their tags.
/// </summary>
/// <param name="options">Options forwarded unchanged to the base constructor.</param>
private OrdinalExtractor(NumberOptions options)
    : base(options)
{
    RelativeReferenceRegex = RegexCache.Get(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalTurkishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.RoundNumberOrdinalRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Sets the interned key prefix and registers the ordinal patterns, obtained
/// from <c>RegexCache</c>, with their tags.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor, plus the options
/// and culture that make up the key prefix.
/// </param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalDutchRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns with their tags and stores the table
/// in <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Placeholder handed to the placeholder-aware pattern builders and to the
/// long-format regex generator.
/// </param>
private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // This pattern is compiled without IgnoreCase, unlike its neighbours.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithRoundNumber, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleAllFloatRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.GERMAN));

    // Exponential notations carry the power suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Compiles the two auxiliary regexes (ambiguous fraction connectors and
/// relative ordinals) and registers the ordinal patterns with their tags.
/// </summary>
/// <param name="options">Options forwarded unchanged to the base constructor.</param>
private OrdinalExtractor(NumberOptions options)
    : base(options)
{
    AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags);
    RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the integer patterns with their tags; the last pattern depends
/// on <paramref name="mode"/>.
/// </summary>
/// <param name="mode">
/// <c>Default</c> adds the allow-list pattern, <c>ExtractAll</c> adds the
/// aggressive pattern; any other value adds neither.
/// </param>
public IntegerExtractor(ChineseNumberExtractorMode mode = ChineseNumberExtractorMode.Default)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    // e.g. 123456, -123456
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15k, 16 G
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1,234, 2,332,111
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 半百, 半打
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    // e.g. 半
    tagByPattern.Add(
        new Regex(NumbersDefinitions.HalfUnitRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    // e.g. 一打, 五十打
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    if (mode == ChineseNumberExtractorMode.Default)
    {
        // e.g. 一百五十五, 负一亿三百二十二.
        // The allow list avoids extracting "四" from "四川".
        tagByPattern.Add(
            new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
    }
    else if (mode == ChineseNumberExtractorMode.ExtractAll)
    {
        // e.g. 一百五十五, 负一亿三百二十二, and also "四" from "四川".
        // No allow list: extracts every potential integer (useful in Units, for example).
        tagByPattern.Add(
            new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
    }

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Sets the interned key prefix and registers the integer patterns with their
/// tags.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor, the placeholder
/// handed to the pattern builders, and the options, placeholder and culture
/// that make up the key prefix.
/// </param>
private IntegerExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture);

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithSpace(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded integers carry the Constants.ARABIC suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ARABIC));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ARABIC));

    // Long-format numbers, one pattern per separator kind.
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumInvertedComma, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the single ordinal pattern (<c>OrdinalKoreanRegex</c>) under the
/// ordinal prefix with the generic number suffix.
/// </summary>
public OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalKoreanRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns with their tags and stores the table
/// in <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor and the placeholder
/// handed to the pattern builders.
/// </param>
private DoubleExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleIndianDecimalPointRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ENGLISH));

    // Exponential notations carry the power suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // Long-format numbers, one pattern per separator kind.
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the integer patterns (obtained from <c>RegexCache</c>) with
/// their tags; the last pattern depends on <paramref name="mode"/>.
/// </summary>
/// <param name="mode">
/// <c>Default</c> adds the allow-list pattern, <c>ExtractAll</c> adds the
/// aggressive pattern; any other value adds neither.
/// </param>
public IntegerExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    // e.g. 123456, -123456
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.NumbersSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15k, 16 G
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1,234, 2,332,111
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 半百, 半ダース
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

    // e.g. 一ダース, 五十ダース
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

    if (mode == CJKNumberExtractorMode.Default)
    {
        // e.g. 一百五十五, 负一亿三百二十二.
        // The allow list avoids false positives such as the "九" inside "西九条".
        tagByPattern.Add(
            RegexCache.Get(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
    }
    else if (mode == CJKNumberExtractorMode.ExtractAll)
    {
        // e.g. 一百五十五, 负一亿三百二十二, and also the "九" inside "西九条".
        // No allow list: extracts every potential integer (useful in Units, for example).
        tagByPattern.Add(
            RegexCache.Get(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
    }

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the single ordinal pattern (<c>OrdinalKoreanRegex</c>) under the
/// ordinal prefix with the <c>Constants.KOREAN</c> suffix.
/// </summary>
/// <param name="config">Supplies the options forwarded to the base constructor.</param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalKoreanRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.KOREAN));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the single ordinal pattern (<c>AllOrdinalRegex</c>, obtained from
/// <c>RegexCache</c>) under the ordinal prefix with the
/// <c>Constants.JAPANESE</c> suffix.
/// </summary>
public OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    // e.g. だい一百五十四
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.AllOrdinalRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.JAPANESE));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns with their tags and stores the table
/// in <c>Regexes</c> as an immutable dictionary.
/// </summary>
public DoubleExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-)2.5; can avoid cases like an IP address xx.xx.xx.xx
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-).2
    tagByPattern.Add(
        new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. えは九・二三二一三一二
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleRoundNumberSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1.0 K
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15.2万
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.JAPANESE));

    // e.g. 2e6, 21.2e0
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleExponentialNotationKanjiRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // e.g. 2^5
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the two ordinal patterns (suffix form and
/// <c>OrdinalFrenchRegex</c>), both compiled with Singleline only, together
/// with their tags.
/// </summary>
private OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalFrenchRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.FRENCH));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the two ordinal patterns (suffix form and
/// <c>OrdinalFrenchRegex</c>), obtained from <c>RegexCache</c>, together with
/// their tags.
/// </summary>
private OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalFrenchRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.FRENCH));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
} = Constants.SYS_NUM_ORDINAL; // "Ordinal";

/// <summary>
/// Registers the two ordinal patterns (suffix form and
/// <c>OrdinalItalianRegex</c>), both compiled with IgnoreCase and Singleline,
/// together with their tags.
/// </summary>
public OrdinalExtractor()
{
    const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalItalianRegex, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ITALIAN));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the integer patterns with their string tags; the last pattern
/// depends on <paramref name="mode"/>.
/// </summary>
/// <param name="mode">
/// <c>Default</c> adds the allow-list pattern, <c>ExtractAll</c> adds the
/// aggressive pattern; any other value adds neither.
/// </param>
public IntegerExtractor(JapaneseNumberExtractorMode mode = JapaneseNumberExtractorMode.Default)
{
    const RegexOptions ignoreCaseSingleline = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var tagByPattern = new Dictionary<Regex, string>();

    // e.g. 123456, -123456
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsChars, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15k, 16 G (compiled without IgnoreCase)
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1,234, 2,332,111
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, ignoreCaseSingleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 半百, 半ダース
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

    // e.g. 一ダース, 五十ダース
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozen, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));

    if (mode == JapaneseNumberExtractorMode.Default)
    {
        // e.g. 一百五十五, 负一亿三百二十二.
        // The allow list avoids false positives such as the "九" inside "西九条".
        tagByPattern.Add(
            new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexOptions.Singleline),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
    }
    else if (mode == JapaneseNumberExtractorMode.ExtractAll)
    {
        // e.g. 一百五十五, 负一亿三百二十二, and also the "九" inside "西九条".
        // No allow list: extracts every potential integer (useful in Units, for example).
        tagByPattern.Add(
            new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexOptions.Singleline),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE));
    }

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the two ordinal patterns (suffix form and
/// <c>OrdinalItalianRegex</c>), compiled with <c>RegexFlags</c>, together
/// with their tags.
/// </summary>
private OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalItalianRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ITALIAN));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns (obtained from <c>RegexCache</c> or
/// the long-format generator) with their tags and stores the table in
/// <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Placeholder handed to the placeholder-aware pattern builders and to the
/// long-format regex generator.
/// </param>
private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.HINDI));

    // Exponential notations carry the power suffix.
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // Long-format numbers, one pattern per separator kind.
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the integer patterns with their tags and stores the table in
/// <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Placeholder handed to <c>NumbersWithPlaceHolder</c> and to the long-format
/// regex generator.
/// </param>
private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Worded integers carry the Constants.TURKISH suffix.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.NegativeAllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH));

    // The long-format generator is called without an explicit flags argument here.
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the two ordinal patterns (suffix form and
/// <c>OrdinalNounRegex</c>), compiled with <c>RegexFlags</c>, together with
/// their tags.
/// </summary>
private OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalNounRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.SPANISH));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the two ordinal patterns, both compiled with Singleline only,
/// together with their tags.
/// </summary>
private OrdinalExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));

    // NOTE(review): a definition named OrdinalEnglishRegex is tagged with
    // Constants.PORTUGUESE here; reproduced as-is. Confirm the name is intended.
    tagByPattern.Add(
        new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.PORTUGUESE));

    this.Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns (obtained from <c>RegexCache</c>)
/// with their tags and stores the table in <c>Regexes</c> as an immutable
/// dictionary.
/// </summary>
/// <param name="config">Not read anywhere in this constructor body.</param>
public DoubleExtractor(BaseNumberOptionsConfiguration config)
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-)2.5; can avoid cases like an IP address xx.xx.xx.xx
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-).2
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1.0 K
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15.2万
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE));

    // e.g. 四十五点三三
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE));

    // e.g. 2e6, 21.2e0
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // e.g. 2^5
    tagByPattern.Add(
        RegexCache.Get(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}
/// <summary>
/// Registers the floating-point patterns, all compiled with Singleline only,
/// with their tags and stores the table in <c>Regexes</c> as an immutable
/// dictionary.
/// </summary>
public DoubleExtractor()
{
    var tagByPattern = new Dictionary<Regex, TypeTag>();

    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-)2.5; can avoid cases like an IP address xx.xx.xx.xx
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. (-).2
    tagByPattern.Add(
        new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 1.0 K
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // e.g. 15.2万
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE));

    // e.g. 四十五点三三
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE));

    // e.g. 2e6, 21.2e0
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // e.g. 2^5
    tagByPattern.Add(
        new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexOptions.Singleline),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    Regexes = tagByPattern.ToImmutableDictionary();
}