// Verifies that a regex obtained from a RegexCache instance reports the
// RegexOptions it was requested with.
public void RegexCacheGetMethodOption()
{
    RegexCache rc = new RegexCache();

    var r = rc.Get("pista", RegexOptions.Multiline);

    // AreEqual reports both the expected and the actual options when the check fails,
    // which IsTrue on a pre-evaluated comparison cannot do.
    Assert.AreEqual(RegexOptions.Multiline, r.Options);
}
/// <summary>
/// Registers the integer patterns of this extractor together with their type tags and
/// publishes them through <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="placeholder">
/// Value forwarded to <c>NumbersDefinitions.NumbersWithPlaceHolder</c> and to
/// <c>GenerateLongFormatNumberRegexes</c> when the patterns are built.
/// </param>
private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault)
{
    var regexes = new Dictionary<Regex, TypeTag>();

    // Patterns tagged with the generic number suffix.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.IndianNumberingSystemRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Patterns tagged with the HINDI suffix.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));

    // Long-format numbers for each supported thousands separator.
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Remaining HINDI-tagged patterns.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NegativeHinglishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.CompoundEnglishNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DecimalUnitsWithRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Builds the regex that matches a single written-number token: the configured word
/// separator token, the literal " -", or any key of the cardinal and ordinal number maps.
/// </summary>
/// <returns>
/// The regex returned by <c>RegexCache.Get</c> for the assembled pattern, requested with
/// <c>RegexOptions.Singleline | RegexOptions.Compiled</c>.
/// </returns>
private Regex BuildTextNumberRegex()
{
    // Build each key alternation exactly once; only their relative order depends on the culture.
    var cardinalKeys = GetKeyRegex(this.Config.CardinalNumberMap.Keys);
    var ordinalKeys = GetKeyRegex(this.Config.OrdinalNumberMap.Keys);

    // @TODO consider remodeling the creation of this regex
    // For Italian, we invert the order of Cardinal and Ordinal in singleIntFrac in order to correctly extract
    // ordinals that contain cardinals such as 'tredicesimo' (thirteenth) which starts with 'tre' (three).
    // With the standard order, the parser fails to return '13' since only the cardinal 'tre' (3) is extracted
    var numberKeys = this.Config.CultureInfo.Name == "it-IT"
        ? ordinalKeys + "|" + cardinalKeys
        : cardinalKeys + "|" + ordinalKeys;

    var singleIntFrac = $"{this.Config.WordSeparatorToken}| -|" + numberKeys;

    string textNumberPattern;

    // Checks for languages that use "compound numbers". I.e. written number parts are not separated
    // by whitespaces or special characters (e.g., dreihundert in German).
    if (isCompoundNumberLanguage)
    {
        textNumberPattern = @"(" + singleIntFrac + @")";
    }
    else
    {
        // Default case, like in English.
        textNumberPattern = @"(?<=\b)(" + singleIntFrac + @")(?=\b)";
    }

    return RegexCache.Get(textNumberPattern, RegexOptions.Singleline | RegexOptions.Compiled);
}
/// <summary>
/// Combines the regexes of a <c>CardinalExtractor</c> and a <c>FractionExtractor</c> into
/// <c>Regexes</c>, and fills <c>AmbiguityFiltersDict</c> unless the configuration is in unit mode.
/// </summary>
/// <param name="config">Configuration handed to both sub-extractors; its <c>Mode</c> controls ambiguity filtering.</param>
/// <param name="mode">Extraction mode handed to the cardinal extractor.</param>
public NumberExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default)
{
    var regexBuilder = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

    // Add Cardinal
    var cardinalExtractor = new CardinalExtractor(config, mode);
    regexBuilder.AddRange(cardinalExtractor.Regexes);

    // Add Fraction
    var fractionExtractor = new FractionExtractor(config);
    regexBuilder.AddRange(fractionExtractor.Regexes);

    Regexes = regexBuilder.ToImmutable();

    var filterBuilder = ImmutableDictionary.CreateBuilder<Regex, Regex>();

    // Do not filter the ambiguous number cases like 'that one' in NumberWithUnit, otherwise they can't be resolved.
    var applyAmbiguityFilters = config.Mode != NumberMode.Unit;
    if (applyAmbiguityFilters)
    {
        foreach (var filter in NumbersDefinitions.AmbiguityFiltersDict)
        {
            var keyRegex = RegexCache.Get(filter.Key, RegexFlags);
            var valueRegex = RegexCache.Get(filter.Value, RegexFlags);
            filterBuilder.Add(keyRegex, valueRegex);
        }
    }

    AmbiguityFiltersDict = filterBuilder.ToImmutable();
}
/// <summary>
/// Computes the interned key prefix for this extractor and registers its ordinal patterns
/// (generic number-suffix tags and DUTCH tags) in <c>Regexes</c>.
/// </summary>
/// <param name="config">Supplies the options passed to the base constructor and the culture used in the key prefix.</param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    var prefix = ExtractType + "_" + config.Options.ToString() + "_" + config.Culture;
    keyPrefix = string.Intern(prefix);

    var regexes = new Dictionary<Regex, TypeTag>();

    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalDutchRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Stores the configuration, registers the URL patterns under <c>Constants.URL_REGEX</c>,
/// and prepares the ambiguous-time-term regex and the TLD matcher.
/// </summary>
/// <param name="config">Provides <c>UrlRegex</c> and <c>IpUrlRegex</c>; kept in the <c>config</c> field.</param>
public BaseURLExtractor(URLConfiguration config)
{
    this.config = config;

    // Two patterns come from the configuration, the third from the shared BaseURL definitions.
    var urlRegexes = new Dictionary<Regex, string>();
    urlRegexes.Add(config.UrlRegex, Constants.URL_REGEX);
    urlRegexes.Add(config.IpUrlRegex, Constants.URL_REGEX);
    urlRegexes.Add(RegexCache.Get(BaseURL.UrlRegex2, RegexOptions.Compiled), Constants.URL_REGEX);
    Regexes = urlRegexes.ToImmutableDictionary();

    AmbiguousTimeTerm = RegexCache.Get(BaseURL.AmbiguousTimeTerm, RegexOptions.Compiled);

    TldMatcher = new StringMatcher();
    TldMatcher.Init(BaseURL.TldList);
}
/// <summary>
/// Populates the parser configuration from <c>NumbersDefinitions</c> and the supplied options:
/// culture data, separator tokens, number maps and the regexes used while parsing.
/// </summary>
/// <param name="config">Options configuration; stored in <c>Config</c> and used for the culture name.</param>
public FrenchNumberParserConfiguration(INumberOptionsConfiguration config)
{
    Config = config;
    LanguageMarker = NumbersDefinitions.LangMarker;
    CultureInfo = new CultureInfo(config.Culture);

    // Language characteristics and separator tokens.
    IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage;
    IsMultiDecimalSeparatorCulture = NumbersDefinitions.MultiDecimalSeparatorCulture;
    DecimalSeparatorChar = NumbersDefinitions.DecimalSeparatorChar;
    FractionMarkerToken = NumbersDefinitions.FractionMarkerToken;
    NonDecimalSeparatorChar = NumbersDefinitions.NonDecimalSeparatorChar;
    HalfADozenText = NumbersDefinitions.HalfADozenText;
    WordSeparatorToken = NumbersDefinitions.WordSeparatorToken;

    // Written separator texts.
    WrittenDecimalSeparatorTexts = NumbersDefinitions.WrittenDecimalSeparatorTexts;
    WrittenGroupSeparatorTexts = NumbersDefinitions.WrittenGroupSeparatorTexts;
    WrittenIntegerSeparatorTexts = NumbersDefinitions.WrittenIntegerSeparatorTexts;
    WrittenFractionSeparatorTexts = NumbersDefinitions.WrittenFractionSeparatorTexts;

    // Number maps.
    CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary();
    RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary();
    RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary();
    OrdinalNumberMap = NumberMapGenerator.InitOrdinalNumberMap(
        NumbersDefinitions.OrdinalNumberMap,
        NumbersDefinitions.PrefixCardinalMap,
        NumbersDefinitions.SuffixOrdinalMap);
    RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary();

    // @TODO Change init to follow design in other languages
    HalfADozenRegex = RegexCache.Get(NumbersDefinitions.HalfADozenRegex, RegexFlags);
    DigitalNumberRegex = RegexCache.Get(NumbersDefinitions.DigitalNumberRegex, RegexFlags);
    NegativeNumberSignRegex = RegexCache.Get(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags);
    FractionPrepositionRegex = RegexCache.Get(NumbersDefinitions.FractionPrepositionRegex, RegexFlags);
}
/// <summary>
/// Sets the relative-reference regex and registers the ordinal patterns of this extractor
/// (generic number-suffix tags and TURKISH tags) in <c>Regexes</c>.
/// </summary>
/// <param name="options">Options forwarded to the base constructor.</param>
private OrdinalExtractor(NumberOptions options)
    : base(options)
{
    RelativeReferenceRegex = RegexCache.Get(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags);

    var regexes = new Dictionary<Regex, TypeTag>();

    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalTurkishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.RoundNumberOrdinalRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Populates the parser configuration from <c>NumbersDefinitions</c> and the supplied options:
/// culture data, separator tokens, number maps (including the decimal-units map) and parsing regexes.
/// </summary>
/// <param name="config">Options configuration; stored in <c>Config</c> and used for the culture name.</param>
public HindiNumberParserConfiguration(INumberOptionsConfiguration config)
{
    Config = config;
    LanguageMarker = NumbersDefinitions.LangMarker;
    CultureInfo = new CultureInfo(config.Culture);

    // Language characteristics and separator tokens.
    IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage;
    IsMultiDecimalSeparatorCulture = NumbersDefinitions.MultiDecimalSeparatorCulture;
    DecimalSeparatorChar = NumbersDefinitions.DecimalSeparatorChar;
    FractionMarkerToken = NumbersDefinitions.FractionMarkerToken;
    NonDecimalSeparatorChar = NumbersDefinitions.NonDecimalSeparatorChar;
    HalfADozenText = NumbersDefinitions.HalfADozenText;
    WordSeparatorToken = NumbersDefinitions.WordSeparatorToken;

    // Written separator texts.
    WrittenDecimalSeparatorTexts = NumbersDefinitions.WrittenDecimalSeparatorTexts;
    WrittenGroupSeparatorTexts = NumbersDefinitions.WrittenGroupSeparatorTexts;
    WrittenIntegerSeparatorTexts = NumbersDefinitions.WrittenIntegerSeparatorTexts;
    WrittenFractionSeparatorTexts = NumbersDefinitions.WrittenFractionSeparatorTexts;

    // Number maps.
    CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary();
    OrdinalNumberMap = NumbersDefinitions.OrdinalNumberMap.ToImmutableDictionary();
    DecimalUnitsMap = NumbersDefinitions.DecimalUnitsMap.ToImmutableDictionary();
    RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary();
    RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary();
    RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary();
    ZeroToNineMap = NumbersDefinitions.ZeroToNineMap.ToImmutableDictionary();

    // Regexes used while parsing.
    AdditionTermsRegex = RegexCache.Get(NumbersDefinitions.AdditionTermsRegex, RegexFlags);
    HalfADozenRegex = RegexCache.Get(NumbersDefinitions.HalfADozenRegex, RegexFlags);
    DigitalNumberRegex = RegexCache.Get(NumbersDefinitions.DigitalNumberRegex, RegexFlags);
    NegativeNumberSignRegex = RegexCache.Get(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags);
    FractionPrepositionRegex = RegexCache.Get(NumbersDefinitions.FractionPrepositionRegex, RegexFlags);
    FractionPrepositionInverseRegex = RegexCache.Get(NumbersDefinitions.FractionPrepositionInverseRegex, RegexFlags);
}
/// <summary>
/// Wires the date-time parser configuration: tokens and AM/PM regexes come from
/// <c>DateTimeDefinitions</c>, the remaining regexes from the German extractor configurations,
/// and the extractors, parsers and maps from the shared <paramref name="config"/>.
/// </summary>
/// <param name="config">Common parser configuration, also forwarded to the base constructor.</param>
public GermanDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config)
    : base(config)
{
    // Tokens that may precede the date / time part.
    TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
    TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime;

    // Date and time extractors/parsers shared through the common configuration.
    DateExtractor = config.DateExtractor;
    TimeExtractor = config.TimeExtractor;
    DateParser = config.DateParser;
    TimeParser = config.TimeParser;

    // Regexes.
    NowRegex = GermanDateTimeExtractorConfiguration.NowRegex;
    AMTimeRegex = RegexCache.Get(DateTimeDefinitions.AMTimeRegex, RegexFlags);
    PMTimeRegex = RegexCache.Get(DateTimeDefinitions.PMTimeRegex, RegexFlags);
    SimpleTimeOfTodayAfterRegex = GermanDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex;
    SimpleTimeOfTodayBeforeRegex = GermanDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex;
    SpecificTimeOfDayRegex = GermanDateTimeExtractorConfiguration.SpecificTimeOfDayRegex;
    SpecificEndOfRegex = GermanDateTimeExtractorConfiguration.SpecificEndOfRegex;
    UnspecificEndOfRegex = GermanDateTimeExtractorConfiguration.UnspecificEndOfRegex;
    UnitRegex = GermanTimeExtractorConfiguration.TimeUnitRegex;
    DateNumberConnectorRegex = GermanDateTimeExtractorConfiguration.DateNumberConnectorRegex;
    YearRegex = GermanDateTimeExtractorConfiguration.YearRegex;

    // Number and duration components plus lookup data from the common configuration.
    Numbers = config.Numbers;
    CardinalExtractor = config.CardinalExtractor;
    IntegerExtractor = config.IntegerExtractor;
    NumberParser = config.NumberParser;
    DurationExtractor = config.DurationExtractor;
    DurationParser = config.DurationParser;
    UnitMap = config.UnitMap;
    UtilityConfiguration = config.UtilityConfiguration;
}
/// <summary>
/// Creates the number, duration and holiday components used for date extraction and builds
/// <c>DateRegexList</c>: three textual date patterns followed by the numeric patterns, whose
/// order depends on whether day-month-year is the preferred format.
/// </summary>
/// <param name="config">Date-time options; also forwarded to the base constructor.</param>
public FrenchDateExtractorConfiguration(IDateTimeOptionsConfiguration config)
    : base(config)
{
    // Propagate the NoProtoCache flag to the number components.
    var numOptions = (config.Options & DateTimeOptions.NoProtoCache) != 0
        ? NumberOptions.NoProtoCache
        : NumberOptions.None;
    var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);

    IntegerExtractor = Number.French.IntegerExtractor.GetInstance();
    OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance();
    NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig));

    // These sub-configurations receive this instance, so they are created only after the
    // number components above have been assigned.
    DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this));
    HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this));
    UtilityConfiguration = new FrenchDatetimeUtilityConfiguration();

    // 3-23-2017
    var extractor4 = RegexCache.Get(DateTimeDefinitions.DateExtractor4, RegexFlags);

    // 23-3-2015
    var extractor5 = RegexCache.Get(DateTimeDefinitions.DateExtractor5, RegexFlags);

    // on 1.3
    var extractor6 = RegexCache.Get(DateTimeDefinitions.DateExtractor6, RegexFlags);

    // on 24-12
    var extractor8 = RegexCache.Get(DateTimeDefinitions.DateExtractor8, RegexFlags);

    // 7/23
    var extractor7 = RegexCache.Get(DateTimeDefinitions.DateExtractor7, RegexFlags);

    // 23/7
    var extractor9 = RegexCache.Get(DateTimeDefinitions.DateExtractor9, RegexFlags);

    // 2015-12-23
    var extractorA = RegexCache.Get(DateTimeDefinitions.DateExtractorA, RegexFlags);

    DateRegexList = new List<Regex>
    {
        // (Sunday,)? April 5
        RegexCache.Get(DateTimeDefinitions.DateExtractor1, RegexFlags),

        // (Sunday,)? April 5, 2016
        RegexCache.Get(DateTimeDefinitions.DateExtractor2, RegexFlags),

        // (Sunday,)? 6th of April
        RegexCache.Get(DateTimeDefinitions.DateExtractor3, RegexFlags),
    };

    var enableDmy = DmyDateFormat ||
                    DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY;

    // Extractors 5, 8 and 9 are tried first when DMY is enabled; otherwise 4, 6 and 7 lead.
    Regex[] numericDateRegexes;
    if (enableDmy)
    {
        numericDateRegexes = new[] { extractor5, extractor8, extractor9, extractor4, extractor6, extractor7, extractorA };
    }
    else
    {
        numericDateRegexes = new[] { extractor4, extractor6, extractor7, extractor5, extractor8, extractor9, extractorA };
    }

    DateRegexList = DateRegexList.Concat(numericDateRegexes);
}
/// <summary>
/// Computes the interned key prefix, sets the ambiguous-fraction-connector and
/// relative-reference regexes, and registers the ordinal patterns (generic number-suffix
/// tag and ARABIC tags) in <c>Regexes</c>.
/// </summary>
/// <param name="config">Supplies the options passed to the base constructor and the culture used in the key prefix.</param>
private OrdinalExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    var prefix = ExtractType + "_" + config.Options.ToString() + "_" + config.Culture;
    keyPrefix = string.Intern(prefix);

    AmbiguousFractionConnectorsRegex = RegexCache.Get(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags);
    RelativeReferenceRegex = RegexCache.Get(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags);

    var regexes = new Dictionary<Regex, TypeTag>();

    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalNumericRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ARABIC));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ARABIC));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Builds the regex that matches a single written-number token: the configured word
/// separator token, the literal " -", or any key of the ordinal, cardinal and decimal-units maps.
/// </summary>
/// <returns>
/// The regex returned by <c>RegexCache.Get</c> for the assembled pattern, requested with
/// <c>RegexOptions.Singleline | RegexOptions.Compiled</c>.
/// </returns>
private Regex BuildTextNumberRegex()
{
    var separatorAlternatives = $"{this.Config.WordSeparatorToken}| -|";

    // For Hindi, there is a need for another NumberMap of the type double to handle values like 1.5.
    // As this cannot be included in either Cardinal or Ordinal NumberMap as they are of the type long,
    // DecimalUnitsList (type double) takes care of these entries and it needs to be added to the singleIntFrac
    // for extraction
    var ordinalKeys = GetKeyRegex(this.Config.OrdinalNumberMap.Keys);
    var cardinalKeys = GetKeyRegex(this.Config.CardinalNumberMap.Keys);
    var decimalUnitKeys = GetKeyRegex(this.Config.DecimalUnitsMap.Keys);

    var singleIntFrac = separatorAlternatives + ordinalKeys + "|" + cardinalKeys + "|" + decimalUnitKeys;

    // Languages that use "compound numbers" (written number parts not separated by whitespace or
    // special characters, e.g. dreihundert in German) get the bare group; the default case,
    // like in English, anchors the group on word boundaries.
    var textNumberPattern = isCompoundNumberLanguage
        ? @"(" + singleIntFrac + @")"
        : @"(?<=\b)(" + singleIntFrac + @")(?=\b)";

    return RegexCache.Get(textNumberPattern, RegexOptions.Singleline | RegexOptions.Compiled);
}
/// <summary>
/// Builds one alternation regex from every valid unit word found in the configured prefix
/// and suffix lists, excluding the words listed as ambiguous units.
/// </summary>
/// <param name="ignoreCase">When true, <c>RegexOptions.IgnoreCase</c> is added to the regex options.</param>
/// <returns>
/// The regex for <c>BuildPrefix(word1|word2|...)BuildSuffix</c>, or <c>null</c> when no word remains.
/// </returns>
protected Regex BuildSeparateRegexFromSet(bool ignoreCase = false)
{
    var separateWords = new HashSet<string>();

    // Prefix and suffix entries are tokenized and validated in exactly the same way.
    CollectSeparateWords(config.PrefixList?.Values, separateWords);
    CollectSeparateWords(config.SuffixList?.Values, separateWords);

    if (config.AmbiguousUnitList?.Count > 0)
    {
        foreach (var abandonWord in config.AmbiguousUnitList)
        {
            // Remove is a no-op for words that are not in the set, so no Contains check is needed.
            separateWords.Remove(abandonWord);
        }
    }

    var regexTokens = separateWords.Select(Regex.Escape).ToList();

    if (regexTokens.Count == 0)
    {
        return null;
    }

    // Sort the tokens with the project's StringComparer (descending length, per the original comment).
    regexTokens.Sort(new StringComparer());

    var pattern = $@"{this.config.BuildPrefix}({string.Join("|", regexTokens)}){this.config.BuildSuffix}";
    var options = RegexOptions.Singleline | RegexOptions.ExplicitCapture |
                  (ignoreCase ? RegexOptions.IgnoreCase : RegexOptions.None);

    return RegexCache.Get(pattern, options);
}

/// <summary>
/// Splits each entry on <c>separators</c> and adds every word accepted by <c>ValidateUnit</c> to the target set.
/// </summary>
/// <param name="unitEntries">Entries to tokenize; <c>null</c> is treated as empty.</param>
/// <param name="target">Set that receives the accepted words.</param>
private void CollectSeparateWords(IEnumerable<string> unitEntries, HashSet<string> target)
{
    if (unitEntries == null)
    {
        return;
    }

    foreach (var entry in unitEntries)
    {
        foreach (var word in entry.Split(separators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (ValidateUnit(word))
            {
                target.Add(word);
            }
        }
    }
}
/// <summary>
/// Rewrites every named-group opener of the form <c>?&lt;name&gt;</c> found in
/// <paramref name="source"/> using <c>ReplaceMatchGroup</c>.
/// </summary>
/// <param name="source">Regex pattern text to sanitize.</param>
/// <returns>The pattern text with each matched group opener replaced.</returns>
private static string SanitizeGroups(string source)
{
    // The pattern must not contain literal spaces: without RegexOptions.IgnorePatternWhitespace
    // they are matched verbatim, so "?<name>" would never be found.
    // NOTE(review): assumes the single-argument RegexCache.Get applies no options that change
    // matching for this pattern - confirm against RegexCache.
    Regex matchGroup = RegexCache.Get(@"\?<(?<name>\w+)>");

    // Use the cached instance directly instead of re-parsing its pattern text via the static Regex.Replace.
    return matchGroup.Replace(source, ReplaceMatchGroup);
}
/// <summary>
/// Registers the integer patterns of this extractor in <c>Regexes</c>. The last pattern
/// depends on <paramref name="mode"/>: an allow-list based regex by default, or an
/// aggressive regex when everything should be extracted.
/// </summary>
/// <param name="config">Not read by this constructor.</param>
/// <param name="mode">Selects the allow-list or the aggressive integer regex.</param>
public IntegerExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default)
{
    var regexes = new Dictionary<Regex, TypeTag>();

    // 123456, -123456
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersSpecialsChars, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 15k, 16 G
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 1,234, 2,332,111
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // 半百 (half a hundred), 半打 (half a dozen)
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    // 半 (half)
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.HalfUnitRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    // 一打 (one dozen), 五十打 (fifty dozen)
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithDozen, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));

    if (mode == CJKNumberExtractorMode.Default)
    {
        // 一百五十五, 负一亿三百二十二.
        // Uses an allow list to avoid extracting "四" from "四川"
        regexes.Add(
            RegexCache.Get(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
    }
    else if (mode == CJKNumberExtractorMode.ExtractAll)
    {
        // 一百五十五, 负一亿三百二十二, "四" from "四川".
        // Uses no allow lists and extracts all potential integers (useful in Units, for example).
        regexes.Add(
            RegexCache.Get(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE));
    }

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Copies the word-boundary patterns and forbidden prefix markers from
/// <c>PhoneNumbersDefinitions</c> and obtains the colon-prefix check regex from <c>RegexCache</c>.
/// </summary>
/// <param name="options">Sequence options forwarded to the base constructor.</param>
public ChinesePhoneNumberExtractorConfiguration(SequenceOptions options)
    : base(options)
{
    // Boundary patterns are taken over as defined.
    WordBoundariesRegex = PhoneNumbersDefinitions.WordBoundariesRegex;
    NonWordBoundariesRegex = PhoneNumbersDefinitions.NonWordBoundariesRegex;
    EndWordBoundariesRegex = PhoneNumbersDefinitions.EndWordBoundariesRegex;

    ColonPrefixCheckRegex = RegexCache.Get(PhoneNumbersDefinitions.ColonPrefixCheckRegex);

    // The definition is cast to the concrete list type expected by the property.
    ForbiddenPrefixMarkers = (List<char>)PhoneNumbersDefinitions.ForbiddenPrefixMarkers;
}
/// <summary>
/// Sets the this/next/previous prefix regexes and takes the holiday regex list and holiday
/// names from the Dutch extractor configuration and <c>DateTimeDefinitions</c>.
/// </summary>
/// <param name="config">Date-time options forwarded to the base constructor.</param>
public DutchHolidayParserConfiguration(IDateTimeOptionsConfiguration config)
    : base(config)
{
    // Relative prefixes.
    ThisPrefixRegex = RegexCache.Get(DateTimeDefinitions.ThisPrefixRegex, RegexFlags);
    NextPrefixRegex = RegexCache.Get(DateTimeDefinitions.NextPrefixRegex, RegexFlags);
    PreviousPrefixRegex = RegexCache.Get(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags);

    // Holiday patterns and names.
    HolidayRegexList = DutchHolidayExtractorConfiguration.HolidayRegexList;
    HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary();
}
/// <summary>
/// Wires the date parser configuration: extractors, parsers and lookup maps come from the
/// shared <paramref name="config"/>, regexes from <c>ItalianDateExtractorConfiguration</c>
/// and <c>DateTimeDefinitions</c>, and the relative-day term lists from <c>DateTimeDefinitions</c>.
/// </summary>
/// <param name="config">Common parser configuration, also forwarded to the base constructor.</param>
public ItalianDateParserConfiguration(ICommonDateTimeParserConfiguration config)
    : base(config)
{
    DateTokenPrefix = DateTimeDefinitions.DateTokenPrefix;

    // Shared extractors and parsers.
    IntegerExtractor = config.IntegerExtractor;
    OrdinalExtractor = config.OrdinalExtractor;
    CardinalExtractor = config.CardinalExtractor;
    NumberParser = config.NumberParser;
    DurationExtractor = config.DurationExtractor;
    DateExtractor = config.DateExtractor;
    DurationParser = config.DurationParser;

    // Both configurations below receive this instance, so they are created only after the
    // assignments above and before everything that follows.
    var holidayParserConfiguration = new ItalianHolidayParserConfiguration(this);
    HolidayParser = new BaseHolidayParser(holidayParserConfiguration);

    var dateExtractorConfiguration = new ItalianDateExtractorConfiguration(this);
    DateRegexes = dateExtractorConfiguration.DateRegexList;

    // Regexes shared with the extractor configuration.
    OnRegex = ItalianDateExtractorConfiguration.OnRegex;
    SpecialDayRegex = ItalianDateExtractorConfiguration.SpecialDayRegex;
    SpecialDayWithNumRegex = ItalianDateExtractorConfiguration.SpecialDayWithNumRegex;
    NextRegex = ItalianDateExtractorConfiguration.NextDateRegex;
    ThisRegex = ItalianDateExtractorConfiguration.ThisRegex;
    LastRegex = ItalianDateExtractorConfiguration.LastDateRegex;
    UnitRegex = ItalianDateExtractorConfiguration.DateUnitRegex;
    WeekDayRegex = ItalianDateExtractorConfiguration.WeekDayRegex;
    StrictWeekDay = ItalianDateExtractorConfiguration.StrictWeekDay;
    MonthRegex = ItalianDateExtractorConfiguration.MonthRegex;
    WeekDayOfMonthRegex = ItalianDateExtractorConfiguration.WeekDayOfMonthRegex;
    ForTheRegex = ItalianDateExtractorConfiguration.ForTheRegex;
    WeekDayAndDayOfMothRegex = ItalianDateExtractorConfiguration.WeekDayAndDayOfMothRegex;
    WeekDayAndDayRegex = ItalianDateExtractorConfiguration.WeekDayAndDayRegex;
    RelativeMonthRegex = ItalianDateExtractorConfiguration.RelativeMonthRegex;
    StrictRelativeRegex = ItalianDateExtractorConfiguration.StrictRelativeRegex;
    YearSuffix = ItalianDateExtractorConfiguration.YearSuffix;
    RelativeWeekDayRegex = ItalianDateExtractorConfiguration.RelativeWeekDayRegex;
    BeforeAfterRegex = ItalianDateExtractorConfiguration.BeforeAfterRegex;

    // @TODO move to config
    RelativeDayRegex = RegexCache.Get(DateTimeDefinitions.RelativeDayRegex, RegexFlags);
    NextPrefixRegex = RegexCache.Get(DateTimeDefinitions.NextPrefixRegex, RegexFlags);
    PreviousPrefixRegex = RegexCache.Get(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags);
    UpcomingPrefixRegex = RegexCache.Get(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags);
    PastPrefixRegex = RegexCache.Get(DateTimeDefinitions.PastPrefixRegex, RegexFlags);

    // Lookup maps from the common configuration.
    DayOfMonth = config.DayOfMonth;
    DayOfWeek = config.DayOfWeek;
    MonthOfYear = config.MonthOfYear;
    Numbers = config.Numbers;
    CardinalMap = config.CardinalMap;
    UnitMap = config.UnitMap;
    UtilityConfiguration = config.UtilityConfiguration;

    // Relative-day term lists.
    SameDayTerms = DateTimeDefinitions.SameDayTerms.ToImmutableList();
    PlusOneDayTerms = DateTimeDefinitions.PlusOneDayTerms.ToImmutableList();
    PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList();
    MinusOneDayTerms = DateTimeDefinitions.MinusOneDayTerms.ToImmutableList();
    MinusTwoDayTerms = DateTimeDefinitions.MinusTwoDayTerms.ToImmutableList();
}
/// <summary>
/// Creates a configuration with the given separators, the English culture, empty
/// cardinal/ordinal/round number maps and a fixed digital-number regex.
/// </summary>
/// <param name="decimalSep">Character assigned to <c>DecimalSeparatorChar</c>.</param>
/// <param name="nonDecimalSep">Character assigned to <c>NonDecimalSeparatorChar</c>.</param>
public LongFormTestConfiguration(char decimalSep, char nonDecimalSep)
{
    DecimalSeparatorChar = decimalSep;
    NonDecimalSeparatorChar = nonDecimalSep;
    CultureInfo = new CultureInfo(Culture.English);

    // No written-number vocabulary is configured here.
    CardinalNumberMap = ImmutableDictionary<string, long>.Empty;
    OrdinalNumberMap = ImmutableDictionary<string, long>.Empty;
    RoundNumberMap = ImmutableDictionary<string, long>.Empty;

    DigitalNumberRegex = RegexCache.Get(
        @"((?<=\b)(hundred|thousand|million|billion|trillion|dozen(s)?)(?=\b))|((?<=(\d|\b))(k|t|m|g|b)(?=\b))",
        RegexOptions.Singleline);
}
/// <summary>
/// Stores the configuration and registers one compiled phone-number regex per supported
/// format, each built from the configured boundary patterns, in <c>Regexes</c>.
/// </summary>
/// <param name="config">Provides the word, non-word and end-word boundary patterns; kept in the <c>config</c> field.</param>
public BasePhoneNumberExtractor(PhoneNumberConfiguration config)
{
    this.config = config;

    var wordBoundary = config.WordBoundariesRegex;
    var nonWordBoundary = config.NonWordBoundariesRegex;
    var endWordBoundary = config.EndWordBoundariesRegex;

    var regexes = new Dictionary<Regex, string>();

    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.GeneralPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_GENERAL);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.BRPhoneNumberRegex(wordBoundary, nonWordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_BR);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.UKPhoneNumberRegex(wordBoundary, nonWordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_UK);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.DEPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_DE);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.USPhoneNumberRegex(wordBoundary, nonWordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_US);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.CNPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_CN);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.DKPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_DK);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.ITPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_IT);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.NLPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_NL);
    regexes.Add(
        RegexCache.Get(BasePhoneNumbers.SpecialPhoneNumberRegex(wordBoundary, endWordBoundary), RegexOptions.Compiled),
        Constants.PHONE_NUMBER_REGEX_SPECIAL);

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Registers the GUID regex under <c>Constants.GUID_REGEX</c> as the only entry of <c>Regexes</c>.
/// </summary>
public BaseGUIDExtractor()
{
    Regexes = new Dictionary<Regex, string>
    {
        { RegexCache.Get(BaseGUID.GUIDRegex), Constants.GUID_REGEX },
    }.ToImmutableDictionary();
}
/// <summary>
/// Registers the hashtag regex under <c>Constants.HASHTAG_REGEX</c> as the only entry of <c>Regexes</c>.
/// </summary>
public BaseHashtagExtractor()
{
    Regexes = new Dictionary<Regex, string>
    {
        { RegexCache.Get(BaseHashtag.HashtagRegex), Constants.HASHTAG_REGEX },
    }.ToImmutableDictionary();
}
/// <summary>
/// Registers the mention regex under <c>Constants.MENTION_REGEX</c> as the only entry of <c>Regexes</c>.
/// </summary>
public BaseMentionExtractor()
{
    Regexes = new Dictionary<Regex, string>
    {
        { RegexCache.Get(BaseMention.MentionRegex), Constants.MENTION_REGEX },
    }.ToImmutableDictionary();
}
/// <summary>
/// Registers the double-number patterns of this extractor together with their type tags and
/// publishes them through <c>Regexes</c> as an immutable dictionary.
/// </summary>
/// <param name="config">
/// Supplies the options passed to the base constructor and the placeholder used by the
/// placeholder-dependent and long-format patterns.
/// </param>
private DoubleExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    var regexes = new Dictionary<Regex, TypeTag>();

    // Patterns tagged with the generic number suffix.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleIndianDecimalPointRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    // Pattern tagged with the ENGLISH suffix.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ENGLISH));

    // Exponential notations, tagged with the power suffix.
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));
    regexes.Add(
        RegexCache.Get(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX));

    // Long-format numbers for each supported thousands separator.
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));
    regexes.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, config.Placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Registers the <c>OrdinalKoreanRegex</c> pattern with an ordinal/number-suffix tag as the
/// only entry of <c>Regexes</c>.
/// </summary>
public OrdinalExtractor()
{
    var regexes = new Dictionary<Regex, TypeTag>();

    regexes.Add(
        RegexCache.Get(NumbersDefinitions.OrdinalKoreanRegex, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX));

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Builds the regex for a long-format number using the thousands and decimals marks of
/// <paramref name="type"/>. A decimals mark of <c>'\0'</c> selects the integer definition;
/// any other mark selects the double definition.
/// </summary>
/// <param name="type">Supplies the thousands mark and the decimals mark.</param>
/// <param name="placeholder">Placeholder forwarded to the <c>BaseNumbers</c> regex definition.</param>
/// <param name="flags">Options the regex is requested with.</param>
/// <returns>The regex returned by <c>RegexCache.Get</c> for the selected definition.</returns>
protected static Regex GenerateLongFormatNumberRegexes(LongFormatType type, string placeholder = BaseNumbers.PlaceHolderDefault, RegexOptions flags = RegexOptions.Singleline)
{
    // Both marks are escaped so they are matched literally inside the pattern.
    var escapedThousands = Regex.Escape(type.ThousandsMark.ToString(CultureInfo.InvariantCulture));
    var escapedDecimals = Regex.Escape(type.DecimalsMark.ToString(CultureInfo.InvariantCulture));

    string definition;
    if (type.DecimalsMark.Equals('\0'))
    {
        definition = BaseNumbers.IntegerRegexDefinition(placeholder, escapedThousands);
    }
    else
    {
        definition = BaseNumbers.DoubleRegexDefinition(placeholder, escapedThousands, escapedDecimals);
    }

    return RegexCache.Get(definition, flags);
}
/// <summary>
/// Populates the parser configuration from <c>NumbersDefinitions</c>: separator characters and
/// tokens, immutable lookup maps and lists, and the regexes obtained from <c>RegexCache</c>.
/// </summary>
/// <param name="config">
/// Options configuration; stored in <c>Config</c>, and its <c>Culture</c> is used to create
/// <c>CultureInfo</c>.
/// </param>
public ChineseNumberParserConfiguration(INumberOptionsConfiguration config)
{
    // Language identity and culture.
    this.LanguageMarker = NumbersDefinitions.LangMarker;
    this.CultureInfo = new CultureInfo(config.Culture);
    this.Config = config;

    // Language-level switches and separator tokens.
    this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage;
    this.IsMultiDecimalSeparatorCulture = NumbersDefinitions.MultiDecimalSeparatorCulture;

    this.DecimalSeparatorChar = NumbersDefinitions.DecimalSeparatorChar;
    this.FractionMarkerToken = NumbersDefinitions.FractionMarkerToken;
    this.NonDecimalSeparatorChar = NumbersDefinitions.NonDecimalSeparatorChar;
    this.HalfADozenText = NumbersDefinitions.HalfADozenText;
    this.WordSeparatorToken = NumbersDefinitions.WordSeparatorToken;
    this.ZeroChar = NumbersDefinitions.ZeroChar;
    this.PairChar = NumbersDefinitions.PairChar;

    // Written separator texts are left empty in this configuration.
    this.WrittenDecimalSeparatorTexts = Enumerable.Empty<string>();
    this.WrittenGroupSeparatorTexts = Enumerable.Empty<string>();
    this.WrittenIntegerSeparatorTexts = Enumerable.Empty<string>();
    this.WrittenFractionSeparatorTexts = Enumerable.Empty<string>();

    // Cardinal and ordinal maps are initialized empty.
    this.CardinalNumberMap = new Dictionary<string, long>().ToImmutableDictionary();
    this.OrdinalNumberMap = new Dictionary<string, long>().ToImmutableDictionary();

    // Immutable snapshots of the definition maps.
    this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary();
    this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary();
    this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary();
    this.ZeroToNineMap = NumbersDefinitions.ZeroToNineMap.ToImmutableDictionary();
    this.RoundNumberMapChar = NumbersDefinitions.RoundNumberMapChar.ToImmutableDictionary();
    this.FullToHalfMap = NumbersDefinitions.FullToHalfMap.ToImmutableDictionary();
    this.TratoSimMap = NumbersDefinitions.TratoSimMap.ToImmutableDictionary();

    // NOTE(review): entries are sorted by key length before being copied, but an
    // ImmutableDictionary presumably does not keep insertion order - confirm whether any
    // consumer relies on this ordering.
    this.UnitMap = NumbersDefinitions.UnitMap
        .OrderBy(o => o.Key.Length)
        .ToImmutableDictionary(o => o.Key, p => p.Value);

    this.RoundDirectList = NumbersDefinitions.RoundDirectList.ToImmutableList();
    this.TenChars = NumbersDefinitions.TenChars.ToImmutableList();

    this.HalfADozenRegex = null;

    // @TODO Change init to follow design in other languages
    // The TODO must stay on its own line: when it shared a line with the statements below,
    // the line comment swallowed them together with the constructor's closing brace.
    this.DigitalNumberRegex = RegexCache.Get(NumbersDefinitions.DigitalNumberRegex, RegexFlags);
    this.DigitNumRegex = RegexCache.Get(NumbersDefinitions.DigitNumRegex, RegexFlags);
    this.DozenRegex = RegexCache.Get(NumbersDefinitions.DozenRegex, RegexFlags);
    this.PercentageRegex = RegexCache.Get(NumbersDefinitions.PercentageRegex, RegexFlags);
    this.DoubleAndRoundRegex = RegexCache.Get(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags);
    this.FracSplitRegex = RegexCache.Get(NumbersDefinitions.FracSplitRegex, RegexFlags);
    this.NegativeNumberSignRegex = RegexCache.Get(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags);
    this.PointRegex = RegexCache.Get(NumbersDefinitions.PointRegex, RegexFlags);
    this.SpeGetNumberRegex = RegexCache.Get(NumbersDefinitions.SpeGetNumberRegex, RegexFlags);
    this.PairRegex = RegexCache.Get(NumbersDefinitions.PairRegex, RegexFlags);
    this.RoundNumberIntegerRegex = RegexCache.Get(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags);
    this.PercentageNumRegex = RegexCache.Get(NumbersDefinitions.PercentageNumRegex, RegexFlags);

    // No fraction-preposition pattern is configured here.
    this.FractionPrepositionRegex = null;
}
/// <summary>
/// Sets up the extractor with a single pattern, <c>NumbersDefinitions.AllOrdinalRegex</c>,
/// tagged with the ordinal prefix and the <c>Constants.JAPANESE</c> suffix.
/// </summary>
public OrdinalExtractor()
{
    var regexes = new Dictionary<Regex, TypeTag>
    {
        {
            // Example input: だい一百五十四 (presumably "the 154th").
            // This comment must stay on its own line; sharing a line with the entry below
            // would comment out the entry and everything after it.
            RegexCache.Get(NumbersDefinitions.AllOrdinalRegex, RegexFlags),
            RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.JAPANESE)
        },
    };

    Regexes = regexes.ToImmutableDictionary();
}
/// <summary>
/// Computes the interned key prefix for this extractor instance, then registers every
/// integer pattern together with its type tag and publishes the table through <c>Regexes</c>.
/// </summary>
/// <param name="config">
/// Supplies the options forwarded to the base constructor, plus the placeholder and culture
/// that take part in the key prefix and in the placeholder-dependent patterns.
/// </param>
private IntegerExtractor(BaseNumberOptionsConfiguration config)
    : base(config.Options)
{
    // The prefix joins extract type, options, placeholder and culture with underscores.
    keyPrefix = string.Intern($"{ExtractType}_{config.Options}_{config.Placeholder}_{config.Culture}");

    var placeholder = config.Placeholder;
    var table = new Dictionary<Regex, TypeTag>();

    // NOTE(review): one GenerateRegexTag call per entry, in registration order, exactly as in
    // the initializer form. Do not hoist or share tags unless GenerateRegexTag is confirmed
    // to be stateless.

    // Digit-definition patterns tagged with NUMBER_SUFFIX.
    table.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    table.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Long-format integers, one entry per thousands separator.
    table.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    table.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    table.Add(
        GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Round-number and dozen-suffix patterns, still tagged with NUMBER_SUFFIX.
    table.Add(
        RegexCache.Get(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    table.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithDozen2Suffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
    table.Add(
        RegexCache.Get(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));

    // Patterns tagged with the PORTUGUESE suffix.
    table.Add(
        RegexCache.Get(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.PORTUGUESE));
    table.Add(
        RegexCache.Get(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags),
        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.PORTUGUESE));

    this.Regexes = table.ToImmutableDictionary();
}