コード例 #1
0
        /// <summary>
        /// Builds the default currency recognizer for <paramref name="culture"/> by combining
        /// the culture's default number patterns with the regular expression obtained from
        /// <paramref name="currencySymbols"/>.
        /// </summary>
        /// <param name="culture">Culture whose number format drives the generated patterns.</param>
        /// <param name="currencySymbols">Word list that supplies the currency regular expression and its first-character set.</param>
        /// <param name="priority">
        /// Not read anywhere in this method; the recognizer is constructed with the literal 100.
        /// NOTE(review): confirm whether this value was meant to be forwarded to the constructor.
        /// </param>
        /// <returns>The configured recognizer, or <c>null</c> if any step throws.</returns>
        public static Recognizer Create(System.Globalization.CultureInfo culture,
                                        Core.Wordlist currencySymbols,
                                        int priority)
        {
            try
            {
                // TODO support non-blank languages for unit separation

                Core.CharacterSet symbolFirst = null;
                string symbolRegex = currencySymbols.GetRegularExpression(out symbolFirst);

                // CurrencyPositivePattern: 0 = "$n", 1 = "n$", 2 = "$ n", 3 = "n $".
                // Even values therefore place the symbol in front of the number. Values >= 2
                // additionally separate symbol and number; that distinction is not evaluated here.
                bool symbolLeadsNumber = (culture.NumberFormat.CurrencyPositivePattern % 2) == 0;

                CurrencyRegexRecognizer recognizer =
                    new CurrencyRegexRecognizer(100, "DEFAULT_CURRENCY_RECOGNIZER", culture);

                // TODO use currency pattern instead of number pattern?
                Core.CharacterSet firstSet = null;
                List<string> numberPatterns = NumberRegexRecognizer.ComputeRXPatterns(
                    culture, NumberSeparatorMode.CultureDefault, out firstSet);

                if (symbolLeadsNumber)
                {
                    // a match may now start with a currency symbol as well
                    firstSet.Add(symbolFirst);
                }

                // augmentation doesn't change FIRST()
                AugmentPatterns(numberPatterns, symbolRegex, culture);

                // All patterns share the same first set (the number pattern computer
                // returns just one pattern anyway).
                for (int i = 0; i < numberPatterns.Count; ++i)
                {
                    recognizer.Add(numberPatterns[i], firstSet, 2);
                }

                // Be strict for currencies (only flexible for measurements and numbers):
                // no additional patterns are registered for NumberSeparatorMode.Swapped
                // (formerly priority 1) or NumberSeparatorMode.EnUS (formerly priority 0).

                recognizer.OnlyIfFollowedByNonwordCharacter =
                    Core.CultureInfoExtensions.UseBlankAsWordSeparator(culture);

                return recognizer;
            }
            catch
            {
                // best effort: any failure while building the recognizer yields "no recognizer"
                return null;
            }
        }
コード例 #2
0
        /// <summary>
        /// Parses a bracketed character set specification at the current input position
        /// and returns the resulting character set.
        /// </summary>
        /// <remarks>
        /// Forms handled by the code below: an optional '^' right after the opening '['
        /// (sets <c>Negated</c>), single characters, ranges of the form <c>a-z</c>, a '-'
        /// immediately before the closing ']' (taken as a literal dash), and named classes
        /// of the form <c>[:name:]</c> which are resolved through
        /// <c>Core.CharacterProperties.GetUnicodeCategoryFromName</c>.
        /// </remarks>
        /// <returns>The character set described by the input.</returns>
        /// <exception cref="Core.LanguagePlatformException">
        /// Thrown with <c>TokenizerInvalidCharacterSet</c> when the input ends before the
        /// closing ']', or when a class name is empty or not recognized.
        /// </exception>
        private Core.CharacterSet Parse()
        {
            Core.CharacterSet result = new Core.CharacterSet();

            Expect('[');
            if (LookingAt() == '^')
            {
                result.Negated = true;
                _Position.Advance();
            }

            const int startState     = 0;
            const int charclassState = 1;
            const int finalState     = 99;

            int state = startState;

            while (state != finalState)
            {
                char lookAt = LookingAt();

                switch (state)
                {
                case startState:
                    switch (lookAt)
                    {
                    case '[':
                        // possible start of a named character class "[:name:]"
                        _Position.Advance();
                        if (LookingAt() == ':')
                        {
                            _Position.Advance();
                            state = charclassState;
                        }
                        else if (LookingAt() != '\0')
                        {
                            // a lone '[' is a literal; the following character is
                            // handled by the next iteration, so don't advance
                            result.Add('[');
                        }
                        // on '\0' nothing is added; the next iteration reports the error
                        break;

                    case '\0':
                        // unexpected end of input
                        throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);

                    case ']':
                        // closing bracket - consume it and finish
                        _Position.Advance();
                        state = finalState;
                        break;

                    default:
                    {
                        // a plain character (escaped, simple, or Unicode hex)
                        char lower = ScanChar();

                        if (LookingAt() == '-')
                        {
                            _Position.Advance();
                            if (LookingAt() == ']')
                            {
                                // dash directly before the closing bracket: add the character
                                // and a literal dash, skip ']', and finish
                                result.Add(lower);
                                result.Add('-');
                                _Position.Advance();
                                state = finalState;
                            }
                            else
                            {
                                // range lower-upper
                                char upper = ScanChar();
                                result.Add(lower, upper);
                            }
                        }
                        else
                        {
                            result.Add(lower);
                        }
                    }
                    break;
                    }
                    break;

                case charclassState:
                {
                    // just consumed '[' followed by ':'
                    if (LookingAt() == '\0')
                    {
                        throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                    }

                    StringBuilder className = new StringBuilder();
                    while (char.IsLetter(LookingAt()))
                    {
                        className.Append(LookingAt());
                        _Position.Advance();
                    }

                    Nullable<System.Globalization.UnicodeCategory> category = null;
                    if (className.Length > 0)
                    {
                        // The name is lower-cased before the lookup. Use the invariant culture so
                        // the result does not depend on the current thread culture (under tr-TR,
                        // ToLower() maps 'I' to dotless 'ı', which would break names such as "PI").
                        category = Core.CharacterProperties.GetUnicodeCategoryFromName(
                            className.ToString().ToLowerInvariant());
                    }

                    if (!category.HasValue)
                    {
                        // empty or unknown class name
                        throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                    }

                    result.Add(category.Value);

                    // the class must be terminated by ":]"
                    Expect(':');
                    Expect(']');

                    state = startState;
                }
                break;

                default:
                    // unexpected state
                    throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                }
            }

            return result;
        }