Beispiel #1
0
        /// <summary>
        /// Parses a bracketed character set starting at the current position and
        /// returns it as a <see cref="Core.CharacterSet"/>.
        /// </summary>
        /// <remarks>
        /// The accepted shape, as implemented below, is:
        /// an opening <c>[</c>, an optional <c>^</c> that marks the set as negated,
        /// any number of members, and a closing <c>]</c>. A member is one of:
        /// <list type="bullet">
        /// <item><description><c>[:name:]</c> - a named class, resolved through
        /// <c>Core.CharacterProperties.GetUnicodeCategoryFromName</c> after lowercasing the name;</description></item>
        /// <item><description>a single character read by <c>ScanChar</c>;</description></item>
        /// <item><description>a range <c>lower-upper</c>, both ends read by <c>ScanChar</c>;</description></item>
        /// <item><description>a character followed by <c>-]</c>, which adds the character and a literal dash
        /// and closes the set.</description></item>
        /// </list>
        /// A <c>[</c> that is not followed by <c>:</c> is added as a literal bracket.
        /// </remarks>
        /// <returns>The character set built from the parsed members.</returns>
        /// <exception cref="Core.LanguagePlatformException">
        /// Thrown with <c>Core.ErrorCode.TokenizerInvalidCharacterSet</c> when <c>LookingAt</c> yields
        /// <c>'\0'</c> before the closing bracket, or when a class name is empty or unknown.
        /// NOTE(review): <c>Expect</c> and <c>ScanChar</c> are defined elsewhere and may raise their own errors.
        /// </exception>
        private Core.CharacterSet Parse()
        {
            Core.CharacterSet result = new Core.CharacterSet();

            Expect('[');
            if (LookingAt() == '^')
            {
                result.Negated = true;
                _Position.Advance();
            }

            const int startState     = 0;
            const int charclassState = 1;
            const int finalState     = 99;

            int state = startState;

            while (state != finalState)
            {
                char lookAt = LookingAt();

                switch (state)
                {
                case startState:
                    switch (lookAt)
                    {
                    case '[':
                        // Either the start of a "[:name:]" class or a literal '['.
                        _Position.Advance();
                        if (LookingAt() == ':')
                        {
                            _Position.Advance();
                            state = charclassState;
                        }
                        else if (LookingAt() != '\0')
                        {
                            // Literal bracket. The following character is deliberately not
                            // consumed here; the next loop iteration handles it.
                            result.Add('[');
                        }
                        // On '\0' nothing is added; the next iteration reports the error.
                        break;

                    case '\0':
                        // The code treats '\0' as running out of input before the closing ']'.
                        throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);

                    case ']':
                        // Closing bracket - consume it and finish.
                        _Position.Advance();
                        state = finalState;
                        break;

                    default:
                    {
                        // A plain character (escaped, simple, or Unicode hex), possibly
                        // the lower bound of a range.
                        char lower = ScanChar();

                        if (LookingAt() == '-')
                        {
                            _Position.Advance();
                            if (LookingAt() == ']')
                            {
                                // Dash directly before ']': add the character and a literal
                                // dash, consume the ']' and finish.
                                result.Add(lower);
                                result.Add('-');
                                _Position.Advance();
                                state = finalState;
                            }
                            else
                            {
                                // Proper range "lower-upper".
                                char upper = ScanChar();
                                result.Add(lower, upper);
                            }
                        }
                        else
                        {
                            result.Add(lower);
                        }
                        break;
                    }
                    }
                    break;

                case charclassState:
                    // Just consumed '[' followed by ':'; a class name must follow.
                    if (LookingAt() == '\0')
                    {
                        throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                    }
                    else
                    {
                        StringBuilder className = new StringBuilder();
                        while (char.IsLetter(LookingAt()))
                        {
                            className.Append(LookingAt());
                            _Position.Advance();
                        }

                        if (className.Length == 0)
                        {
                            throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                        }

                        // The name is a lookup key, not display text, so lowercase it with the
                        // invariant culture; culture-sensitive ToLower() would e.g. map 'I' to a
                        // dotless 'ı' under Turkish settings and break the lookup.
                        System.Globalization.UnicodeCategory? category =
                            Core.CharacterProperties.GetUnicodeCategoryFromName(className.ToString().ToLowerInvariant());

                        if (!category.HasValue)
                        {
                            throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                        }

                        result.Add(category.Value);

                        // The class must be terminated by ":]".
                        Expect(':');
                        Expect(']');

                        state = startState;
                    }
                    break;

                default:
                    // Unexpected state - should be unreachable with the constants above.
                    throw new Core.LanguagePlatformException(Core.ErrorCode.TokenizerInvalidCharacterSet, _Input);
                }
            }

            return result;
        }