public void NonEmptyGrammarNonEmptyText()
        {
            LexerGrammar <LexerState> grammar = new LexerGrammar <LexerState>(
                new List <LexerTokenRule <LexerState> >()
            {
                new LexerTokenRule <LexerState>(1, "Text", "[a-zA-Z0-9]*"),
                new LexerTokenRule <LexerState>(
                    2,
                    "NewLine",
                    "\n",
                    (LexerState state, string lexem) =>
                {
                    state.LineNumber++;
                    return(LexerRuleReturnDecision.ReturnToken);
                }),
            },
                new List <LexerDynamicRule>());

            Lexer <LexerState> lexer = new Lexer <LexerState>(grammar);
            var s      = new LexerState();
            var tokens = lexer.GetTokens("Line1\nLine2\n", s).Tokens.ToArray();

            Assert.Equal(5, tokens.Count());

            Assert.Equal(3, s.LineNumber);
        }
Example #2
        public Token NextToken()
        {
            State = LexerState.Ready;
            char c;
            while (StringEnd != Input.Length)
            {
                c = read();
                switchState(c);
                if (State == LexerState.Accepted)
                {
                    Token T = Accept(AcceptType);
                    if (T.Type != TokenType.Token_SKIP)
                        return T;
                }
            }

            if (State == LexerState.Ready)
            {
                Token T = new Token();
                T.Lexeme = "";
                return T;
            }
            else
                throw new LexerException("End of file reached");
        }
Example #3
            // @formatter:on

            public Node(LexerState type, bool terminal, Dictionary <char, Pair <LexerState, int> > transitions)
            {
                // @formatter:off
                Type        = type;
                Terminal    = terminal;
                Transitions = transitions;
                // @formatter:on
            }
Example #4
 public Lexer(string source)
 {
     Source       = source;
     Tokens       = new List <Token>();
     CurrentLine  = 0;
     StartIndex   = CurrentIndex = -1;
     CurrentState = LexerState.Undetermined;
 }
Example #5
 /// <summary>
 /// Reset reader for parsing again.
 /// </summary>
 public void Reset()
 {
     _pos             = new LexerState();
     _pos.Pos         = -1;
     _pos.Line        = 1;
     _pos.Text        = string.Empty;
     _whiteSpaceChars = new Dictionary <char, char>();
     _escapeChar      = '\\';
 }
Example #6
        void EnterState(LexerState state)
        {
            currentState = state;

            if (currentState.setTrackNextIndentation)
            {
                shouldTrackNextIndentation = true;
            }
        }
Example #7
        private LexerState GetLexerState(int State)
        {
            LexerState state1 = (LexerState)this.lexerStates[State];

            if (state1 == null)
            {
                state1 = new LexerState();
                this.lexerStates[State] = state1;
            }
            return(state1);
        }
Example #8
 private void SingleTypeState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
     {
         return;
     }
     if (XmlCharType.Instance.IsNameChar(Peek(0)))
     {
         ConsumeQName();
         m_state = LexerState.Operator;
     }
 }
Example #9
 protected Token Accept(TokenType Type)
 {
     string Lexeme = Input.Substring(StringBegin, StringEnd - StringBegin);
     StringBegin = StringEnd;
     Token Temp = new Token();
     Temp.Lexeme = Lexeme;
     Temp.Type = Type;
     State = LexerState.Ready;
     if (Temp.Lexeme == "if" || Temp.Lexeme == "while" || Temp.Lexeme == "\r\nif" || Temp.Lexeme == "\r\nwhile")
         Temp.Type = TokenType.Token_RESERVEDWORD;
     TokensList.Add(Temp);
     return Temp;
 }
Example #10
        private void ShowLexerOutput()
        {
            LexerState LexerState = new LexerState(tbEditor.Text);

            LexerState.Reset();
            var token = Lexer.GetToken(LexerState);

            while (token.Type != TokenType.EOF)
            {
                tbLexerOutput.AppendText(token.ToString());
                tbLexerOutput.AppendText("\r\n");
                token = Lexer.GetToken(LexerState);
            }
        }
Example #11
        private static Token GetToken(int line, int column, string lexeme, LexerState lexerState)
        {
            if (lexeme.ToLower() == "true" || lexeme.ToLower() == "false")
            {
                return(new BooleanToken(line, column, lexeme));
            }

            if (LexerStateTypeToTokenType.ContainsKey(lexerState))
            {
                return(Token.TokenConstructors[LexerStateTypeToTokenType[lexerState]](line, column, lexeme));
            }

            return(null);
        }
Example #12
            public override bool Equals(LexerState obj)
            {
                if (!base.Equals(obj))
                {
                    return(false);
                }
                MetaGeneratorLexerState other = obj as MetaGeneratorLexerState;

                if (other != null)
                {
                    return(this.templateBrackets == other.templateBrackets &&
                           this.templateParenthesis == other.templateParenthesis);
                }
                return(false);
            }
Example #13
        private void KindTestState()
        {
            SkipWhitespace();
            if (Peek(0) == 0)
            {
                return;
            }
            BeginToken();
            char c = Peek(0);

            if (c == '{')
            {
                ConsumeChar(Read());
                m_states.Push(LexerState.Operator);
                m_state = LexerState.Default;
            }
            else if (c == ')')
            {
                ConsumeChar(Read());
                m_state = m_states.Pop();
            }
            else if (c == '*')
            {
                ConsumeChar(Read());
                m_state = LexerState.CloseKindTest;
            }
            else if (MatchIdentifer("element", "("))
            {
                EndToken("element");
                ConsumeToken(Token.ELEMENT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.KindTest);
            }
            else if (MatchIdentifer("schema-element", "("))
            {
                EndToken("schema-element");
                ConsumeToken(Token.SCHEMA_ELEMENT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.KindTest);
            }
            else if (XmlCharType.Instance.IsNameChar(c))
            {
                ConsumeQName();
                m_state = LexerState.CloseKindTest;
            }
        }
Example #14
        private LexerState StateStringEscape(char ch, LexerState guess)
        {
            switch (ch)
            {
            case 'n':
                Append('\n');
                return(LexerState.String);

            default:
                if (guess == LexerState.EndOfLine)
                {
                    throw new TokenizationException("EOL during string literal");
                }
                Append(ch);
                return(LexerState.String);
            }
        }
Example #15
        protected Token Accept(TokenType Type)
        {
            string Lexeme = Input.Substring(StringBegin, StringEnd - StringBegin);

            StringBegin = StringEnd;
            Token Temp = new Token();

            Temp.Lexeme = Lexeme;
            Temp.Type   = Type;
            State       = LexerState.Ready;
            if (Temp.Lexeme == "if" || Temp.Lexeme == "while" || Temp.Lexeme == "\r\nif" || Temp.Lexeme == "\r\nwhile")
            {
                Temp.Type = TokenType.Token_RESERVEDWORD;
            }
            TokensList.Add(Temp);
            return(Temp);
        }
Example #16
        private void _LeaveMode()
        {
            //throw new NotImplementedException();
            if (this._position > 0)
            {
                var text  = this._templateString.Substring(this._savedPosition, this._position - this._savedPosition);
                var token = this._CreateToken(text);

                if (token != null)
                {
                    this._tokens.Add(token);
                    this._Save();
                }
            }

            this._currentState = this._lexerModes.Pop();
        }
Example #17
        public void ErrorRecoveryTest()
        {
            Lexicon    lexicon = new Lexicon();
            LexerState global  = lexicon.DefaultLexer;


            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 !@#$!@ Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            scanner.SetTriviaTokens(WHITESPACE.Index);
            scanner.RecoverErrors = true;

            CompilationErrorManager em = new CompilationErrorManager();

            em.DefineError(101, 0, CompilationStage.Scanning, "Invalid token: {0}");

            scanner.ErrorManager   = em;
            scanner.LexicalErrorId = 101;

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(NUM.Index, l2.TokenIndex);

            Assert.AreEqual(0, em.Errors.Count);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ID.Index, l3.TokenIndex);

            Assert.IsTrue(em.Errors.Count > 0);
            Assert.AreEqual(101, em.Errors[0].Info.Id);
        }
Example #18
        private LexerState StateSeparator(char ch, LexerState guess)
        {
            switch (guess)
            {
            case LexerState.Internal:
            case LexerState.KeywordOrIdent:
            case LexerState.Number:
            case LexerState.String:
                AddToken(TokenType.Separator, ",");
                Rewind();
                return(afterWord);

            case LexerState.Whitespace:
                AddToken(TokenType.Separator, ",");
                return(afterWord);

            default: throw new TokenizationException($"Invalid transition: {State} => {guess}");
            }
        }
Example #19
        private LexerState StateString(char ch, LexerState guess)
        {
            switch (ch)
            {
            case '\\': return(LexerState.StringEscape);

            case '"':
                AddToken(TokenType.String);
                return(afterWord);

            default:
                if (guess == LexerState.EndOfLine)
                {
                    throw new TokenizationException("EOL during string literal");
                }
                Append(ch);
                return(State);
            }
        }
Example #20
        private LexerState StateArgumentList(char ch, LexerState guess)
        {
            afterWord = LexerState.ArgumentList;

            switch (guess)
            {
            case LexerState.KeywordOrIdent:
            case LexerState.Number:
            case LexerState.Operator:
            case LexerState.Separator:
                Append(ch);
                return(guess);

            case LexerState.Reference:
            case LexerState.Internal:
                return(guess);

            case LexerState.Whitespace:
                return(State);

            case LexerState.LeftParens:
                parensDepth++;
                AddToken(TokenType.LeftParens, "(");
                return(State);

            case LexerState.RightParens:
                if (parensDepth > 0)
                {
                    parensDepth--;
                    AddToken(TokenType.RightParens, ")");
                    return(State);
                }

                Append(ch);
                AddOperatorToken();
                afterWord = LexerState.None;
                return(afterWord);

            default: throw new TokenizationException($"Invalid character in argument list: {ch}");
            }
        }
Example #21
        private void ReturnConst(LexerState state, Symbol trigger)
        {
            Log(LogEventLevel.Information, "Found a constant");
            var value = ConstantToken <float> .Parse(CurrentToken.ToString());

            var con = Constants.FirstOrDefault(x => Math.Abs(x.Value - value) < 1E-5)?.Clone() as ConstantToken <float>;

            if (con == null)
            {
                con = new ConstantToken <float>(CurrentToken.ToString())
                {
                    TokenIndex = ConstIndex,
                    Substring  = CurrentToken.ToString()
                };
                Constants.Add(con);
            }
            else
            {
                Log(LogEventLevel.Information, "The constant is already processed");
            }
            con.Line = Line;
            ReturnToken(con, trigger);
        }
Example #22
        private void KindTestForPiState()
        {
            SkipWhitespace();
            if (Peek(0) == 0)
            {
                return;
            }
            char c = Peek(0);

            BeginToken();
            if (c == ')')
            {
                ConsumeChar(Read());
                m_state = m_states.Pop();
            }
            else if (XmlCharType.Instance.IsNCNameChar(c))
            {
                ConsumeNCName();
            }
            else if (c == '\'' || c == '"')
            {
                ConsumeLiteral();
            }
        }
Example #23
        void CreateStates()
        {
            var patterns = new Dictionary <TokenType, string> ();

            patterns[TokenType.Text] = ".*";

            patterns[TokenType.Number]               = @"\-?[0-9]+(\.[0-9+])?";
            patterns[TokenType.String]               = @"""([^""\\]*(?:\\.[^""\\]*)*)""";
            patterns[TokenType.LeftParen]            = @"\(";
            patterns[TokenType.RightParen]           = @"\)";
            patterns[TokenType.EqualTo]              = @"(==|is(?!\w)|eq(?!\w))";
            patterns[TokenType.EqualToOrAssign]      = @"(=|to(?!\w))";
            patterns[TokenType.NotEqualTo]           = @"(\!=|neq(?!\w))";
            patterns[TokenType.GreaterThanOrEqualTo] = @"\>=";
            patterns[TokenType.GreaterThan]          = @"\>";
            patterns[TokenType.LessThanOrEqualTo]    = @"\<=";
            patterns[TokenType.LessThan]             = @"\<";
            patterns[TokenType.AddAssign]            = @"\+=";
            patterns[TokenType.MinusAssign]          = @"\-=";
            patterns[TokenType.MultiplyAssign]       = @"\*=";
            patterns[TokenType.DivideAssign]         = @"\/=";
            patterns[TokenType.Add]      = @"\+";
            patterns[TokenType.Minus]    = @"\-";
            patterns[TokenType.Multiply] = @"\*";
            patterns[TokenType.Divide]   = @"\/";
            patterns [TokenType.And]     = @"(\&\&|and(?!\w))";
            patterns [TokenType.Or]      = @"(\|\||or(?!\w))";
            patterns [TokenType.Xor]     = @"(\^|xor(?!\w))";
            patterns [TokenType.Not]     = @"(\!|not(?!\w))";
            patterns[TokenType.Variable] = @"\$([A-Za-z0-9_\.])+";
            patterns[TokenType.Comma]    = @",";
            patterns[TokenType.True]     = @"true(?!\w)";
            patterns[TokenType.False]    = @"false(?!\w)";
            patterns[TokenType.Null]     = @"null(?!\w)";

            patterns[TokenType.BeginCommand] = @"\<\<";
            patterns[TokenType.EndCommand]   = @"\>\>";

            patterns[TokenType.OptionStart]   = @"\[\[";
            patterns[TokenType.OptionEnd]     = @"\]\]";
            patterns[TokenType.OptionDelimit] = @"\|";

            patterns[TokenType.Identifier] = @"[a-zA-Z0-9_:\.]+";

            patterns[TokenType.If]     = @"if(?!\w)";
            patterns[TokenType.Else]   = @"else(?!\w)";
            patterns[TokenType.ElseIf] = @"elseif(?!\w)";
            patterns[TokenType.EndIf]  = @"endif(?!\w)";
            patterns[TokenType.Set]    = @"set(?!\w)";

            patterns[TokenType.ShortcutOption] = @"\-\>";

            states = new Dictionary <string, LexerState> ();

            states ["base"] = new LexerState(patterns);
            states ["base"].AddTransition(TokenType.BeginCommand, "command", delimitsText: true);
            states ["base"].AddTransition(TokenType.OptionStart, "link", delimitsText: true);
            states ["base"].AddTransition(TokenType.ShortcutOption, "shortcut-option");
            states ["base"].AddTextRule(TokenType.Text);

            states ["shortcut-option"] = new LexerState(patterns);
            states ["shortcut-option"].setTrackNextIndentation = true;
            states ["shortcut-option"].AddTransition(TokenType.BeginCommand, "expression", delimitsText: true);
            states ["shortcut-option"].AddTextRule(TokenType.Text, "base");

            states ["command"] = new LexerState(patterns);
            states ["command"].AddTransition(TokenType.If, "expression");
            states ["command"].AddTransition(TokenType.Else);
            states ["command"].AddTransition(TokenType.ElseIf, "expression");
            states ["command"].AddTransition(TokenType.EndIf);
            states ["command"].AddTransition(TokenType.Set, "assignment");
            states ["command"].AddTransition(TokenType.EndCommand, "base", delimitsText: true);
            states ["command"].AddTransition(TokenType.Identifier, "command-or-expression");
            states ["command"].AddTextRule(TokenType.Text);

            states ["command-or-expression"] = new LexerState(patterns);
            states ["command-or-expression"].AddTransition(TokenType.LeftParen, "expression");
            states ["command-or-expression"].AddTransition(TokenType.EndCommand, "base", delimitsText: true);
            states ["command-or-expression"].AddTextRule(TokenType.Text);


            states ["assignment"] = new LexerState(patterns);
            states ["assignment"].AddTransition(TokenType.Variable);
            states ["assignment"].AddTransition(TokenType.EqualToOrAssign, "expression");
            states ["assignment"].AddTransition(TokenType.AddAssign, "expression");
            states ["assignment"].AddTransition(TokenType.MinusAssign, "expression");
            states ["assignment"].AddTransition(TokenType.MultiplyAssign, "expression");
            states ["assignment"].AddTransition(TokenType.DivideAssign, "expression");


            states ["expression"] = new LexerState(patterns);
            states ["expression"].AddTransition(TokenType.EndCommand, "base");
            states ["expression"].AddTransition(TokenType.Number);
            states ["expression"].AddTransition(TokenType.String);
            states ["expression"].AddTransition(TokenType.LeftParen);
            states ["expression"].AddTransition(TokenType.RightParen);
            states ["expression"].AddTransition(TokenType.EqualTo);
            states ["expression"].AddTransition(TokenType.EqualToOrAssign);
            states ["expression"].AddTransition(TokenType.NotEqualTo);
            states ["expression"].AddTransition(TokenType.GreaterThanOrEqualTo);
            states ["expression"].AddTransition(TokenType.GreaterThan);
            states ["expression"].AddTransition(TokenType.LessThanOrEqualTo);
            states ["expression"].AddTransition(TokenType.LessThan);
            states ["expression"].AddTransition(TokenType.Add);
            states ["expression"].AddTransition(TokenType.Minus);
            states ["expression"].AddTransition(TokenType.Multiply);
            states ["expression"].AddTransition(TokenType.Divide);
            states ["expression"].AddTransition(TokenType.And);
            states ["expression"].AddTransition(TokenType.Or);
            states ["expression"].AddTransition(TokenType.Xor);
            states ["expression"].AddTransition(TokenType.Not);
            states ["expression"].AddTransition(TokenType.Variable);
            states ["expression"].AddTransition(TokenType.Comma);
            states ["expression"].AddTransition(TokenType.True);
            states ["expression"].AddTransition(TokenType.False);
            states ["expression"].AddTransition(TokenType.Null);
            states ["expression"].AddTransition(TokenType.Identifier);


            states ["link"] = new LexerState(patterns);
            states ["link"].AddTransition(TokenType.OptionEnd, "base", delimitsText: true);
            states ["link"].AddTransition(TokenType.OptionDelimit, "link-destination", delimitsText: true);
            states ["link"].AddTextRule(TokenType.Text);

            states ["link-destination"] = new LexerState(patterns);
            states ["link-destination"].AddTransition(TokenType.Identifier);
            states ["link-destination"].AddTransition(TokenType.OptionEnd, "base");

            defaultState = states ["base"];
        }
Example #24
    public LexerState Next(NFA nf, int ch)
    {
        LexerState l;
        if (dfc.TryGetValue(ch, out l))
            return l;
        l = new LexerState(nf);
        for (int i = 0; i < nstates.Length; i++) {
            int bm = nstates[i];
            for (int j = 0; j < 32; j++) {
                if ((bm & (1 << j)) == 0)
                    continue;
                foreach (NFA.Edge e in nf.nodes[32*i + j].edges) {
                    if (e.when != null && e.when.Accepts(ch))
                        l.Add(e.to);
                }
            }
        }

        nf.Close(l);
        LexerState cl;

        if (!nf.dfashare.TryGetValue(l, out cl)) {
            nf.dfashare[l] = cl = l;
        }
        dfc[ch] = cl;
        return cl;
    }
Example #25
 private void SingleTypeState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     if (XmlCharType.Instance.IsNameChar(Peek(0)))
     {
         ConsumeQName();
         m_state = LexerState.Operator;
     }
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
 }
Example #26
        // Parses a Mustache-like template string (with at most one parameter) and prints it to the console,
        // with the parameter rendered in the accentedColor.
        public static void Write(string str, ConsoleColor basicColor, ConsoleColor accentedColor)
        {
            string before   = "";
            string accented = "";
            string after    = "";

            LexerState ps = LexerState.BEFORE;

            foreach (char c in str)
            {
                switch (ps)
                {
                case LexerState.BEFORE:
                    if (c != '{')
                    {
                        before += c;
                    }
                    else
                    {
                        ps = LexerState.FIRST_OPENING_BRACE;
                    }
                    break;

                case LexerState.FIRST_OPENING_BRACE:
                    if (c == '{')
                    {
                        ps = LexerState.SECOND_OPENING_BRACE;
                    }
                    else
                    {
                        ps      = LexerState.BEFORE;
                        before += '{';
                        before += c;
                    }
                    break;

                case LexerState.SECOND_OPENING_BRACE:
                    if (c == '}')
                    {
                        ps = LexerState.FIRST_CLOSING_BRACE;
                    }
                    else
                    {
                        ps        = LexerState.ACCENTED;
                        accented += c;
                    }
                    break;

                case LexerState.ACCENTED:
                    if (c == '}')
                    {
                        ps = LexerState.FIRST_CLOSING_BRACE;
                    }
                    else
                    {
                        accented += c;
                    }
                    break;

                case LexerState.FIRST_CLOSING_BRACE:
                    if (c == '}')
                    {
                        ps = LexerState.SECOND_CLOSING_BRACE;
                    }
                    else
                    {
                        accented += '}';
                        accented += c;
                    }
                    break;

                case LexerState.SECOND_CLOSING_BRACE:
                    ps     = LexerState.AFTER;
                    after += c;
                    break;

                case LexerState.AFTER:
                    after += c;
                    break;
                }
            }

            Console2.Write(before, basicColor);
            Console2.Write(accented, accentedColor);
            Console2.WriteLine(after, basicColor);
        }
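For illustration, a minimal call to this Write helper could look like the sketch below (hypothetical usage only; the format string and colors are made up, and it assumes the method is reachable as a static member alongside the Console2 wrapper it already uses):

 // Prints "Press " and " to continue" in gray, and the single
 // {{...}} parameter, "Enter", in yellow.
 Write("Press {{Enter}} to continue", ConsoleColor.Gray, ConsoleColor.Yellow);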
Example #27
        private void ItemTypeState()
        {
            SkipWhitespace();
            if (Peek(0) == 0)
                return;
            BeginToken();
            char c = Peek(0);
            if (c == '$')
            {
                ConsumeChar(Read());
                m_state = LexerState.VarName;
            }
            else if (MatchIdentifer("empty-sequence", "(", ")"))
            {
                EndToken();
                ConsumeToken(Token.EMPTY_SEQUENCE);
                m_state = LexerState.Operator;
            }
            //else if (MatchText("(:"))
            //{
            //    m_states.Push(m_state);
            //    m_state = LexerState.ExprComment;
            //    ExprCommentState();
            //}
            else if (MatchIdentifer("element", "("))
            {
                EndToken("element");
                ConsumeToken(Token.ELEMENT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("attribute", "("))
            {
                EndToken("attribute");
                ConsumeToken(Token.ATTRIBUTE);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("schema-element", "("))
            {
                EndToken("schema-element");
                ConsumeToken(Token.SCHEMA_ELEMENT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("schema-attribute", "("))
            {
                EndToken("schema-attribute");
                ConsumeToken(Token.SCHEMA_ATTRIBUTE);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("comment", "("))
            {
                EndToken("comment");
                ConsumeToken(Token.COMMENT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("text", "("))
            {
                EndToken("text");
                ConsumeToken(Token.TEXT);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');                
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("node", "("))
            {
                EndToken("node");
                ConsumeToken(Token.NODE);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');                
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("document-node", "("))
            {
                EndToken("document-node");
                ConsumeToken(Token.DOCUMENT_NODE);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTest;
            }
            else if (MatchIdentifer("processing-instruction", "("))
            {
                EndToken("processing-instruction");
                ConsumeToken(Token.PROCESSING_INSTRUCTION);
                BeginToken(m_bookmark[1]);
                ConsumeChar('(');
                m_states.Push(LexerState.OccurrenceIndicator);
                m_state = LexerState.KindTestForPi;
            }
            else if (MatchIdentifer("item", "(", ")"))
            {
                EndToken();
                ConsumeToken(Token.ITEM);
                m_state = LexerState.OccurrenceIndicator;
            }
            else if (c == ';')
            {
                ConsumeChar(Read());
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("then"))
            {
                EndToken();
                ConsumeToken(Token.THEN);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("else"))
            {
                EndToken();
                ConsumeToken(Token.ELSE);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("at"))
            {
                EndToken();
                ConsumeToken(Token.AT);
                SkipWhitespace();
                c = Peek(0);
                if (c == '\'' || c == '"')
                {
                    ConsumeLiteral();
                    m_state = LexerState.NamespaceDecl;
                }
                else
                    m_state = LexerState.Default;
            }
            else if (c == '=' || c == '(' || c == '[' || c == '|')
            {
                ConsumeChar(Read());
                if (c == '[')
                    m_states.Push(m_state);
                m_state = LexerState.Default;
            }
            else if (c == ':' && Peek(1) == '=')
            {
                ConsumeChar(Read());
                BeginToken();
                ConsumeChar(Read());
                m_state = LexerState.Default;
            }
            else if (c == '!' && Peek(1) == '=')
            {
                ConsumeChar(Read());
                BeginToken();
                ConsumeChar(Read());
                m_state = LexerState.Default;
            }
            else if (c == '>')
            {
                ConsumeChar(Read());
                if (Peek(0) == '=' || Peek(0) == '>')
                {
                    BeginToken();
                    ConsumeChar(Read());
                }
                m_state = LexerState.Default;

            }
            else if (c == '<')
            {
                ConsumeChar(Read());
                if (Peek(0) == '=' || Peek(0) == '<')
                {
                    BeginToken();
                    ConsumeChar(Read());
                }
                m_state = LexerState.Default;

            }
            else if (c == ')')
            {
                ConsumeChar(Read());
                SkipWhitespace();
                BeginToken();
                if (MatchIdentifer("as"))
                {
                    EndToken();
                    ConsumeToken(Token.AS);
                    m_state = LexerState.ItemType;
                }
            }
            else if (MatchIdentifer("external"))
            {
                EndToken();
                ConsumeToken(Token.EXCEPT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("and"))
            {
                EndToken();
                ConsumeToken(Token.AND);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("at"))
            {
                EndToken();
                ConsumeToken(Token.AT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("div"))
            {
                EndToken();
                ConsumeToken(Token.DIV);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("except"))
            {
                EndToken();
                ConsumeToken(Token.EXCEPT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("eq"))
            {
                EndToken();
                ConsumeToken(Token.EQ);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("ge"))
            {
                EndToken();
                ConsumeToken(Token.GE);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("gt"))
            {
                EndToken();
                ConsumeToken(Token.GT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("le"))
            {
                EndToken();
                ConsumeToken(Token.LE);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("lt"))
            {
                EndToken();
                ConsumeToken(Token.LT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("ne"))
            {
                EndToken();
                ConsumeToken(Token.NE);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("idiv"))
            {
                EndToken();
                ConsumeToken(Token.IDIV);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("intersect"))
            {
                EndToken();
                ConsumeToken(Token.INTERSECT);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("mod"))
            {
                EndToken();
                ConsumeToken(Token.MOD);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("order", "by"))
            {
                EndToken();
                ConsumeToken(Token.ORDER_BY);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("stable", "order", "by"))
            {
                EndToken();
                ConsumeToken(Token.STABLE_ORDER_BY);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("or"))
            {
                EndToken();
                ConsumeToken(Token.OR);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("return"))
            {
                EndToken();
                ConsumeToken(Token.RETURN);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("satisfies"))
            {
                EndToken();
                ConsumeToken(Token.SATISFIES);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("to"))
            {
                EndToken();
                ConsumeToken(Token.TO);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("union"))
            {
                EndToken();
                ConsumeToken(Token.UNION);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("where"))
            {
                EndToken();
                ConsumeToken(Token.WHERE);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("castable", "as"))
            {
                EndToken();
                ConsumeToken(Token.CASTABLE_AS);
                m_state = LexerState.SingleType;
            }
            else if (MatchIdentifer("cast", "as"))
            {
                EndToken();
                ConsumeToken(Token.CAST_AS);
                m_state = LexerState.SingleType;
            }
            else if (MatchIdentifer("instance", "of"))
            {
                EndToken();
                ConsumeToken(Token.INSTANCE_OF);
            }
            else if (MatchIdentifer("treat", "as"))
            {
                EndToken();
                ConsumeToken(Token.TREAT_AS);
            }
            else if (MatchIdentifer("case"))
            {
                EndToken();
                ConsumeToken(Token.CASE);
            }
            else if (MatchIdentifer("as"))
            {
                EndToken();
                ConsumeToken(Token.AS);
            }
            else if (MatchIdentifer("in"))
            {
                EndToken();
                ConsumeToken(Token.IN);
                m_state = LexerState.Default;
            }
            else if (MatchIdentifer("is"))
            {
                EndToken();
                ConsumeToken(Token.IS);
                m_state = LexerState.Default;
            }
            else if (XmlCharType.Instance.IsNameChar(c))
            {
                ConsumeQName();
                m_state = LexerState.OccurrenceIndicator;
            }
        }
Example #28
 private void CloseKindTestState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     BeginToken();
     if (c == ')')
     {
         ConsumeChar(Read());
         m_state = m_states.Pop();
     }
     else if (c == ',')
     {
         ConsumeChar(Read());
         m_state = LexerState.KindTest;
     }
     else if (c == '{')
     {
         ConsumeChar(Read());
         m_states.Push(LexerState.Operator);
         m_state = LexerState.Default;
     }
     else if (c == '?')
         ConsumeChar(Read());
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
 }
Example #29
        public void Tokenize(IEnumerable <string> lines)
        {
            afterWord   = LexerState.None;
            parensDepth = 0;

            Line = 0;
            foreach (string line in lines)
            {
                Line++;
                if (line.Length == 0)
                {
                    continue;
                }

                CurrentLine  = line;
                Column       = 0;
                State        = LexerState.None;
                endOfLine    = false;
                currentToken = string.Empty;

                while (!endOfLine)
                {
                    char       ch    = Next();
                    LexerState guess = Guess(ch);
                    State = stateMachine[State](ch, guess);

                    if (!stateMachine.ContainsKey(State))
                    {
                        throw new TokenizationException($"Unknown tokenizer state: {State}");
                    }
                }

                // process end of line in case something is missing
                stateMachine[State]('\n', LexerState.EndOfLine);

                // special handling for 'Include'
                if (Tokens.Count >= 2 && Tokens[Tokens.Count - 2].Value == "Include")
                {
                    Token filename = Tokens[Tokens.Count - 1];
                    Tokens.RemoveRange(Tokens.Count - 2, 2);

                    Tokenizer jt = new Tokenizer(Context);
                    if (filename.Type != TokenType.String)
                    {
                        throw new ArgumentTypeException("Can only Include a (filename) string");
                    }

                    ScriptSource include = Context.Djn.FindByName <ScriptSource>(filename.Value);
                    if (include == null)
                    {
                        throw new MissingResourceException($"Unknown Source resource: {filename.Value}");
                    }

                    jt.Tokenize(include.Source.Split('\n'));
                    Tokens.AddRange(jt.Tokens);
                }
                else
                {
                    Tokens.Add(new Token {
                        Type = TokenType.EOL
                    });
                }
            }
        }
Example #30
 private void DefaultState()
 {
     SkipWhitespace();
     BeginToken();
     char c = Peek(0);
     if (c == '\0')
         ConsumeToken(0); // EOF
     //else if (MatchText("(:"))
     //{                
     //    m_states.Push(LexerState.Default);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
     else if (MatchText("(#"))
     {
         EndToken();
         ConsumeToken(Token.PRAGMA_BEGIN);
         m_state = LexerState.Pragma;
     }
     else if (c == '.')
     {
         if (Peek(1) == '.')
         {
             Read();
             Read();
             EndToken();
             ConsumeToken(Token.DOUBLE_PERIOD);
         }
         else if (XmlCharType.Instance.IsDigit(Peek(1)))
             ConsumeNumber();
         else
             ConsumeChar(Read());
         m_state = LexerState.Operator;
     }
     else if (c == ')')
     {
         ConsumeChar(Read());
         SkipWhitespace();
         BeginToken();
         if (MatchIdentifer("as"))
         {
             EndToken();
             ConsumeToken(Token.AS);
             m_state = LexerState.ItemType;
         }
         else
             m_state = LexerState.Operator;
     }
     else if (c == '*')
     {
         ConsumeChar(Read());
         if (Peek(0) == ':')
         {
             BeginToken();
             ConsumeChar(Read());
             c = Peek(0);
             if (c != 0 && XmlCharType.Instance.IsStartNCNameChar(c))
                 ConsumeNCName();
             else
                 throw new XQueryException(Properties.Resources.ExpectedNCName);
         }
         m_state = LexerState.Operator;
     }
     else if (c == ';' || c == ',' || c == '(' || c == '-' || c == '+' || c == '@' || c == '~')
         ConsumeChar(Read());
     else if (c == '/')
     {
         if (Peek(1) == '/')
         {
             Read();
             Read();
             EndToken();
             ConsumeToken(Token.DOUBLE_SLASH);
         }
         else
             ConsumeChar(Read());
     }
     else if (MatchIdentifer("if", "("))
     {
         EndToken("if");
         ConsumeToken(Token.IF);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
     }
     else if (MatchIdentifer("declare", "construction"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_CONSTRUCTION);
         m_state = LexerState.Operator;
     }
     else if (MatchIdentifer("declare", "default", "order"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_DEFAULT_ORDER);
         m_state = LexerState.Operator;
     }
     else if (MatchIdentifer("declare", "default", "collation"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_DEFAULT_COLLATION);
         m_state = LexerState.NamespaceDecl;
     }
     else if (MatchIdentifer("declare", "namespace"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_NAMESPACE);
         m_state = LexerState.NamespaceDecl;
     }
     else if (MatchIdentifer("module", "namespace"))
     {
         EndToken();
         ConsumeToken(Token.MODULE_NAMESPACE);
         m_state = LexerState.NamespaceDecl;
     }
     else if (MatchIdentifer("declare", "base-uri"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_BASE_URI);
         m_state = LexerState.NamespaceDecl;
     }
     else if (MatchIdentifer("declare", "default", "element"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_DEFAULT_ELEMENT);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("declare", "default", "function"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_DEFAULT_FUNCTION);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("import", "schema"))
     {
         EndToken();
         ConsumeToken(Token.IMPORT_SCHEMA);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("import", "module"))
     {
         EndToken();
         ConsumeToken(Token.IMPORT_MODULE);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("declare", "copy-namespaces"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_COPY_NAMESPACES);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("for"))
     {
         EndToken();
         ConsumeToken(Token.FOR);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "for");
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("parallel", "for"))
     {
         EndToken();
         ConsumeToken(Token.PFOR);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "for");
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("let"))
     {
         EndToken();
         ConsumeToken(Token.LET);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "let");
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("some"))
     {
         EndToken();
         ConsumeToken(Token.SOME);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "some");
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("every"))
     {
         EndToken();
         ConsumeToken(Token.EVERY);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "every");
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("declare", "variable"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_VARIABLE);
         SkipWhitespace();
         BeginToken();
         if (Peek(0) == '$')
             ConsumeChar(Read());
         else
             throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "declare variable");
         m_state = LexerState.VarName;
     }
     else if (c == '$')
     {
         ConsumeChar(Read());
         m_state = LexerState.VarName;
     }
     else if (MatchIdentifer("element", "("))
     {
         EndToken("element");
         ConsumeToken(Token.ELEMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("attribute", "("))
     {
         EndToken("attribute");
         ConsumeToken(Token.ATTRIBUTE);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("schema-element", "("))
     {
         EndToken("schema-element");
         ConsumeToken(Token.SCHEMA_ELEMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("schema-attribute", "("))
     {
         EndToken("schema-attribute");
         ConsumeToken(Token.SCHEMA_ATTRIBUTE);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("comment", "("))
     {
         EndToken("comment");
         ConsumeToken(Token.COMMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("text", "("))
     {
         EndToken("text");
         ConsumeToken(Token.TEXT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("node", "("))
     {
         EndToken("node");
         ConsumeToken(Token.NODE);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("document-node", "("))
     {
         EndToken("document-node");
         ConsumeToken(Token.DOCUMENT_NODE);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTest;
     }
     else if (MatchIdentifer("processing-instruction", "("))
     {
         EndToken("processing-instruction");
         ConsumeToken(Token.PROCESSING_INSTRUCTION);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.Operator);
         m_state = LexerState.KindTestForPi;
     }
     else if (MatchText("<!--"))
     {
         EndToken();
         ConsumeToken(Token.COMMENT_BEGIN);
         m_states.Push(LexerState.Operator);
         m_state = LexerState.XmlComment;
     }
     else if (MatchText("<?"))
     {
         EndToken();
         ConsumeToken(Token.PI_BEGIN);
         m_states.Push(LexerState.Operator);
         m_state = LexerState.ProcessingInstruction;
     }
     else if (MatchText("<![CDATA["))
     {
         EndToken();
         ConsumeToken(Token.CDATA_BEGIN);
         m_states.Push(LexerState.Operator);
         m_state = LexerState.CDataSection;
     }
     else if (c == '<')
     {
         Read();
         EndToken();
         ConsumeToken(Token.BeginTag);
         m_states.Push(LexerState.Operator);
         m_state = LexerState.StartTag;
     }
     else if (MatchIdentifer("declare", "boundary-space"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_BOUNDARY_SPACE);
         m_state = LexerState.XmlSpace_Decl;
     }
     else if (c == '}')
     {
         ConsumeChar(Read());
         m_state = m_states.Pop();
     }
     else if (c == '{')
     {
         ConsumeChar(Read());
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("validate", "{"))
     {
         EndToken("validate");
         ConsumeToken(Token.VALIDATE);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("validate", "lax"))
     {
         EndToken("validate");
         ConsumeToken(Token.VALIDATE);
         BeginToken(m_bookmark[1]);
         ConsumeToken(Token.LAX);
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("validate", "strict"))
     {
         EndToken("validate");
         ConsumeToken(Token.VALIDATE);
         BeginToken(m_bookmark[1]);
         ConsumeToken(Token.STRICT);
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("typeswitch", "("))
     {
         EndToken("typeswitch");
         ConsumeToken(Token.TYPESWITCH);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
     }
     else if (MatchIdentifer("document", "{"))
     {
         EndToken("document");
         ConsumeToken(Token.DOCUMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("text", "{"))
     {
         EndToken("text");
         ConsumeToken(Token.TEXT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("comment", "{"))
     {
         EndToken("comment");
         ConsumeToken(Token.COMMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Operator);
     }
     else if (MatchIdentifer("declare", "function"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_FUNCTION);
     }
     else if (MatchIdentifer("ordered", "{"))
     {
         EndToken("ordered");
         ConsumeToken(Token.ORDERED);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Default);
     }
     else if (MatchIdentifer("unordered", "{"))
     {
         EndToken("unordered");
         ConsumeToken(Token.UNORDERED);
         BeginToken(m_bookmark[1]);
         ConsumeChar('{');
         m_states.Push(LexerState.Default);
     }
     else if (MatchIdentifer("declare", "ordering"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_ORDERING);
         m_state = LexerState.DeclareOrdering;
     }
     else if (MatchIdentifer("xquery", "version"))
     {
         EndToken();
         ConsumeToken(Token.XQUERY_VERSION);
         m_state = LexerState.XQueryVersion;
     }
     else if (MatchText("(#"))
     {
         EndToken();
         ConsumeToken(Token.PRAGMA_BEGIN);
         m_state = LexerState.Pragma;
     }
     else if (MatchIdentifer("declare", "option"))
     {
         EndToken();
         ConsumeToken(Token.DECLARE_OPTION);
         m_state = LexerState.Option;
     }
     else if (MatchIdentifer("ancestor-or-self", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_ANCESTOR_OR_SELF);
     }
     else if (MatchIdentifer("ancestor", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_ANCESTOR);
     }
     else if (MatchIdentifer("attribute", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_ATTRIBUTE);
     }
     else if (MatchIdentifer("child", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_CHILD);
     }
     else if (MatchIdentifer("descendant-or-self", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_DESCENDANT_OR_SELF);
     }
     else if (MatchIdentifer("descendant", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_DESCENDANT);
     }
     else if (MatchIdentifer("following-sibling", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_FOLLOWING_SIBLING);
     }
     else if (MatchIdentifer("following", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_FOLLOWING);
     }
     else if (MatchIdentifer("parent", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_PARENT);
     }
     else if (MatchIdentifer("preceding-sibling", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_PRECEDING_SIBLING);
     }
     else if (MatchIdentifer("preceding", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_PRECEDING);
     }
     else if (MatchIdentifer("self", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_SELF);
     }
     else if (MatchIdentifer("namespace", "::"))
     {
         EndToken();
         ConsumeToken(Token.AXIS_NAMESPACE);
     }
     else if (MatchIdentifer("at"))
     {
         EndToken();
         SkipWhitespace();
         if (Peek(0) == '"' || Peek(0) == '\'')
         {
             ConsumeToken(Token.AT);
             ConsumeLiteral();
             m_state = LexerState.NamespaceDecl;
         }
         else
         {
             ConsumeToken(Token.QName, new Qname("at"));
             if (Peek(0) != ')')
                 m_state = LexerState.Operator;
         }
     }
     else if (c == '"' || c == '\'')
     {
         ConsumeLiteral();
         m_state = LexerState.Operator;
     }
     else if (XmlCharType.Instance.IsDigit(c))
     {
         ConsumeNumber();
         m_state = LexerState.Operator;
     }
     else if (XmlCharType.Instance.IsStartNameChar(c))
     {
         StringBuilder sb = new StringBuilder();
         while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNCNameChar(c))
             sb.Append(Read());
         if (Peek(0) == ':')
         {
             if (Peek(1) == '*')
             {
                 EndToken();
                 ConsumeToken(Token.NCName, new Qname(sb.ToString()));
                 BeginToken();
                 ConsumeChar(Read());
                 BeginToken();
                 ConsumeChar(Read());
                 m_state = LexerState.Operator;
             }
             else
             {
                 while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                     sb.Append(Read());
                 EndToken();
                 ConsumeToken(Token.QName, new Qname(sb.ToString()));
                 SkipWhitespace();
                 if (Peek(0) != '(')
                     m_state = LexerState.Operator;
             }
         }
         else
         {
             EndToken();
             int anchor = m_anchor;
             int length = m_length;
             string ncname = sb.ToString();
             if (ncname == "element" || ncname == "attribute")
             {
                 SkipWhitespace();
                 if (Peek(0) == '{')
                 {
                     if (ncname == "element")
                         ConsumeToken(Token.ELEMENT, anchor, length);
                     else
                         ConsumeToken(Token.ATTRIBUTE, anchor, length);
                     BeginToken();
                     ConsumeChar(Read());
                     m_states.Push(LexerState.Operator);
                     return;
                 }
                 else if (XmlCharType.Instance.IsStartNameChar(Peek(0)))
                 {
                     BeginToken();
                     sb = new StringBuilder();
                     while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                         sb.Append(Read());
                     EndToken();
                     int anchor2 = m_anchor;
                     int length2 = m_length;
                     SkipWhitespace();
                     if (Peek(0) == '{')
                     {
                         if (ncname == "element")
                             ConsumeToken(Token.ELEMENT, anchor, length);
                         else
                             ConsumeToken(Token.ATTRIBUTE, anchor, length);
                         ConsumeToken(Token.QName, new Qname(sb.ToString()), anchor2, length2);
                         BeginToken();
                         ConsumeChar(Read());
                         m_states.Push(LexerState.Operator);
                         return;
                     }
                     else
                         throw new XQueryException(Properties.Resources.ExpectedBlockStart, ncname, sb.ToString());
                 }
             }
             else if (ncname == "processing-instruction")
             {
                 SkipWhitespace();
                 if (Peek(0) == '{')
                 {
                     ConsumeToken(Token.PROCESSING_INSTRUCTION, anchor, length);
                     BeginToken();
                     ConsumeChar(Read());
                     m_states.Push(LexerState.Operator);
                     return;
                 }
                 else if (XmlCharType.Instance.IsStartNameChar(Peek(0)))
                 {
                     sb = new StringBuilder();
                     BeginToken();
                     while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                         sb.Append(Read());
                     EndToken();
                     int anchor2 = m_anchor;
                     int length2 = m_length;
                     SkipWhitespace();
                     if (Peek(0) == '{')
                     {
                         ConsumeToken(Token.PROCESSING_INSTRUCTION, anchor, length);
                         ConsumeToken(Token.NCName, new Qname(sb.ToString()), anchor2, length2);
                         BeginToken();
                         ConsumeChar(Read());
                         m_states.Push(LexerState.Operator);
                         return;
                     }
                     else
                         throw new XQueryException(Properties.Resources.ExpectedBlockStart, ncname, sb.ToString());
                 }
             }
             ConsumeToken(Token.QName, new Qname(ncname));
             SkipWhitespace();
             if (Peek(0) != '(')
                 m_state = LexerState.Operator;
         }
     }
 }
Example #31
 private void EndTagState()
 {
     if (Peek(0) == 0)
         return;
     BeginToken();
     char c = Peek(0);
     if (c == '>')
     {
         ConsumeChar(Read());
         m_state = m_states.Pop();
     }
     else if (XmlCharType.Instance.IsWhiteSpace(c))
         ConsumeS();
     else if (XmlCharType.Instance.IsStartNameChar(c))
         ConsumeQName();
 }
Example #32
 public void RevertToState(object data)
 {
     TokenizerState stateData = (TokenizerState)data;
     m_state = stateData.current;
     m_states = new Stack<LexerState>(stateData.states);
     m_token = new Queue<CurrentToken>(stateData.tokens);
 }
Example #33
 public int token()
 {
     if (m_token.Count == 0)
     {
         EnterState();
         if (m_token.Count == 0)
         {
             m_value = null;                    
             return Token.yyErrorCode;
         }
     }
     CurrentToken curr = m_token.Dequeue();
     m_value = curr.value;
     CurrentPos = curr.anchor;
     CurrentLength = curr.length;
     CurrentState = curr.state;
     return curr.token;
 }
Example #34
 private void AposAttributeContentState()
 {
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     BeginToken();
     if (c == '\'' && Peek(1) != '\'')
     {
         Read();
         EndToken();
         ConsumeToken(Token.Apos);
         m_state = LexerState.AttributeState;
     }
     else if (MatchText("{{"))
     {
         ConsumeChar('{');
         m_anchor++;
         ConsumeChar('{');
     }
     else if (MatchText("}}"))
     {
         ConsumeChar('}');
         m_anchor++;
         ConsumeChar('}');
     }
     else if (c == '{')
     {
         ConsumeChar(Read());
         m_states.Push(m_state);
         m_state = LexerState.Default;
     }
     else if (MatchText("&gt;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&gt;"));
     }
     else if (MatchText("&lt;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&lt;"));
     }
     else if (MatchText("&amp;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&amp;"));
     }
     else if (MatchText("&quot;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&quot;"));
     }
     else if (MatchText("&apos;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&apos;"));
     }
     else if (MatchText("&#x"))
         ConsumeCharRefHex();
     else if (MatchText("&#"))
         ConsumeCharRef();
     else if (c == '\'' && Peek(1) == '\'')
     {
         Read();
         Read();
         EndToken();
         ConsumeToken(Token.EscapeApos);
     }
     else
     {
         StringBuilder sb = new StringBuilder();
         while ((c = Peek(0)) != 0 && c != '{' && c != '&' && c != '\'')
             sb.Append(Read());
         EndToken();
         ConsumeToken(Token.Char, new Literal(sb.ToString()));
     }
 }
Example #35
 private void CDataSectionState()
 {
     StringBuilder sb = new StringBuilder();
     char c;
     BeginToken();
     while (!((c = Peek(0)) == ']' && Peek(1) == ']' && Peek(2) == '>'))
     {
         if (Peek(0) == 0)
             return;
         sb.Append(Read());
     }
     EndToken();
     ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
     BeginToken();
     Read(); // ]
     Read(); // ]
     Read(); // >
     EndToken();
     ConsumeToken(Token.CDATA_END);
     m_state = m_states.Pop();
 }
Example #36
 private void ProcessingInstructionContentState()
 {
     if (Peek(0) == 0)
         return;
     StringBuilder sb = new StringBuilder();
     char c;
     BeginToken();
     while (!((c = Peek(0)) == '?' && Peek(1) == '>'))
     {
         if (Peek(0) == 0)
             return;
         sb.Append(Read());
     }
     EndToken();
     ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
     BeginToken();
     Read(); // ?
     Read(); // >
     EndToken();
     ConsumeToken(Token.PI_END);
     m_state = m_states.Pop();
 }
        /// <summary>
        /// Lexes the command line, using the same rules as <see cref="Environment.GetCommandLineArgs"/>.
        /// </summary>
        /// <param name="commandLine">The command line to parse.</param>
        /// <returns>The individual arguments lexed from the command line.</returns>
        public static IEnumerable <string> Lex(this string commandLine)
        {
            Contract.Requires(commandLine != null);
            Contract.Ensures(Contract.Result <IEnumerable <string> >() != null);

            // The MSDN information for <see cref="Environment.GetCommandLineArgs"/> is incomplete.
            // This blog post fills in the gaps: http://www.hardtoc.com/archives/162 (webcite: http://www.webcitation.org/62LHTVelJ )

            LexerState state = LexerState.Default;

            Buffer buffer = new Buffer();

            foreach (var ch in commandLine)
            {
                switch (state)
                {
                case LexerState.Default:
                    if (ch == '"')
                    {
                        // Enter the quoted state, without placing anything in the buffer.
                        state = LexerState.Quoted;
                        break;
                    }

                    // Whitespace is ignored.
                    if (ch == ' ' || ch == '\t')
                    {
                        break;
                    }

                    buffer.AppendChar(ch);
                    state = LexerState.Argument;
                    break;

                case LexerState.Argument:
                    // We have an argument started, though it may be just an empty string for now.

                    if (ch == '"')
                    {
                        // Handle the special rules for any backslashes preceding a double-quote.
                        if (buffer.AppendQuote())
                        {
                            // An even number of backslashes means that this is a normal double-quote.
                            state = LexerState.Quoted;
                        }

                        break;
                    }

                    if (ch == ' ' || ch == '\t')
                    {
                        // Whitespace ends this argument, so publish it and restart in the default state.
                        yield return(buffer.Consume());

                        state = LexerState.Default;
                        break;
                    }

                    // Count backslashes; put other characters directly into the buffer.
                    buffer.AppendChar(ch);
                    break;

                case LexerState.Quoted:
                    // We are within quotes, but may already have characters in the argument buffer.

                    if (ch == '"')
                    {
                        // Handle the special rules for any backslashes preceding a double-quote.
                        if (buffer.AppendQuote())
                        {
                            // An even number of backslashes means that this is a normal double-quote.
                            state = LexerState.EndQuotedArgument;
                        }

                        break;
                    }

                    // Any non-quote character (including whitespace) is appended to the argument buffer.
                    buffer.AppendChar(ch);
                    break;

                case LexerState.EndQuotedArgument:
                    // This is a special state that is treated like Argument or Quoted depending on whether the next character is a quote. It's not possible to stay in this state.

                    if (ch == '"')
                    {
                        // We just read a double double-quote within a quoted context, so we add the quote to the buffer and re-enter the quoted state.
                        buffer.AppendNormalChar(ch);
                        state = LexerState.Quoted;
                    }
                    else if (ch == ' ' || ch == '\t')
                    {
                        // In this case, the double-quote we just read did in fact end the quotation, so we publish the argument and restart in the default state.
                        yield return(buffer.Consume());

                        state = LexerState.Default;
                    }
                    else
                    {
                        // If the double-quote is followed by a non-quote, non-whitespace character, then it's considered a continuation of the argument (leaving the quoted state).
                        buffer.AppendChar(ch);
                        state = LexerState.Argument;
                    }

                    break;
                }
            }

            // If we end in the middle of an argument (or even a quotation), then we just publish what we have.
            if (state != LexerState.Default)
            {
                yield return(buffer.Consume());
            }
        }
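For illustration only (not part of the original source): a minimal usage sketch of the Lex extension above. It assumes the static class hosting Lex() has its namespace imported and that Buffer implements the documented backslash rules; LexDemo is an arbitrary name introduced for this sketch.

            using System;

            static class LexDemo
            {
                static void Main()
                {
                    // The verbatim literal below represents: copy "C:\My Files\a.txt" C:\dest
                    foreach (string arg in @"copy ""C:\My Files\a.txt"" C:\dest".Lex())
                        Console.WriteLine(arg);

                    // Expected output (per the Environment.GetCommandLineArgs rules):
                    //   copy
                    //   C:\My Files\a.txt
                    //   C:\dest
                }
            }

Inside a quoted region, a doubled double-quote ("") is emitted as a literal quote and quoting resumes, as handled by the EndQuotedArgument case above.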
Example #38
 private void ElementContentState()
 {
     BeginToken();
     char c = Peek(0);
     if (c == 0)
         ConsumeToken(0); // EOF
     else if (MatchText("</"))
     {
         ConsumeChar('<');
         m_anchor++;
         ConsumeChar('/');
         m_state = LexerState.EndTag;
     }
     else if (MatchText("{{"))
     {
         ConsumeChar('{');
         m_anchor++;
         ConsumeChar('{');
     }
     else if (MatchText("}}"))
     {
         ConsumeChar('}');
         m_anchor++;
         ConsumeChar('}');
     }
     else if (c == '{')
     {
         ConsumeChar(Read());
         m_states.Push(m_state);
         m_state = LexerState.Default;
     }
     else if (MatchText("<!--"))
     {
         EndToken();
         ConsumeToken(Token.COMMENT_BEGIN);
         m_states.Push(m_state);
         m_state = LexerState.XmlComment;
     }
     else if (MatchText("<?"))
     {
         EndToken();
         ConsumeToken(Token.PI_BEGIN);
         m_states.Push(m_state);
         m_state = LexerState.ProcessingInstruction;
     }
     else if (MatchText("<![CDATA["))
     {
         EndToken();
         ConsumeToken(Token.CDATA_BEGIN);
         m_states.Push(m_state);
         m_state = LexerState.CDataSection;
     }
     else if (c == '<')
     {
         Read();
         EndToken();
         ConsumeToken(Token.BeginTag);
         m_states.Push(m_state);
         m_state = LexerState.StartTag;
     }
     else if (MatchText("&gt;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&gt;"));
     }
     else if (MatchText("&lt;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&lt;"));
     }
     else if (MatchText("&amp;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&amp;"));
     }
     else if (MatchText("&quot;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&quot;"));
     }
     else if (MatchText("&apos;"))
     {
         EndToken();
         ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&apos;"));
     }
     else if (MatchText("&#x"))
         ConsumeCharRefHex();
     else if (MatchText("&#"))
         ConsumeCharRef();
     else
     {
         StringBuilder sb = new StringBuilder();
         while ((c = Peek(0)) != 0 && c != '<' && c != '&' && c != '{' && c != '}')
             sb.Append(Read());
         EndToken();
         if (sb.Length == 0)
             return;
         ConsumeToken(Token.Char, new Literal(sb.ToString()));
     }
 }
Example #39
 private void ProcessingInstructionState()
 {
     if (Peek(0) == 0)
         return;
     BeginToken();
     char c = Peek(0);
     if (XmlCharType.Instance.IsWhiteSpace(c))
     {
         ConsumeS();
         m_state = LexerState.ProcessingInstructionContent;
     }
     else if (MatchText("?>"))
     {
         EndToken();
         ConsumeToken(Token.PI_END);
         m_state = m_states.Pop();
     }
     else if (XmlCharType.Instance.IsStartNameChar(c))
     {
         StringBuilder sb = new StringBuilder();
         while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
             sb.Append(Read());
         EndToken();
         if (sb.ToString() == "xml")
             throw new XQueryException(Properties.Resources.InvalidPITarget);
         ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
     }
 }
Example #40
        public void SkipTokenTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
            info.LexerStateIndex = xml.Index;

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.PrefixTrivia.Count);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(NUM.Index, l2.TokenIndex);
            Assert.AreEqual("1107", l2.Value);
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ELSE.Index, l3.TokenIndex);
            Assert.AreEqual("else", l3.Value);
            Assert.AreEqual(1, l3.PrefixTrivia.Count);

            Lexeme l4 = scanner.Read();

            Assert.AreEqual(IF.Index, l4.TokenIndex);
            Assert.AreEqual("if", l4.Value);
            Assert.AreEqual(3, l4.PrefixTrivia.Count);


            int p1 = scanner.Peek();

            Assert.AreEqual(ID.Index, p1);

            int p2   = scanner.Peek2();
            int p3   = scanner.Peek(3);
            int peof = scanner.Peek(4);

            Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

            Lexeme l6 = scanner.Read();
            Lexeme l7 = scanner.Read();

            Assert.AreEqual(XMLNS.Index, l7.TokenIndex);

            Lexeme l8 = scanner.Read();

            Assert.AreEqual(NUM.Index, l8.TokenIndex);

            Lexeme leof = scanner.Read();

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
        }
Example #41
 private void KindTestForPiState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     BeginToken();
     if (c == ')')
     {
         ConsumeChar(Read());
         m_state = m_states.Pop();
     }
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
     else if (XmlCharType.Instance.IsNCNameChar(c))
         ConsumeNCName();
     else if (c == '\'' || c == '"')
         ConsumeLiteral();
 }
Example #42
        public void LexerStateToDFATest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM   = global.DefineToken(RE.Range('0', '9').Many1());
            var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));


            DFAModel dfa = DFAModel.Create(lexicon);

            CompressedTransitionTable tc = CompressedTransitionTable.Compress(dfa);

            ScannerInfo si = lexicon.CreateScannerInfo();

            FiniteAutomationEngine engine = new FiniteAutomationEngine(si.TransitionTable, si.CharClassTable);

            engine.InputString("if");

            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("12345");
            Assert.AreEqual(NUM.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("asdf12dd");
            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("A");
            Assert.AreEqual(ERROR.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("AAA");
            Assert.IsTrue(engine.IsAtStoppedState);

            engine.Reset();
            engine.InputString("if ");
            Assert.IsTrue(engine.IsAtStoppedState);

            engine.Reset();
            si.LexerStateIndex = keywords.Index;
            engine.InputString("if");
            Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("else");
            Assert.AreEqual(ELSE.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("xmlns");
            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            si.LexerStateIndex = xml.Index;
            engine.InputString("if");
            Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("xml");
            Assert.IsFalse(engine.IsAtStoppedState);

            engine.Reset();
            engine.InputString("xmlns");
            Assert.AreEqual(XMLNS.Index, si.GetTokenIndex(engine.CurrentState));
        }
Example #43
 private void KindTestState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     BeginToken();
     char c = Peek(0);
     if (c == '{')
     {
         ConsumeChar(Read());
         m_states.Push(LexerState.Operator);
         m_state = LexerState.Default;
     }
     else if (c == ')')
     {
         ConsumeChar(Read());
         m_state = m_states.Pop();
     }
     else if (c == '*')
     {
         ConsumeChar(Read());
         m_state = LexerState.CloseKindTest;
     }
     else if (MatchIdentifer("element", "("))
     {
         EndToken("element");
         ConsumeToken(Token.ELEMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.KindTest);
     }
     else if (MatchIdentifer("schema-element", "("))
     {
         EndToken("schema-element");
         ConsumeToken(Token.SCHEMA_ELEMENT);
         BeginToken(m_bookmark[1]);
         ConsumeChar('(');
         m_states.Push(LexerState.KindTest);
     }
     else if (XmlCharType.Instance.IsNameChar(c))
     {
         ConsumeQName();
         m_state = LexerState.CloseKindTest;
     }
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
 }
Example #44
 private void NamespaceKeywordState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     BeginToken();
     char c = Peek(0);
     if (c == '\'' || c == '"')
     {
         ConsumeLiteral();
         m_state = LexerState.NamespaceDecl;
     }
     else if (MatchIdentifer("inherit"))
     {
         EndToken();
         ConsumeToken(Token.INHERIT);
         m_state = LexerState.Default;
     }
     else if (MatchIdentifer("no-inherit"))
     {
         EndToken();
         ConsumeToken(Token.NO_INHERIT);
         m_state = LexerState.Default;
     }
     else if (MatchIdentifer("namespace"))
     {
         EndToken();
         ConsumeToken(Token.NAMESPACE);
         m_state = LexerState.NamespaceDecl;
     }
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
     else if (MatchIdentifer("default", "element"))
     {
         EndToken();
         ConsumeToken(Token.DEFAULT_ELEMENT);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("preserve"))
     {
         EndToken();
         ConsumeToken(Token.PRESERVE);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (MatchIdentifer("no-preserve"))
     {
         EndToken();
         ConsumeToken(Token.NO_PRESERVE);
         m_state = LexerState.NamespaceKeyword;
     }
     else if (c == ',')
     {
         ConsumeChar(Read());
         m_state = LexerState.NamespaceKeyword;
     }
 }
 public void initialize(string InputFromInitialization)
 {
     Input       = InputFromInitialization;
     StringBegin = StringEnd = 0;
     State       = LexerState.Ready;
 }
Example #46
 private void TagAttributeState()
 {
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     BeginToken();
     if (MatchText("/>"))
     {
         ConsumeChar('/');
         m_anchor++;
         ConsumeChar('>');
         m_state = m_states.Pop();
     }
     else if (c == '>')
     {
         ConsumeChar(Read());
         m_state = LexerState.ElementContent;
     }
     else if (c == '[') // Mapping extensions
     {
         ConsumeChar(Read());
         m_states.Push(LexerState.AttributeState);
         m_state = LexerState.Default;
     }
     else if (c == '"')
     {
         ConsumeChar(Read());
         m_state = LexerState.QuotAttributeContent;
     }
     else if (c == '\'')
     {
         Read();
         EndToken();
         ConsumeToken(Token.Apos);
         m_state = LexerState.AposAttributeContent;
     }
     else if (c == '=')
         ConsumeChar(Read());
     else if (XmlCharType.Instance.IsWhiteSpace(c))
         ConsumeS();
     else if (XmlCharType.Instance.IsStartNameChar(c))
         ConsumeQName();
 }
Example #47
 public Lexer(NFA pad, string tag, LAD[] alts)
 {
     this.pad = pad;
     this.alts = alts;
     this.tag = tag;
     int root = pad.AddNode();
     int[] alt_shuffle = new int[alts.Length];
     for (int i = 0; i < alts.Length; i++) alt_shuffle[i] = i;
     Array.Sort(alt_shuffle, delegate (int i1, int i2) {
         int j1, j2;
         bool c1, c2;
         alts[i1].QueryLiteral(pad, out j1, out c1);
         alts[i2].QueryLiteral(pad, out j2, out c2);
         return (j1 != j2) ? (j2 - j1) : (i1 - i2);
     });
     for (int ix = 0; ix < alts.Length; ix++) {
         pad.curfate = alt_shuffle[ix];
         int target = pad.AddNode();
         pad.nodes_l[target].final = true;
         alts[alt_shuffle[ix]].ToNFA(pad, root, target);
     }
     nfates = alts.Length;
     fatebuffer = new int[nfates*2+2];
     for (int i = 0; i < nfates*2+2; i++)
         fatebuffer[i] = -1;
     fatebuffer[0] = fatebuffer[1] = 0;
     pad.Complete();
     // now the NFA nodes are all in tiebreak order by lowest index
     if (LtmTrace) {
         Dump();
     }
     start = new LexerState(pad);
     start.Add(0);
     pad.Close(start);
     nil = new LexerState(pad);
     pad.dfashare[nil] = nil;
     pad.dfashare[start] = start;
 }
 public void initialize(string InputFromInitialization)
 {
     Input = InputFromInitialization;
     StringBegin = StringEnd = 0;
     State = LexerState.Ready;
 }
Example #49
    // Worklist-based closure: starting from the states already set in ls, follow
    // every unconditional edge (e.when == null) and add the reached states to ls.
    public void Close(LexerState ls)
    {
        int ngrey = 0;
        for (int i = 0; i < ls.nstates.Length; i++) {
            int bm = ls.nstates[i];
            for (int j = 0; j < 32; j++) {
                if ((bm & (1 << j)) != 0)
                    greybuf[ngrey++] = 32*i + j;
            }
        }

        while (ngrey != 0) {
            int val = greybuf[--ngrey];
            foreach (NFA.Edge e in nodes[val].edges) {
                if (e.when == null) {
                    int ix = e.to >> 5;
                    int m = 1 << (e.to & 31);
                    if ((ls.nstates[ix] & m) == 0) {
                        ls.nstates[ix] |= m;
                        greybuf[ngrey++] = e.to;
                    }
                }
            }
        }
    }
Example #50
 private void StartTagState()
 {
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     BeginToken();
     if (MatchText("/>"))
     {                
         ConsumeChar('/');
         m_anchor++;
         ConsumeChar('>');
         m_state = m_states.Pop();
     }
     else if (c == '>')
     {
         ConsumeChar(Read());
         m_state = LexerState.ElementContent;
     }
     else if (XmlCharType.Instance.IsWhiteSpace(c))
         ConsumeS();
     else if (XmlCharType.Instance.IsStartNameChar(c))
     {
         ConsumeQName();
         m_state = LexerState.AttributeState;
     }
 }
        private static IEnumerable <Token> LexImpl(TokenTextIndex tokenFactory)
        {
            string     text                  = tokenFactory.Text;
            LexerState state                 = LexerState.SkipWhitespace;
            int        tokenStart            = 0;
            int        multiLineCommentStart = 0;
            int        valueLeftBraceDepth   = 0;

            for (int idx = 0; idx < text.Length; ++idx)
            {
                char ch = text[idx];
                // Note: the "error detection" cases appear later in the switch because we
                // expect them to be visited less often (and the C# compiler emits the
                // branches in order).
                switch (state)
                {
                case LexerState.SkipWhitespace:
                    // Putting SkipWhitespace first because we expect most of the time to be
                    // skipping whitespace.
                    tokenStart = idx;
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        // Skip whitespace
                        break;

                    case '\'':
                        state = LexerState.CollectingString;
                        break;

                    case '/':
                        state = LexerState.CommentCandidate;
                        break;

                    case '|':
                        yield return(tokenFactory.Token(idx, TokenKind.Pipe));

                        break;

                    case ':':
                        yield return(tokenFactory.Token(idx, TokenKind.Colon));

                        break;

                    case ';':
                        yield return(tokenFactory.Token(idx, TokenKind.Semicolon));

                        break;

                    case '.':
                        state = LexerState.DotsCandidate;
                        break;

                    case '(':
                        yield return(tokenFactory.Token(idx, TokenKind.Lparen));

                        break;

                    case ')':
                        yield return(tokenFactory.Token(idx, TokenKind.Rparen));

                        break;

                    case '*':
                        yield return(tokenFactory.Token(idx, TokenKind.Star));

                        break;

                    case '+':
                        yield return(tokenFactory.Token(idx, TokenKind.Plus));

                        break;

                    case '?':
                        yield return(tokenFactory.Token(idx, TokenKind.Question));

                        break;

                    default:
                        state = LexerState.CollectingIdentifier;
                        break;
                    }
                    break;

                case LexerState.CollectingString:
                    if (ch == '\'')
                    {
                        yield return(tokenFactory.Token(tokenStart, idx + 1, TokenKind.String));

                        state = LexerState.SkipWhitespace;
                    }
                    break;

                case LexerState.SkipSingleLineComment:
                    switch (ch)
                    {
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        state = LexerState.SkipWhitespace;
                        break;
                    }
                    break;

                case LexerState.CommentCandidate:
                    switch (ch)
                    {
                    case '/':
                        state = LexerState.SkipSingleLineComment;
                        break;

                    case '*':
                        state = LexerState.MultiLineComment;
                        multiLineCommentStart = idx - 1;
                        break;

                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(idx - 1), Strings.UnrecognizedForwardSlash);
                    }
                    break;

                case LexerState.MultiLineComment:
                    switch (ch)
                    {
                    case '*':
                        state = LexerState.MultiLineCommentStar;
                        break;

                    case '@':
                        state      = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;

                    case '{':
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;
                    }
                    break;

                case LexerState.MultiLineCommentStar:
                    switch (ch)
                    {
                    case '*':
                        // Do nothing, e.g. in case *****/
                        break;

                    case '@':
                        state      = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;

                    case '{':
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;

                    case '/':
                        state = LexerState.SkipWhitespace;
                        break;

                    default:
                        state = LexerState.MultiLineComment;
                        break;
                    }
                    break;

                case LexerState.CollectingAnnotation:
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));

                        state = LexerState.MultiLineComment;
                        break;

                    case '*':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));

                        state = LexerState.MultiLineCommentStar;
                        break;

                    case '{':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));

                        valueLeftBraceDepth = 0;
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;

                    case '@':
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnrecognizedAtInAnnotation);
                    }
                    break;

                case LexerState.CollectingAnnotationValue:
                    switch (ch)
                    {
                    case '{':
                        valueLeftBraceDepth++;
                        break;

                    case '}':
                        if (valueLeftBraceDepth > 0)
                        {
                            valueLeftBraceDepth--;
                        }
                        else
                        {
                            yield return(tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));

                            state = LexerState.MultiLineComment;
                        }
                        break;

                    case '*':
                        state = LexerState.CollectingAnnotationValueStar;
                        break;
                    }
                    break;

                case LexerState.CollectingIdentifier:
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        state = LexerState.SkipWhitespace;
                        break;

                    case '\'':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        tokenStart = idx;
                        state      = LexerState.CollectingString;
                        break;

                    case '/':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        state = LexerState.CommentCandidate;
                        break;

                    case '|':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Pipe));

                        state = LexerState.SkipWhitespace;
                        break;

                    case ':':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Colon));

                        state = LexerState.SkipWhitespace;
                        break;

                    case ';':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Semicolon));

                        state = LexerState.SkipWhitespace;
                        break;

                    case '.':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        tokenStart = idx;
                        state      = LexerState.DotsCandidate;
                        break;

                    case '(':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Lparen));

                        state = LexerState.SkipWhitespace;
                        break;

                    case ')':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Rparen));

                        state = LexerState.SkipWhitespace;
                        break;

                    case '*':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Star));

                        state = LexerState.SkipWhitespace;
                        break;

                    case '+':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Plus));

                        state = LexerState.SkipWhitespace;
                        break;

                    case '?':
                        yield return(tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));

                        yield return(tokenFactory.Token(idx, TokenKind.Question));

                        state = LexerState.SkipWhitespace;
                        break;
                    }
                    break;

                case LexerState.CollectingAnnotationValueStar:
                    switch (ch)
                    {
                    case '}':
                        yield return(tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));

                        state = LexerState.MultiLineComment;
                        break;

                    case '/':
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedAnnotation);

                    default:
                        state = LexerState.CollectingAnnotationValue;
                        break;
                    }
                    break;

                case LexerState.DotsCandidate:
                    switch (ch)
                    {
                    case '.':
                        yield return(tokenFactory.Token(tokenStart, idx + 1, TokenKind.Dots));

                        state = LexerState.SkipWhitespace;
                        break;

                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.SingleDot);
                    }
                    break;
                }
            }

            switch (state)
            {
            case LexerState.CollectingIdentifier:
                yield return(tokenFactory.Token(tokenStart, text.Length, TokenKind.Identifier));

                break;

            case LexerState.MultiLineComment:
            case LexerState.MultiLineCommentStar:
            case LexerState.CollectingAnnotation:
            case LexerState.CollectingAnnotationValue:
            case LexerState.CollectingAnnotationValueStar:
                throw new G4ParseFailureException(tokenFactory.Location(multiLineCommentStart), Strings.UnclosedMultiLineComment);

            case LexerState.CommentCandidate:
                throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.UnrecognizedForwardSlash);

            case LexerState.CollectingString:
                throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedString);

            case LexerState.DotsCandidate:
                throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.SingleDot);

            case LexerState.SkipWhitespace:
            case LexerState.SkipSingleLineComment:
                // OK (do nothing)
                break;
            }
        }
Example #52
 private void OptionState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     if (XmlCharType.Instance.IsStartNameChar(Peek(0)))
     {
         ConsumeQName();
         m_state = LexerState.Default;
     }
 }
Example #53
 private void XmlSpace_DeclState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     BeginToken();
     if (MatchIdentifer("preserve"))
     {
         EndToken();
         ConsumeToken(Token.PRESERVE);
         m_state = LexerState.Default;
     }
     else if (MatchIdentifer("strip"))
     {
         EndToken();
         ConsumeToken(Token.STRIP);
         m_state = LexerState.Default;
     }
     //else if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
 }
Example #54
 private void OccurrenceIndicatorState()
 {
     SkipWhitespace();
     BeginToken();
     //if (MatchText("(:"))
     //{
     //    m_states.Push(m_state);
     //    m_state = LexerState.ExprComment;
     //    ExprCommentState();
     //}
     //else
     {
         char c = Peek(0);
         if (c == '*')
         {
             //if (!(XmlCharType.Instance.IsNameChar(Peek(1)) || XmlCharType.Instance.IsDigit)
             //{
             Read();
             EndToken();
             ConsumeToken(Token.Indicator1);
         }
         else if (c == '+')
         {
             Read();
             EndToken();
             ConsumeToken(Token.Indicator2);
         }
         else if (c == '?')
         {
             Read();
             EndToken();
             ConsumeToken(Token.Indicator3);
         }
         m_state = LexerState.Operator;
         OperatorState();
     }
 }
Example #55
        public void ScannerTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.Span.StartLocation.Column);
            Assert.AreEqual(6, l1.Span.EndLocation.Column);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(WHITESPACE.Index, l2.TokenIndex);
            Assert.AreEqual(" ", l2.Value);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(NUM.Index, l3.TokenIndex);
            Assert.AreEqual("1107", l3.Value);

            Lexeme l4 = scanner.Read();

            Assert.AreEqual(WHITESPACE.Index, l4.TokenIndex);

            Lexeme l5 = scanner.Read();

            Assert.AreEqual(ID.Index, l5.TokenIndex);

            int p1 = scanner.Peek();

            Assert.AreEqual(WHITESPACE.Index, p1);

            int p2 = scanner.Peek2();

            Assert.AreEqual(ERROR.Index, p2);

            int p3 = scanner.Peek(3);

            Assert.AreEqual(WHITESPACE.Index, p3);

            int p4 = scanner.Peek(4);

            Assert.AreEqual(ID.Index, p4);

            int p5 = scanner.Peek(5);

            Assert.AreEqual(WHITESPACE.Index, p5);

            Lexeme l6 = scanner.Read();
            Lexeme l7 = scanner.Read();

            Assert.AreEqual(ERROR.Index, l7.TokenIndex);

            int p3_2 = scanner.Peek();

            Assert.AreEqual(p3, p3_2);

            Lexeme l8   = scanner.Read(); // whitespace
            Lexeme l9   = scanner.Read(); // ID:if
            Lexeme l10  = scanner.Read(); // whitespace
            Lexeme l11  = scanner.Read(); // ID:vvv
            Lexeme l12  = scanner.Read(); // whitespace
            Lexeme l13  = scanner.Read(); // ID:xmlns
            Lexeme l14  = scanner.Read(); // whitespace
            Lexeme l15  = scanner.Read(); // NUM:772737
            Lexeme leof = scanner.Read(); // eof

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);

            Lexeme leof2 = scanner.Read(); // after EOF, Read() should return EOF again

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof2.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof2.Span.StartLocation.CharIndex);
        }
Example #56
 private void XmlCommentState()
 {
     BeginToken();
     StringBuilder sb = new StringBuilder();
     char c;
     while (!((c = Peek(0)) == '-' && Peek(1) == '-' && Peek(2) == '>'))
     {
         if (Peek(0) == 0)
             return;
         sb.Append(Read());
     }
     EndToken();
     ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
     BeginToken();
     Read(); // -
     Read(); // -
     Read(); // >
     EndToken();
     ConsumeToken(Token.COMMENT_END);
     m_state = m_states.Pop();
 }
Example #57
        public void CompactCharSetTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var lettersCategories = new[] { UnicodeCategory.LetterNumber,
                                            UnicodeCategory.LowercaseLetter,
                                            UnicodeCategory.ModifierLetter,
                                            UnicodeCategory.OtherLetter,
                                            UnicodeCategory.TitlecaseLetter,
                                            UnicodeCategory.UppercaseLetter };

            var RE_IDCHAR = RE.CharsOf(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)));


            var ID = global.DefineToken(RE_IDCHAR.Concat(
                                            (RE_IDCHAR | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            var scannerInfo = lexicon.CreateScannerInfo();

            scannerInfo.LexerStateIndex = xml.Index;

            Scanner s = new Scanner(scannerInfo);

            string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";

            SourceReader sr = new SourceReader(new StringReader(source));

            s.SetSource(sr);
            s.SetTriviaTokens(WHITESPACE.Index);

            var l1 = s.Read();

            Assert.AreEqual(XMLNS.Index, l1.TokenIndex);

            var l2 = s.Read();

            Assert.AreEqual(ID.Index, l2.TokenIndex);

            var l3 = s.Read();

            Assert.AreEqual(NUM.Index, l3.TokenIndex);

            var l4 = s.Read();

            Assert.AreEqual(ID.Index, l4.TokenIndex);

            var l5 = s.Read();

            Assert.AreEqual(ID.Index, l5.TokenIndex);

            var l6 = s.Read();

            Assert.AreEqual(IF.Index, l6.TokenIndex);
        }
Example #58
    // Compute the successor state for input 'ch': follow every NFA edge that
    // accepts 'ch' from the current members, close the result, canonicalize it
    // through nf.dfashare, and cache it in dfc for reuse.
    public LexerState Next(NFA nf, int ch)
    {
        LexerState l;
        if (dfc.TryGetValue(ch, out l))
            return l;
        l = new LexerState(nf);
        for (int i = 0; i < nstates.Length; i++) {
            int bm = nstates[i];
            for (int j = 0; j < 32; j++) {
                if ((bm & (1 << j)) == 0)
                    continue;
                int ei = 0, eimax = 0;
                var es = nf.EdgesOf(32*i + j, ref ei, ref eimax);
                while (ei != eimax) {
                    var e = es[ei++];
                    if (e.when == ch || e.when == -1 && e.when_cc.Accepts(ch))
                        l.Add(e.to);
                }
            }
        }

        nf.Close(l);
        LexerState cl;

        if (!nf.dfashare.TryGetValue(l, out cl)) {
            nf.dfashare[l] = cl = l;
        }
        dfc[ch] = cl;
        return cl;
    }
Example #59
    // Same worklist-based closure as above, but in this representation
    // unconditional edges are marked with when == -2.
    public void Close(LexerState ls)
    {
        int ngrey = 0;
        for (int i = 0; i < ls.nstates.Length; i++) {
            int bm = ls.nstates[i];
            for (int j = 0; j < 32; j++) {
                if ((bm & (1 << j)) != 0)
                    greybuf[ngrey++] = 32*i + j;
            }
        }

        while (ngrey != 0) {
            int val = greybuf[--ngrey];
            int eix = 0, lix = 0;
            EdgesOf(val, ref eix, ref lix);
            while (eix != lix) {
                Edge e = edges[eix++];
                if (e.when == -2) {
                    int ix = e.to >> 5;
                    int m = 1 << (e.to & 31);
                    if ((ls.nstates[ix] & m) == 0) {
                        ls.nstates[ix] |= m;
                        greybuf[ngrey++] = e.to;
                    }
                }
            }
        }
    }
Example #60
 private void UriToOperatorState()
 {
     SkipWhitespace();
     if (Peek(0) == 0)
         return;
     char c = Peek(0);
     if (c == '\'' || c == '"')
     {
         ConsumeLiteral();
         m_state = LexerState.Operator;
     }
 }