/// <summary>
/// Lexes tokens from the text handed out by <paramref name="textProvider"/>, beginning at
/// <paramref name="start"/>.
/// </summary>
/// <remarks>
/// Text is requested one chunk at a time via <c>TryGetText</c>. A token whose end offset coincides
/// with the end of the current chunk may continue into the next chunk, so it is held back
/// instead of being yielded; its value is then prepended to the next chunk and the combined
/// text is lexed again. When the provider has no more text, any held-back token is yielded.
/// </remarks>
/// <param name="start">Position from which the first chunk of text is requested.</param>
/// <param name="textProvider">Supplier of text chunks; only checked for null by a debug assertion.</param>
/// <returns>A lazily evaluated sequence of lexer tokens.</returns>
public static IEnumerable<Token<LexerTokenType>> LexTokens(Position start, ITextProvider textProvider)
{
    System.Diagnostics.Debug.Assert(textProvider != null);

    // Token that ended exactly at the chunk boundary and is waiting to be re-lexed.
    Token<LexerTokenType> pending = null;
    Position cursor = start;
    string chunk;
    Range chunkRange;

    while (textProvider.TryGetText(cursor, out chunk, out chunkRange))
    {
        if (pending != null)
        {
            // Glue the held-back token onto the front of the new chunk so the lexer sees it whole.
            chunk = string.Concat(pending.Value, chunk);
            chunkRange = new Range(pending.Range.Start, chunkRange.End);
        }

        foreach (Token<LexerTokenType> token in XmlLexer.LexTokens(chunk, chunkRange.Start))
        {
            bool endsBeforeChunkEnd = token.Range.End.Offset != chunkRange.End.Offset;
            if (!endsBeforeChunkEnd)
            {
                // Last token of this chunk: keep it until we know whether more text follows.
                pending = token;
                continue;
            }

            pending = null;
            yield return token;
        }

        cursor = chunkRange.End;
    }

    // The provider ran dry while a token was still held back; hand it to the caller now.
    if (pending != null)
    {
        yield return pending;
    }
}
/// <summary>
/// Parses the tokens lexed from <paramref name="textProvider"/> into a lazily evaluated
/// sequence of statements.
/// </summary>
/// <remarks>
/// Assumes that <paramref name="start"/> is a reasonable place from which to start parsing,
/// that is, it is not inside a tag. One statement is built and yielded per loop iteration,
/// chosen by the current token type:
/// whitespace/newline becomes <c>Ignorable</c>; a left angle bracket starts a comment
/// (<c>&lt;!-- ... --&gt;</c>), a <c>&lt;? ... ?&gt;</c> construct (also <c>Ignorable</c>),
/// a close tag (<c>ObjectEnd</c>), or an open tag (<c>ObjectStart</c>, or <c>Object</c>
/// when self-closing); anything else becomes <c>Unknown</c>.
/// </remarks>
/// <param name="start">Position at which lexing begins.</param>
/// <param name="textProvider">Supplier of the text to lex and parse.</param>
/// <returns>The parsed statements, produced on demand.</returns>
public static IEnumerable<Statement<StatementType, ParserTokenType>> ParseStatements(Position start, ITextProvider textProvider)
{
    var tokens = new TokenBuffer<LexerTokenType>(XmlLexer.LexTokens(start, textProvider));
    var tokenInfo = new XmlTokenInfo();

    while (tokens.CurrentToken != null)
    {
        StatementBuilder<StatementType, ParserTokenType, LexerTokenType> builder =
            new StatementBuilder<StatementType, ParserTokenType, LexerTokenType>(tokens, tokenInfo);

        switch (tokens.CurrentTokenType)
        {
            case LexerTokenType.Whitespace:
            case LexerTokenType.Newline:
                builder.As(StatementType.Ignorable)
                       .AcceptAllWhitespace();
                break;

            case LexerTokenType.LeftAngle:
                // After a left angle bracket the statement is a comment, a '<?...?>' construct,
                // or an object open/close tag.
                builder
                    .Expect(LexerTokenType.LeftAngle, ParserTokenType.LeftAngle)
                    .Accept(
                        LexerTokenType.Exclamation,
                        ParserTokenType.Comment,
                        comment => comment
                            .As(StatementType.Comment)
                            .Expect(LexerTokenType.DoubleDash, ParserTokenType.Comment)
                            .AggregateWhileNot(LexerTokenType.DoubleDash, ParserTokenType.Comment)
                            .Expect(LexerTokenType.DoubleDash, ParserTokenType.Comment)
                            .Expect(LexerTokenType.RightAngle, ParserTokenType.RightAngle),
                        notComment => notComment.Accept(
                            LexerTokenType.Question,
                            ParserTokenType.Unknown,
                            instruction => instruction
                                .As(StatementType.Ignorable)
                                .AggregateWhile(
                                    kind => kind != LexerTokenType.Question && kind != LexerTokenType.RightAngle,
                                    ParserTokenType.Unknown,
                                    expectNonEmpty: true)
                                .Expect(LexerTokenType.Question, ParserTokenType.Unknown)
                                .Expect(LexerTokenType.RightAngle, ParserTokenType.RightAngle),
                            tag => tag.Accept(
                                LexerTokenType.Slash,
                                ParserTokenType.Slash,
                                closeTag => closeTag
                                    .As(StatementType.ObjectEnd)
                                    .AcceptAllWhitespace()
                                    .ExpectXmlObjectName()
                                    .AcceptAllWhitespace()
                                    .Expect(LexerTokenType.RightAngle, ParserTokenType.RightAngle),
                                openTag =>
                                {
                                    openTag.As(StatementType.ObjectStart)
                                           .AcceptAllWhitespace()
                                           .ExpectXmlObjectName()
                                           .AcceptAllWhitespace();

                                    // Consume name = value pairs for as long as identifiers keep
                                    // appearing and no error has been recorded.
                                    while (!openTag.HasError
                                           && openTag.TokenBuffer.CurrentTokenType == LexerTokenType.Identifier)
                                    {
                                        openTag.ExpectXmlPropertyName()
                                               .AcceptAllWhitespace()
                                               .Expect(LexerTokenType.Equals, ParserTokenType.Equals)
                                               .AcceptAllWhitespace()
                                               .ExpectXmlPropertyValue(ParserTokenType.PropertyValue)
                                               .AcceptAllWhitespace();
                                    }

                                    // A trailing '/' inside the tag marks it as self-closing.
                                    return openTag
                                        .Accept(
                                            LexerTokenType.Slash,
                                            ParserTokenType.Slash,
                                            selfClose => selfClose.As(StatementType.Object))
                                        .Expect(LexerTokenType.RightAngle, ParserTokenType.RightAngle);
                                })));
                break;

            default:
                // Unexpected token: mark the statement as unknown and aggregate until a left angle bracket.
                builder.As(StatementType.Unknown, ParserTokenType.Unknown)
                       .AggregateWhileNot(LexerTokenType.LeftAngle, ParserTokenType.Unknown);
                break;
        }

        yield return builder.ToStatement();
    }
}