/// <summary>
/// Lexes the text beginning at <paramref name="start"/> with <c>RtypeLexer.LexTokens</c> and
/// groups the resulting tokens into statements, yielding one statement per pass of the outer
/// loop until the token buffer has no current token left.
/// </summary>
/// <remarks>
/// The caller is expected to pick a sensible <paramref name="start"/>: it must not be the
/// second physical line of a logical continuation line. This method is an iterator, so the
/// statements are built as the returned sequence is enumerated.
/// </remarks>
/// <param name="start">Position handed to the lexer as the place to begin.</param>
/// <param name="textProvider">Text source handed to the lexer.</param>
/// <returns>The statements built from the token stream, in the order they were parsed.</returns>
public static IEnumerable<Statement<StatementType, ParserTokenType>> ParseStatements(Position start, ITextProvider textProvider)
{
    var tokens = new TokenBuffer<LexerTokenType>(RtypeLexer.LexTokens(start, textProvider));
    var tokenInfo = new RTypeTokenInfo();

    while (tokens.CurrentToken != null)
    {
        var builder = new StatementBuilder<StatementType, ParserTokenType, LexerTokenType>(tokens, tokenInfo);

        // Peel off any leading whitespace before dispatching, so the switch only has to look
        // at the first meaningful token. Every branch hands the whitespace to the statement.
        Token<LexerTokenType> indent;
        tokens.Accept(LexerTokenType.Whitespace, out indent);

        switch (tokens.CurrentTokenType)
        {
            case LexerTokenType.Newline:
            case LexerTokenType.Hash:
                // Blank line or '#' comment: nothing to interpret, swallow the rest of the line.
                builder.As(StatementType.Ignorable, ParserTokenType.Whitespace, indent)
                    .ReadToEndOfLine();
                break;

            case LexerTokenType.UseKeyword:
                // Open question carried over from the original author: should leading
                // whitespace be disallowed in front of a use statement?
                builder.As(StatementType.Use, ParserTokenType.Whitespace, indent)
                    .Expect(LexerTokenType.UseKeyword, ParserTokenType.UseKeyword)
                    .Accept(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Expect(LexerTokenType.Identifier, ParserTokenType.NamespacePrefixDeclaration)
                    .ReadToEndOfLine();
                break;

            case LexerTokenType.Identifier:
                // A leading identifier opens an Object statement.
                builder.As(StatementType.Object, ParserTokenType.Whitespace, indent)
                    .ExpectRtypeObjectName();

                // After the object name come "name=value" pairs, comments and newlines. An
                // object may continue over several physical lines, so at the start of each
                // new line we peek for a property assignment to decide whether the line
                // still belongs to this object or begins a new statement.
                bool atLineStart = false;
                bool defaultValueAllowed = true;
                while (tokens.CurrentToken != null)
                {
                    if (atLineStart)
                    {
                        // Step over the indentation just long enough to peek at what follows,
                        // then put it back so that whoever consumes the line sees it again.
                        tokens.Accept(LexerTokenType.Whitespace, out indent);
                        var continuesObject = tokens.LooksLikePropertyAssignment();
                        if (indent != null)
                        {
                            tokens.PushBack(indent);
                        }

                        if (!continuesObject)
                        {
                            // Not a continuation line: this object statement is complete.
                            break;
                        }

                        atLineStart = false;
                    }

                    // Comments and newlines can show up almost anywhere, so deal with them
                    // before looking for values.
                    builder.AcceptTrailingComment();
                    if (tokens.Is(LexerTokenType.Newline))
                    {
                        builder.Expect(LexerTokenType.Newline, ParserTokenType.Whitespace);
                        atLineStart = true;
                        defaultValueAllowed = false;
                        continue;
                    }

                    // While a default property value is still allowed (no newline and no
                    // default value consumed yet), anything that does not look like a
                    // property assignment is read as that (optionally quoted) value.
                    if (tokens.CurrentToken != null && defaultValueAllowed && !tokens.LooksLikePropertyAssignment())
                    {
                        builder.ExpectRtypePropertyValue(ParserTokenType.PropertyValue);
                        defaultValueAllowed = false;
                        continue;
                    }

                    // Consume "Name=Value" pairs until the line ends or something goes wrong.
                    while (!builder.HasError && tokens.CurrentToken != null && !tokens.Is(LexerTokenType.Newline))
                    {
                        builder.ExpectRtypePropertyName()
                            .Expect(LexerTokenType.Equals, ParserTokenType.Equals)
                            .ExpectRtypePropertyValue(ParserTokenType.PropertyValue)
                            .AcceptTrailingComment();
                    }

                    if (builder.HasError)
                    {
                        // Give up on this object: consume the rest of the broken line and stop.
                        builder.ReadToEndOfLine();
                        break;
                    }
                }

                break;

            default:
                // Any other token is unexpected here; record the whole line as unknown.
                builder.As(StatementType.Unknown, ParserTokenType.Whitespace, indent)
                    .ReadToEndOfLine();
                break;
        }

        yield return builder.ToStatement();
    }
}
/// <summary>
/// Lexes the text beginning at <paramref name="start"/> with <c>Lexer.LexTokens</c> and groups
/// the resulting tokens into statements, yielding one statement per pass of the outer loop
/// until the token buffer has no current token left.
/// </summary>
/// <remarks>
/// The first token of each statement selects its type: a newline or comment is ignorable, the
/// type keyword starts a message type definition, an identifier starts a message, and leading
/// whitespace starts either an ignorable line or a message instance. Anything else is reported
/// as unknown. This method is an iterator, so the statements are built as the returned
/// sequence is enumerated.
/// </remarks>
/// <param name="start">Position handed to the lexer as the place to begin.</param>
/// <param name="textProvider">Text source handed to the lexer.</param>
/// <returns>The statements built from the token stream, in the order they were parsed.</returns>
public static IEnumerable<Statement<StatementType, ParserTokenType>> ParseStatements(Position start, ITextProvider textProvider)
{
    var tokens = new TokenBuffer<LexerTokenType>(Lexer.LexTokens(start, textProvider));
    var tokenInfo = new MsgsTokenInfo();

    while (tokens.CurrentToken != null)
    {
        var builder = new StatementBuilder<StatementType, ParserTokenType, LexerTokenType>(tokens, tokenInfo);

        // Every statement type except a bare newline finishes by reading the rest of its line.
        bool finishLine = true;

        switch (tokens.CurrentTokenType)
        {
            case LexerTokenType.Newline:
                builder.As(StatementType.Ignorable, ParserTokenType.Whitespace);

                // Do *not* read to the end of the line here: the newline has already been read.
                finishLine = false;
                break;

            case LexerTokenType.Comment:
                builder.As(StatementType.Ignorable, ParserTokenType.Comment);
                break;

            case LexerTokenType.TypeKeyword:
                // Type keyword, an identifier, then two numbers, each separated by whitespace.
                builder.As(StatementType.MessageTypeDefiniton, ParserTokenType.TypeKeyword)
                    .Expect(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Expect(LexerTokenType.Identifier, ParserTokenType.MessageTypeDefinition)
                    .Expect(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Expect(LexerTokenType.Number, ParserTokenType.MessageTypeRange)
                    .Expect(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Expect(LexerTokenType.Number, ParserTokenType.MessageTypeRange);
                break;

            case LexerTokenType.Identifier:
                // A leading identifier opens a new message: the message type, the message
                // name, and optionally a number after it.
                builder.As(StatementType.Message, ParserTokenType.MessageType)
                    .Expect(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Expect(LexerTokenType.Identifier, ParserTokenType.MessageName)
                    .Accept(LexerTokenType.Whitespace, ParserTokenType.Whitespace)
                    .Accept(LexerTokenType.Number, ParserTokenType.MessageTypeRange);
                break;

            case LexerTokenType.Whitespace:
                // Leading whitespace introduces a message instance, a blank line or a
                // comment; consume it and look at the following token to tell which.
                Token<LexerTokenType> indent;
                tokens.Accept(LexerTokenType.Whitespace, out indent);

                if (!tokens.Is(LexerTokenType.Newline) && !tokens.Is(LexerTokenType.Comment))
                {
                    // Beginning of a message instance.
                    builder.As(StatementType.MessageInstance, ParserTokenType.Whitespace, indent);

                    // Walk the remaining tokens on this line, interpreting the replacements
                    // and escapes as we go.
                    while (tokens.CurrentToken != null
                        && !tokens.Is(LexerTokenType.Newline)
                        && !tokens.Is(LexerTokenType.Comment))
                    {
                        switch (tokens.CurrentTokenType)
                        {
                            case LexerTokenType.Escape:
                                builder.Expect(LexerTokenType.Escape, ParserTokenType.Escape);
                                break;

                            case LexerTokenType.LeftBrace:
                                // Replacement: '{', an optional bracketed part holding either
                                // a type with an optional ",position" or just a position, a
                                // name, an optional ",alignment", an optional ":format", '}'.
                                builder.Accept(LexerTokenType.LeftBrace, ParserTokenType.LeftBrace)
                                    .Accept(
                                        LexerTokenType.LeftBracket,
                                        ParserTokenType.LeftBracket,
                                        bracketed => bracketed
                                            .Accept(
                                                LexerTokenType.Identifier,
                                                ParserTokenType.ReplacementType,
                                                typed => typed.Accept(
                                                    LexerTokenType.Comma,
                                                    ParserTokenType.Comma,
                                                    afterComma => afterComma.Expect(LexerTokenType.Number, ParserTokenType.ReplacementPosition)),
                                                untyped => untyped.Expect(LexerTokenType.Number, ParserTokenType.ReplacementPosition))
                                            .Expect(LexerTokenType.RightBracket, ParserTokenType.RightBracket))
                                    .Expect(LexerTokenType.Identifier, ParserTokenType.ReplacementName)
                                    .Accept(
                                        LexerTokenType.Comma,
                                        ParserTokenType.Comma,
                                        afterComma => afterComma.Expect(LexerTokenType.Number, ParserTokenType.ReplacementAlignment))
                                    .Accept(
                                        LexerTokenType.Colon,
                                        ParserTokenType.Colon,
                                        afterColon => afterColon.AggregateWhileNot(LexerTokenType.RightBrace, ParserTokenType.ReplacementFormat))
                                    .Expect(LexerTokenType.RightBrace, ParserTokenType.RightBrace);
                                break;

                            case LexerTokenType.RightBrace:
                                // A right brace outside of a replacement needs to be escaped.
                                // The '}' is doubled in the text below because the message is
                                // used in a string.Format call.
                                builder.Unexpected("Unescaped right-brace. This must be escaped (\\}}) for use in the message.");
                                break;

                            case LexerTokenType.Value:
                            default:
                                // Identifiers, numbers, etc. all get treated as plain values.
                                builder.AcceptAny(ParserTokenType.Value);
                                break;
                        }

                        // Re-enable the statement so that we can keep consuming tokens.
                        builder.Enable();
                    }
                }
                else
                {
                    // Only a newline or a comment follows the whitespace: nothing to interpret.
                    builder.As(StatementType.Ignorable, ParserTokenType.Whitespace, indent);
                }

                break;

            default:
                // Unknown, unexpected token.
                builder.As(StatementType.Unknown, ParserTokenType.Unknown);
                break;
        }

        if (finishLine)
        {
            builder.ReadToEndOfLine(tokens);
        }

        yield return builder.ToStatement();
    }
}