/// <summary>
/// Consumes an optional exponent part of a numeric literal at the current buffer position.
/// </summary>
/// <param name="test">The exponent marker to look for, compared case-insensitively.</param>
/// <remarks>
/// When the next character matches <paramref name="test"/>, the marker, one optional
/// '+' or '-' sign and every following character accepted by <c>SyntaxUtils.IsNumeric</c>
/// are consumed. Otherwise the buffer is not advanced.
/// </remarks>
private void AdvanceExponent(char test)
{
    // Fold the case culture-invariant, so the result never depends on the current thread culture
    if (char.ToLowerInvariant(test) != char.ToLowerInvariant(Buffer.Peek()))
    {
        return;
    }

    // Skip the exponent marker itself
    Buffer.AdvanceColumn();

    // Optional sign
    char sign = Buffer.Peek();
    if (sign == '+' || sign == '-')
    {
        Buffer.AdvanceColumn();
    }

    // Exponent digits
    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsNumeric);
}
/// <summary>
/// Consumes the raw content of a code section until an 'endcode' command is reached.
/// </summary>
/// <param name="commandResult">Result of the command that opened the code section. Its start position and command name are used for the error report.</param>
/// <returns>True when the 'endcode' command was found and pushed as a token, false when the stream ended before.</returns>
private bool LexUntilCodeEnd(CommandResult commandResult)
{
    // Nothing inside a code section is interpreted as doxygen,
    // so everything is skipped until '@endcode' or '\endcode' shows up.
    while (!Buffer.IsEOF)
    {
        char current = Buffer.Peek();
        char next = Buffer.Peek(1);

        bool looksLikeCommand = (current == '@' || current == '\\') && SyntaxUtils.IsIdentStart(next);
        if (looksLikeCommand)
        {
            Buffer.StartLexeme();
            Buffer.AdvanceColumn();
            Buffer.AdvanceColumnsWhile(SyntaxUtils.IsIdentPart);

            // Command name without the leading '@' or '\'
            string name = Buffer.GetSourceText(Buffer.LexemeStart.Index + 1, Buffer.LexemeWidth - 1);
            if ("endcode".Equals(name))
            {
                PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.CommandEnd, Buffer.LexemeRange, true));
                return true;
            }

            // Any other command is simply part of the code section
            continue;
        }

        if (SyntaxUtils.IsLineBreak(current))
        {
            int lineBreakLength = SyntaxUtils.GetLineBreakChars(current, next);
            Buffer.AdvanceLine(lineBreakLength);
        }
        else if (current == '\t')
        {
            Buffer.AdvanceTab();
        }
        else
        {
            Buffer.AdvanceColumn();
        }
    }

    // Stream ended without a closing command
    AddError(commandResult.StartPos, $"Unterminated code-block, expect '@endcode' or '\\endcode'", "Code", commandResult.CommandName);
    return false;
}
/// <summary>
/// Lexes an identifier at the current buffer position and classifies it.
/// </summary>
/// <param name="isPreprocessor">When true, the identifier is checked against the preprocessor keywords first.</param>
/// <returns>
/// A complete <c>LexResult</c> whose kind is a preprocessor keyword, reserved keyword,
/// type keyword or a plain identifier literal, checked in that order.
/// </returns>
/// <remarks>The buffer must be positioned on an identifier start character (asserted in debug builds).</remarks>
private LexResult LexIdent(bool isPreprocessor)
{
    Debug.Assert(SyntaxUtils.IsIdentStart(Buffer.Peek()));

    // Collect all identifier characters
    StringBuilder identBuffer = new StringBuilder();
    while (!Buffer.IsEOF)
    {
        char c = Buffer.Peek();
        if (!SyntaxUtils.IsIdentPart(c))
        {
            break;
        }
        identBuffer.Append(c);
        Buffer.AdvanceColumn();
    }
    string identString = identBuffer.ToString();

    // Classify, the first matching keyword set wins
    CppTokenKind kind;
    if (isPreprocessor && PreProcessorKeywords.Contains(identString))
    {
        kind = CppTokenKind.PreprocessorKeyword;
    }
    else if (ReservedKeywords.Contains(identString))
    {
        kind = CppTokenKind.ReservedKeyword;
    }
    else if (TypeKeywords.Contains(identString) || GlobalClassKeywords.Contains(identString))
    {
        kind = CppTokenKind.TypeKeyword;
    }
    else
    {
        kind = CppTokenKind.IdentLiteral;
    }

    return new LexResult(kind, true);
}
/// <summary>
/// Advances through the stream until the next tag begins or the stream ends.
/// </summary>
/// <param name="state">Lexer state, not used by this implementation.</param>
/// <returns>
/// True after a tag was handed to <c>LexTag</c>, false after the EOF token was pushed.
/// </returns>
protected override bool LexNext(State state)
{
    while (true)
    {
        Buffer.SkipAllWhitespaces();

        char current = Buffer.Peek();

        // A tag starts here, let the tag lexer take over
        if (current == '<')
        {
            LexTag();
            return true;
        }

        // The invalid character marks the real end only when the stream is exhausted
        if (current == TextStream.InvalidCharacter && Buffer.IsEOF)
        {
            break;
        }

        // Anything else is skipped
        Buffer.AdvanceColumn();
        if (Buffer.IsEOF)
        {
            break;
        }
    }

    PushToken(HtmlTokenPool.Make(HtmlTokenKind.EOF, new TextRange(Buffer.TextPosition, 0), false));
    return false;
}
/// <summary>
/// Lexes a doxygen command starting at the current buffer position, including all
/// arguments defined by the matching command rule, and pushes a token for each part.
/// </summary>
/// <returns>
/// A <c>CommandResult</c> holding the command start position, the rule found for the command
/// name (may be null) and the command name. <c>IsValid</c> is set to true only when the end
/// of this method is reached, every error path returns the result without setting it.
/// </returns>
/// <remarks>
/// The buffer must be positioned on a command begin character (asserted in debug builds).
/// Commands without a rule are pushed as <c>InvalidCommand</c>, except group start/end.
/// Lexing of the arguments stops at the first complex line/block argument.
/// </remarks>
private CommandResult LexCommandTokens()
{
    Debug.Assert(DoxygenSyntax.IsCommandBegin(Buffer.Peek()));

    // Command
    Buffer.StartLexeme();
    Buffer.AdvanceColumn();

    DoxygenTokenKind kind = DoxygenTokenKind.Command;
    {
        char first = Buffer.Peek();
        switch (first)
        {
            case '{':
            case '}':
                kind = (first == '{') ? DoxygenTokenKind.GroupStart : DoxygenTokenKind.GroupEnd;
                Buffer.AdvanceColumn();
                break;

            case '$':
            case '@':
            case '\\':
            case '~':
            case '<':
            case '=':
            case '>':
            case '#':
            case '"':
                Buffer.AdvanceColumn();
                break;

            case ':':
            case '|':
            case '-':
                // Consume the entire run of the same character
                Buffer.AdvanceColumnsWhile(d => d.Equals(first));
                break;

            default:
                if (DoxygenSyntax.IsCommandIdentStart(first))
                {
                    while (!Buffer.IsEOF)
                    {
                        if (!DoxygenSyntax.IsCommandIdentPart(Buffer.Peek()))
                        {
                            break;
                        }
                        Buffer.AdvanceColumn();
                    }
                }
                break;
        }
    }

    TextPosition commandStart = Buffer.LexemeStart;
    int commandLen = Buffer.LexemeWidth;

    // Command name without the leading command begin character
    string commandName = Buffer.GetSourceText(Buffer.LexemeStart.Index + 1, commandLen - 1);

    var rule = DoxygenSyntax.GetCommandRule(commandName);
    if (rule != null)
    {
        if (rule.Kind == DoxygenSyntax.CommandKind.StartCommandBlock)
        {
            kind = DoxygenTokenKind.CommandStart;
        }
        else if (rule.Kind == DoxygenSyntax.CommandKind.EndCommandBlock)
        {
            kind = DoxygenTokenKind.CommandEnd;
        }
    }
    else
    {
        // @NOTE(final): Group start/end are not a "known" command
        if (kind != DoxygenTokenKind.GroupStart && kind != DoxygenTokenKind.GroupEnd)
        {
            kind = DoxygenTokenKind.InvalidCommand;
        }
    }

    DoxygenToken commandToken = DoxygenTokenPool.Make(kind, Buffer.LexemeRange, true);
    PushToken(commandToken);

    CommandResult result = new CommandResult(commandStart, rule, commandName);

    string typeName = "Command";

    if (rule != null)
    {
        int argNumber = 0;
        bool noMoreArgs = false;
        foreach (var arg in rule.Args)
        {
            // @TODO(final): Handle rule repeat type for arguments on same type
            char first = Buffer.Peek();
            if (!arg.Flags.HasFlag(DoxygenSyntax.ArgumentFlags.DirectlyAfterCommand))
            {
                if (SyntaxUtils.IsSpacing(first) || first == '\t')
                {
                    Buffer.SkipSpacings(TextStream.SkipType.All);
                }
                else
                {
                    // No more arguments are following
                    noMoreArgs = true;
                }
            }

            Buffer.StartLexeme();

            // Prefix
            string prefix = arg.Prefix;
            string postfix = arg.Postfix;
            bool hadPrefix = false;
            if (prefix != null && !noMoreArgs)
            {
                if (!string.IsNullOrEmpty(prefix))
                {
                    if (Buffer.CompareText(0, prefix) == 0)
                    {
                        Buffer.AdvanceColumns(prefix.Length);
                        hadPrefix = true;
                    }
                }
                else if ((prefix.Length == 0) && (!string.IsNullOrEmpty(postfix)))
                {
                    // An empty prefix combined with a postfix counts as a matched prefix
                    hadPrefix = true;
                }
            }

            switch (arg.Kind)
            {
                case DoxygenSyntax.ArgumentKind.PrefixToPostfix:
                    {
                        if (hadPrefix && !noMoreArgs)
                        {
                            Debug.Assert(!string.IsNullOrEmpty(postfix));
                            bool foundPrefixToPostfix = false;
                            while (!Buffer.IsEOF)
                            {
                                if (Buffer.CompareText(0, postfix) == 0)
                                {
                                    Buffer.AdvanceColumns(postfix.Length);
                                    foundPrefixToPostfix = true;
                                    break;
                                }
                                else if (SyntaxUtils.IsLineBreak(Buffer.Peek()))
                                {
                                    break;
                                }
                                else
                                {
                                    Buffer.AdvanceColumn();
                                }
                            }
                            if (arg.IsOptional || foundPrefixToPostfix)
                            {
                                DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentCaption, Buffer.LexemeRange, foundPrefixToPostfix);
                                PushToken(argToken);
                            }
                            else if (arg.IsRequired)
                            {
                                AddError(Buffer.TextPosition, $"Expected postfix '{postfix}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                                return (result);
                            }
                        }
                        else if (arg.IsOptional)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentCaption, Buffer.LexemeRange, false);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Expected prefix '{prefix}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.MultipleObjectReference:
                case DoxygenSyntax.ArgumentKind.SingleObjectReference:
                    {
                        // @TODO(final): ReferencedObject is not always a identifier
                        // Here are some examples of valid referenced objects:
                        // simple_identifier
                        // a_function()
                        // my::awesome::namespace::object
                        // my::awesome::namespace::function()
                        // my#awesome#namespace#function()
                        // method1,method2(),class#field
                        bool foundRef = false;
                        if (!noMoreArgs)
                        {
                            bool allowMultiple = arg.Kind == DoxygenSyntax.ArgumentKind.MultipleObjectReference;
                            bool requireIdent = true;
                            int referenceCount = 0;
                            while (!Buffer.IsEOF)
                            {
                                char c0 = Buffer.Peek();
                                char c1 = Buffer.Peek(1);
                                if (!requireIdent)
                                {
                                    // After an identifier only a separator may continue the reference
                                    if (c0 == ':' && c1 == ':')
                                    {
                                        Buffer.AdvanceColumns(2);
                                        requireIdent = true;
                                        continue;
                                    }
                                    else if (c0 == '#')
                                    {
                                        Buffer.AdvanceColumn();
                                        requireIdent = true;
                                        continue;
                                    }
                                    else if (c0 == ',' && referenceCount > 0 && allowMultiple)
                                    {
                                        Buffer.AdvanceColumn();
                                        requireIdent = true;
                                        continue;
                                    }
                                    else
                                    {
                                        // Correct termination of object-reference
                                        foundRef = true;
                                        break;
                                    }
                                }
                                else
                                {
                                    if (SyntaxUtils.IsIdentStart(c0))
                                    {
                                        requireIdent = false;
                                        while (!Buffer.IsEOF)
                                        {
                                            if (!SyntaxUtils.IsIdentPart(Buffer.Peek()))
                                            {
                                                break;
                                            }
                                            Buffer.AdvanceColumn();
                                        }
                                        if (Buffer.Peek() == '(')
                                        {
                                            // Parse until right parent
                                            Buffer.AdvanceColumn();
                                            bool terminatedFunc = false;
                                            while (!Buffer.IsEOF)
                                            {
                                                if (Buffer.Peek() == ')')
                                                {
                                                    Buffer.AdvanceColumn();
                                                    terminatedFunc = true;
                                                    break;
                                                }
                                                Buffer.AdvanceAuto();
                                            }
                                            if (!terminatedFunc)
                                            {
                                                AddError(Buffer.TextPosition, $"Unterminated function reference for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                                                return (result);
                                            }
                                        }
                                        ++referenceCount;
                                        continue;
                                    }
                                    else
                                    {
                                        AddError(Buffer.TextPosition, $"Requires identifier, but found '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                                        return (result);
                                    }
                                }
                            }
                            if (Buffer.IsEOF)
                            {
                                // Correct termination of object-reference when stream ends (Single-line)
                                foundRef = true;
                            }
                        }
                        if (arg.IsOptional || foundRef)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentIdent, Buffer.LexemeRange, foundRef);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unexpected character '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.Identifier:
                    {
                        bool foundIdent = false;

                        // Special handling for @param command and ... parameter
                        if (!noMoreArgs && "param".Equals(commandName) && (arg.Kind == DoxygenSyntax.ArgumentKind.Identifier))
                        {
                            if (Buffer.Peek() == '.')
                            {
                                char c1 = Buffer.Peek(1);
                                char c2 = Buffer.Peek(2);
                                if (c1 == '.' && c2 == '.')
                                {
                                    Buffer.AdvanceColumns(3);
                                    foundIdent = true;
                                }
                            }
                        }

                        // We dont allow parsing a ident, when any special handling was matched
                        if (!noMoreArgs && !foundIdent && SyntaxUtils.IsIdentStart(Buffer.Peek()))
                        {
                            foundIdent = true;
                            while (!Buffer.IsEOF)
                            {
                                if (!SyntaxUtils.IsIdentPart(Buffer.Peek()))
                                {
                                    break;
                                }
                                Buffer.AdvanceColumn();
                            }
                        }
                        if (arg.IsOptional || foundIdent)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentIdent, Buffer.LexemeRange, foundIdent);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unexpected character '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.HeaderFile:
                case DoxygenSyntax.ArgumentKind.HeaderName:
                    {
                        bool foundFilename = false;
                        if (!noMoreArgs)
                        {
                            bool requiredQuotes = arg.Kind == DoxygenSyntax.ArgumentKind.HeaderName;
                            char curChar = Buffer.Peek();
                            if (curChar == '<' || curChar == '\"')
                            {
                                char quoteChar = curChar == '<' ? '>' : '\"';
                                Buffer.AdvanceColumn();
                                while (!Buffer.IsEOF)
                                {
                                    curChar = Buffer.Peek();
                                    if (curChar == quoteChar)
                                    {
                                        Buffer.AdvanceColumn();
                                        foundFilename = true;
                                        break;
                                    }
                                    else if (SyntaxUtils.IsLineBreak(curChar))
                                    {
                                        break;
                                    }
                                    Buffer.AdvanceColumn();
                                }
                                if (!foundFilename)
                                {
                                    AddError(Buffer.TextPosition, $"Unterminated filename, expect quote char '{quoteChar}' but got '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                                    return (result);
                                }
                            }
                            else if (!requiredQuotes)
                            {
                                if (SyntaxUtils.IsFilename(Buffer.Peek()))
                                {
                                    foundFilename = true;
                                    while (!Buffer.IsEOF)
                                    {
                                        if (!SyntaxUtils.IsFilename(Buffer.Peek()))
                                        {
                                            break;
                                        }
                                        Buffer.AdvanceColumn();
                                    }
                                }
                            }
                        }
                        if (arg.IsOptional || foundFilename)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentFile, Buffer.LexemeRange, foundFilename);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unexpected character '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.SingleWord:
                    {
                        // @TODO(final): IsWordStart()
                        bool foundWord = false;
                        if (!noMoreArgs && char.IsLetterOrDigit(Buffer.Peek()))
                        {
                            foundWord = true;
                            while (!Buffer.IsEOF)
                            {
                                // @TODO(final): IsWordPart()
                                if (char.IsWhiteSpace(Buffer.Peek()))
                                {
                                    break;
                                }
                                Buffer.AdvanceColumn();
                            }
                        }
                        if (arg.IsOptional || foundWord)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentCaption, Buffer.LexemeRange, foundWord);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unexpected character '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.QuotedString:
                    {
                        bool isComplete = false;

                        // @TODO(final): Make quotes configurable in the argument rule
                        bool hasQuote = Buffer.Peek() == '"' || Buffer.Peek() == '<';
                        char endQuote = char.MaxValue;
                        if (hasQuote && !noMoreArgs)
                        {
                            endQuote = Buffer.Peek() == '<' ? '>' : '"';
                            Buffer.AdvanceColumn();
                            while (!Buffer.IsEOF)
                            {
                                // hasQuote is always true at this point, so the unquoted branch is currently never taken
                                if (!hasQuote)
                                {
                                    if (char.IsWhiteSpace(Buffer.Peek()))
                                    {
                                        break;
                                    }
                                }
                                else
                                {
                                    if (Buffer.Peek() == endQuote)
                                    {
                                        Buffer.AdvanceColumn();
                                        isComplete = true;
                                        break;
                                    }
                                    else if (SyntaxUtils.IsLineBreak(Buffer.Peek()) || Buffer.Peek() == TextStream.InvalidCharacter)
                                    {
                                        break;
                                    }
                                }
                                Buffer.AdvanceColumn();
                            }
                            if (!isComplete)
                            {
                                AddError(Buffer.TextPosition, $"Unterminated quote string for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                                return (result);
                            }
                        }
                        if (arg.IsOptional || isComplete)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentText, Buffer.LexemeRange, isComplete);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unexpected character '{Buffer.Peek()}' for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.UntilEndOfLine:
                    {
                        bool eolFound = false;
                        if (!noMoreArgs)
                        {
                            while (!Buffer.IsEOF)
                            {
                                if (SyntaxUtils.IsLineBreak(Buffer.Peek()))
                                {
                                    eolFound = true;
                                    break;
                                }
                                Buffer.AdvanceColumn();
                            }
                            if (Buffer.IsEOF)
                            {
                                // The end of the stream terminates the line as well
                                eolFound = true;
                            }
                        }
                        if (arg.IsOptional || eolFound)
                        {
                            DoxygenToken argToken = DoxygenTokenPool.Make(DoxygenTokenKind.ArgumentText, Buffer.LexemeRange, true);
                            PushToken(argToken);
                        }
                        else if (arg.IsRequired)
                        {
                            AddError(Buffer.TextPosition, $"Unterminated end-of-line for argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                            return (result);
                        }
                    }
                    break;

                case DoxygenSyntax.ArgumentKind.ComplexLine:
                case DoxygenSyntax.ArgumentKind.ComplexBlock:
                    // @TODO(final): Implement complex line/block properly
                    goto CommandDone;

                default:
                    AddError(Buffer.TextPosition, $"Unsupported argument ({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                    return (result);
            }

            // Postfix
            if (!noMoreArgs && (hadPrefix && !string.IsNullOrWhiteSpace(postfix) && arg.Kind != DoxygenSyntax.ArgumentKind.PrefixToPostfix))
            {
                if (Buffer.CompareText(0, postfix) == 0)
                {
                    // Skip exactly the matched postfix, its length may differ from the prefix length
                    Buffer.AdvanceColumns(postfix.Length);
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Expected postfix '{postfix}' for pp-argument({argNumber}:{arg}) in command '{commandName}'", typeName, commandName);
                    return (result);
                }
            }
            ++argNumber;
        }
    }

CommandDone:
    result.IsValid = true;

    return (result);
}
/// <summary>
/// Lexes the next part of a doxygen documentation stream: block starts and ends,
/// line ends, java-doc stars and commands. Ordinary comments are skipped.
/// </summary>
/// <param name="hiddenState">The lexer state, must be a <c>DoxygenState</c>. Its flags and current line start index are reset on entry.</param>
/// <returns>
/// True when a single-line block was finished by a line break, otherwise the result of
/// <c>PushToken</c> for an empty or closed multi-line block.
/// False when the EOF token was pushed or an unterminated comment/code section was reported.
/// </returns>
protected override bool LexNext(State hiddenState)
{
    DoxygenState doxyState = (DoxygenState)hiddenState;
    doxyState.Flags = StateFlags.None;
    doxyState.CurrentLineStartIndex = Buffer.StreamPosition;

    do
    {
        char c0 = Buffer.Peek();
        char c1 = Buffer.Peek(1);
        char c2 = Buffer.Peek(2);

        // The flags are not changed between here and the checks inside the switch
        bool insideBlock = doxyState.Flags.HasFlag(StateFlags.InsideBlock);

        switch (c0)
        {
            case ' ':
            case '\v':
            case '\f':
            case '\t':
                Buffer.SkipSpacings(TextStream.SkipType.All);
                break;

            case '\r':
            case '\n':
                {
                    // A line break finishes a single-line documentation block
                    if (insideBlock && doxyState.Flags.HasFlag(StateFlags.SingleLine))
                    {
                        Done(doxyState);
                        return true;
                    }

                    // @NOTE(final): Detect if our line content until the line break was empty
                    Debug.Assert(Buffer.StreamPosition >= doxyState.CurrentLineStartIndex);
                    int lineLength = Buffer.StreamPosition - doxyState.CurrentLineStartIndex;
                    bool lineWasEmpty = Buffer.MatchCharacters(doxyState.CurrentLineStartIndex, lineLength, char.IsWhiteSpace) || (lineLength == 0);

                    Buffer.StartLexeme();
                    Buffer.SkipLineBreaks(TextStream.SkipType.Single);
                    doxyState.CurrentLineStartIndex = Buffer.StreamPosition;

                    DoxygenTokenKind lineKind = lineWasEmpty ? DoxygenTokenKind.EmptyLine : DoxygenTokenKind.EndOfLine;
                    PushToken(DoxygenTokenPool.Make(lineKind, Buffer.LexemeRange, true));
                }
                break;

            case '/':
                {
                    if (c1 == '*')
                    {
                        if (!DoxygenSyntax.MultiLineDocChars.Contains(c2))
                        {
                            // Just skip until normal multi-line comment ends
                            var comment = CppLexer.LexMultiLineComment(Buffer, true);
                            if (!comment.IsComplete)
                            {
                                AddError(Buffer.TextPosition, $"Unterminated multi-line comment, expect '*/' but got EOF", comment.Kind.ToString());
                                return false;
                            }
                            continue;
                        }

                        // Multi line documentation block
                        Debug.Assert(!insideBlock);
                        Buffer.StartLexeme();
                        Buffer.AdvanceColumns(3);
                        doxyState.Flags = StateFlags.InsideBlock;
                        if (c2 == '*')
                        {
                            if (Buffer.Peek() == '/')
                            {
                                // The block is closed right after its start
                                Buffer.AdvanceColumn();
                                return PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.DoxyBlockStartMulti, Buffer.LexemeRange, true));
                            }
                            doxyState.Flags |= StateFlags.JavaDoc;
                        }
                        PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.DoxyBlockStartMulti, Buffer.LexemeRange, true));
                        StartText(doxyState);
                        continue;
                    }

                    if (c1 == '/')
                    {
                        char docChar = Buffer.Peek(2);
                        if (!DoxygenSyntax.SingleLineDocChars.Contains(docChar))
                        {
                            // Just skip until normal single-line comment ends
                            var comment = CppLexer.LexSingleLineComment(Buffer, true);
                            if (!comment.IsComplete)
                            {
                                AddError(Buffer.TextPosition, $"Unterminated single-line comment, expect linebreak but got EOF", comment.Kind.ToString());
                                return false;
                            }
                            continue;
                        }

                        // Single line documentation block
                        Debug.Assert(!insideBlock);
                        Buffer.StartLexeme();
                        Buffer.AdvanceColumns(3);
                        doxyState.Flags = StateFlags.InsideBlock | StateFlags.SingleLine;
                        PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.DoxyBlockStartSingle, Buffer.LexemeRange, true));
                        StartText(doxyState);
                        continue;
                    }

                    // A lonely slash
                    Buffer.AdvanceColumn();
                }
                break;

            case '*':
                {
                    if (insideBlock)
                    {
                        if (c1 == '/')
                        {
                            // End of the documentation block
                            EndText(doxyState);
                            Buffer.StartLexeme();
                            Buffer.AdvanceColumns(2);
                            doxyState.Flags = StateFlags.None;
                            return PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.DoxyBlockEnd, Buffer.LexemeRange, true));
                        }

                        if (doxyState.Flags.HasFlag(StateFlags.JavaDoc))
                        {
                            // Push single star token (java doc style)
                            EndText(doxyState);
                            Buffer.StartLexeme();
                            Buffer.AdvanceColumn();
                            PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.DoxyBlockChars, Buffer.LexemeRange, true));
                            StartText(doxyState);
                            doxyState.CurrentLineStartIndex = Buffer.StreamPosition;
                            continue;
                        }
                    }
                    Buffer.AdvanceColumn();
                }
                break;

            case '@':
            case '\\':
                {
                    if (!insideBlock)
                    {
                        Buffer.AdvanceColumn();
                        break;
                    }

                    EndText(doxyState);
                    var command = LexCommandTokens();

                    // A valid code command swallows everything up to its end command
                    if (command.IsValid && "code".Equals(command.CommandName) && !LexUntilCodeEnd(command))
                    {
                        return false;
                    }
                    StartText(doxyState);
                }
                break;

            case TextStream.InvalidCharacter:
                {
                    if (!Buffer.IsEOF)
                    {
                        Buffer.AdvanceColumn();
                        break;
                    }
                    Done(doxyState);
                    PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.EOF, new TextRange(Buffer.TextPosition, 0), false));
                    return false;
                }

            default:
                Buffer.AdvanceColumn();
                break;
        }
    } while (!Buffer.IsEOF);

    Done(doxyState);
    PushToken(DoxygenTokenPool.Make(DoxygenTokenKind.EOF, new TextRange(Buffer.TextPosition, 0), false));
    return false;
}
/// <summary>
/// Lexes exactly one C/C++ token at the current buffer position and pushes it.
/// </summary>
/// <param name="hiddenState">The lexer state, must be a <c>CppLexerState</c>. Whitespace handling is disabled while it reports being inside a preprocessor.</param>
/// <returns>
/// False when the stream is at its end or an unexpected character was reported.
/// For a '#' the result of <c>LexPreprocessor</c>, otherwise the result of <c>PushToken</c>.
/// </returns>
protected override bool LexNext(State hiddenState)
{
    CppLexerState state = (CppLexerState)hiddenState;

    // Inside a preprocessor the caller handles spacings and line breaks on its own
    bool allowWhitespaces = !state.IsInsidePreprocessor;
    if (allowWhitespaces)
    {
        Buffer.SkipAllWhitespaces();
    }
    if (Buffer.IsEOF)
    {
        return (false);
    }

    char first = Buffer.Peek();
    char second = Buffer.Peek(1);
    char third = Buffer.Peek(2);

    Buffer.StartLexeme();
    LexResult lexRes = new LexResult(CppTokenKind.Unknown, true);
    switch (first)
    {
        case '&':
            {
                if (second == '&')
                {
                    lexRes.Kind = CppTokenKind.LogicalAndOp;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.AndAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.AndOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '|':
            {
                if (second == '|')
                {
                    lexRes.Kind = CppTokenKind.LogicalOrOp;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.OrAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.OrOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '=':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.LogicalEqualsOp;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.EqOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '!':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.LogicalNotEqualsOp;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.ExclationMark;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '<':
            {
                if (second == '<')
                {
                    if (third == '=')
                    {
                        lexRes.Kind = CppTokenKind.LeftShiftAssign;
                        Buffer.AdvanceColumns(3);
                    }
                    else
                    {
                        lexRes.Kind = CppTokenKind.LeftShiftOp;
                        Buffer.AdvanceColumns(2);
                    }
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.LessOrEqualOp;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.LessThanOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '>':
            {
                if (second == '>')
                {
                    if (third == '=')
                    {
                        lexRes.Kind = CppTokenKind.RightShiftAssign;
                        Buffer.AdvanceColumns(3);
                    }
                    else
                    {
                        lexRes.Kind = CppTokenKind.RightShiftOp;
                        Buffer.AdvanceColumns(2);
                    }
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.GreaterOrEqualOp;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.GreaterThanOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '+':
            {
                if (second == '+')
                {
                    lexRes.Kind = CppTokenKind.IncOp;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.AddAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.AddOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '-':
            {
                if (second == '-')
                {
                    lexRes.Kind = CppTokenKind.DecOp;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.SubAssign;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '>')
                {
                    lexRes.Kind = CppTokenKind.PtrOp;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.SubOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '/':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.DivAssign;
                    Buffer.AdvanceColumns(2);
                }
                else if (second == '/')
                {
                    lexRes = LexSingleLineComment(Buffer, true);
                    if (!lexRes.IsComplete)
                    {
                        // The line break characters are escaped, so the message stays on one line
                        AddError(Buffer.LexemeStart, $"Unterminated single-line comment, expect '\\n' or '\\r' but found '{Buffer.Peek()}'", lexRes.Kind.ToString());
                    }
                }
                else if (second == '*')
                {
                    lexRes = LexMultiLineComment(Buffer, true);
                    if (!lexRes.IsComplete)
                    {
                        AddError(Buffer.LexemeStart, $"Unterminated multi-line comment, expect '*/' but found '{Buffer.Peek()}'", lexRes.Kind.ToString());
                    }
                }
                else
                {
                    Buffer.AdvanceColumn();
                    lexRes.Kind = CppTokenKind.DivOp;
                }
            }
            break;

        case '*':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.MulAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.MulOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '%':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.ModAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.ModOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '.':
            {
                if (second == '.' && third == '.')
                {
                    lexRes.Kind = CppTokenKind.Ellipsis;
                    Buffer.AdvanceColumns(3);
                }
                else if (SyntaxUtils.IsNumeric(second))
                {
                    // A number that starts with its dot separator
                    lexRes = LexNumber();
                }
                else
                {
                    lexRes.Kind = CppTokenKind.Dot;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '^':
            {
                if (second == '=')
                {
                    lexRes.Kind = CppTokenKind.XorAssign;
                    Buffer.AdvanceColumns(2);
                }
                else
                {
                    lexRes.Kind = CppTokenKind.XorOp;
                    Buffer.AdvanceColumn();
                }
            }
            break;

        case '#':
            // The preprocessor lexer pushes all of its tokens on its own
            return (LexPreprocessor(state));

        case '"':
            lexRes = LexString("string");
            break;

        case '\'':
            lexRes = LexString("char");
            break;

        case '~':
            lexRes.Kind = CppTokenKind.Tilde;
            Buffer.AdvanceColumn();
            break;

        case '\\':
            lexRes.Kind = CppTokenKind.Backslash;
            Buffer.AdvanceColumn();
            break;

        case ',':
            lexRes.Kind = CppTokenKind.Comma;
            Buffer.AdvanceColumn();
            break;

        case ';':
            lexRes.Kind = CppTokenKind.Semicolon;
            Buffer.AdvanceColumn();
            break;

        case ':':
            lexRes.Kind = CppTokenKind.Colon;
            Buffer.AdvanceColumn();
            break;

        case '?':
            lexRes.Kind = CppTokenKind.QuestionMark;
            Buffer.AdvanceColumn();
            break;

        case '{':
            lexRes.Kind = CppTokenKind.LeftBrace;
            Buffer.AdvanceColumn();
            break;

        case '}':
            lexRes.Kind = CppTokenKind.RightBrace;
            Buffer.AdvanceColumn();
            break;

        case '[':
            lexRes.Kind = CppTokenKind.LeftBracket;
            Buffer.AdvanceColumn();
            break;

        case ']':
            lexRes.Kind = CppTokenKind.RightBracket;
            Buffer.AdvanceColumn();
            break;

        case '(':
            lexRes.Kind = CppTokenKind.LeftParen;
            Buffer.AdvanceColumn();
            break;

        case ')':
            lexRes.Kind = CppTokenKind.RightParen;
            Buffer.AdvanceColumn();
            break;

        default:
            {
                if (SyntaxUtils.IsLineBreak(first) && allowWhitespaces)
                {
                    lexRes.Kind = CppTokenKind.EndOfLine;
                    int nb = SyntaxUtils.GetLineBreakChars(first, second);
                    Buffer.AdvanceLine(nb);
                }
                else if (first == '\t' && allowWhitespaces)
                {
                    lexRes.Kind = CppTokenKind.Spacings;
                    while (!Buffer.IsEOF)
                    {
                        if (Buffer.Peek() != '\t')
                        {
                            break;
                        }
                        Buffer.AdvanceTab();
                    }
                }
                else if (SyntaxUtils.IsSpacing(first) && allowWhitespaces)
                {
                    lexRes.Kind = CppTokenKind.Spacings;
                    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsSpacing);
                }
                else if (SyntaxUtils.IsIdentStart(first))
                {
                    Debug.Assert(!state.IsInsidePreprocessor);
                    lexRes = LexIdent(false);
                }
                else if (SyntaxUtils.IsNumeric(first))
                {
                    lexRes = LexNumber();
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Unexpected character '{first}'", "Character");
                    return (false);
                }
            }
            break;
    }
    return (PushToken(CppTokenPool.Make(lexRes.Kind, Buffer.LexemeRange, lexRes.IsComplete)));
}
/// <summary>
/// Lexes one preprocessor directive, starting at the '#' and ending at the first
/// line break that is not escaped by a backslash, or at the end of the stream.
/// </summary>
/// <param name="state">The lexer state. It is marked as being inside a preprocessor while the directive is lexed.</param>
/// <returns>
/// True when the directive was lexed and the preprocessor end token was pushed.
/// False when an error was reported or an include was not followed by '&lt;' or '"'.
/// On those paths the preprocessor state is not ended.
/// </returns>
private bool LexPreprocessor(CppLexerState state)
{
    Debug.Assert(Buffer.Peek() == '#');

    state.StartPreprocessor();

    // Preprocessor start
    Buffer.StartLexeme();
    Buffer.AdvanceColumn();
    PushToken(CppTokenPool.Make(CppTokenKind.PreprocessorStart, Buffer.LexemeRange, true));

    do
    {
        Buffer.SkipSpacings(TextStream.SkipType.All);
        Buffer.StartLexeme();
        char first = Buffer.Peek();
        char second = Buffer.Peek(1);
        char third = Buffer.Peek(2);
        if (first == '\\')
        {
            if (SyntaxUtils.IsLineBreak(second))
            {
                // Escaped line break, the directive continues on the next line
                Buffer.AdvanceColumn();
                int lb = SyntaxUtils.GetLineBreakChars(second, third);
                Buffer.AdvanceLine(lb);
                continue;
            }
            else
            {
                // The backslash is escaped, otherwise it would be read as an escaped single quote
                AddError(Buffer.TextPosition, $"Unterminated preprocessor next-line, expect linebreak after '\\' but got '{second}'", "Preprocessor");
                return (false);
            }
        }
        else if (first == '#')
        {
            Buffer.AdvanceColumn();
            PushToken(CppTokenPool.Make(CppTokenKind.PreprocessorOperator, Buffer.LexemeRange, true));
        }
        else if (SyntaxUtils.IsLineBreak(first))
        {
            // An unescaped line break ends the directive
            int lb = SyntaxUtils.GetLineBreakChars(first, second);
            Buffer.AdvanceLine(lb);
            PushToken(CppTokenPool.Make(CppTokenKind.EndOfLine, Buffer.LexemeRange, true));
            break;
        }
        else if (SyntaxUtils.IsIdentStart(first))
        {
            LexResult identResult = LexIdent(true);
            CppToken identToken = CppTokenPool.Make(identResult.Kind, Buffer.LexemeRange, identResult.IsComplete);
            PushToken(identToken);

            Buffer.SkipSpacings(TextStream.SkipType.All);
            Buffer.StartLexeme();

            if (identToken.Kind == CppTokenKind.PreprocessorKeyword)
            {
                switch (identToken.Value)
                {
                    case "define":
                        {
                            if (!SyntaxUtils.IsIdentStart(Buffer.Peek()))
                            {
                                AddError(Buffer.TextPosition, $"Expect identifier for define, but got '{Buffer.Peek()}'", "Preprocessor");
                                return (false);
                            }
                            LexResult defineValueResult = LexIdent(false);
                            CppToken defineValueToken = CppTokenPool.Make(CppTokenKind.PreprocessorDefineSource, Buffer.LexemeRange, defineValueResult.IsComplete);
                            PushToken(defineValueToken);
                        }
                        break;

                    case "defined":
                        {
                            if (Buffer.Peek() == '(')
                            {
                                // NOTE(review): the lexeme was started before the '(' is skipped, so the
                                // range of the target token appears to include the '(' - confirm this is intended
                                Buffer.AdvanceColumn();
                                if (!SyntaxUtils.IsIdentStart(Buffer.Peek()))
                                {
                                    AddError(Buffer.TextPosition, $"Expect identifier for defined, but got '{Buffer.Peek()}'", "Preprocessor");
                                    return (false);
                                }
                                LexResult definedValueResult = LexIdent(false);
                                CppToken definedValueToken = CppTokenPool.Make(CppTokenKind.PreprocessorDefineTarget, Buffer.LexemeRange, definedValueResult.IsComplete);
                                PushToken(definedValueToken);
                                Buffer.SkipSpacings(TextStream.SkipType.All);
                                if (Buffer.Peek() != ')')
                                {
                                    AddError(Buffer.TextPosition, $"Unterminated defined token, expect ')' but got '{Buffer.Peek()}'", "Preprocessor");
                                    return (false);
                                }
                            }
                        }
                        break;

                    case "include":
                        {
                            char n = Buffer.Peek();
                            if (n == '<' || n == '"')
                            {
                                bool isComplete = false;
                                Buffer.AdvanceColumn();
                                char quote = (n == '<') ? '>' : n;
                                while (!Buffer.IsEOF)
                                {
                                    char c = Buffer.Peek();
                                    if (c == quote)
                                    {
                                        isComplete = true;
                                        Buffer.AdvanceColumn();
                                        break;
                                    }
                                    if (SyntaxUtils.IsLineBreak(c))
                                    {
                                        // An unterminated include ends at the line break instead of
                                        // swallowing the following lines, the token stays incomplete
                                        break;
                                    }
                                    Buffer.AdvanceColumn();
                                }
                                CppToken includeToken = CppTokenPool.Make(CppTokenKind.PreprocessorInclude, Buffer.LexemeRange, isComplete);
                                PushToken(includeToken);
                            }
                            else
                            {
                                return (false);
                            }
                        }
                        break;

                    case "pragma":
                        {
                            // @NOTE(final): Just skip until end-of-line
                            while (!Buffer.IsEOF)
                            {
                                char c = Buffer.Peek();
                                if (SyntaxUtils.IsLineBreak(c))
                                {
                                    break;
                                }
                                if (c == '\t')
                                {
                                    Buffer.AdvanceTab();
                                }
                                else
                                {
                                    Buffer.AdvanceColumn();
                                }
                            }
                        }
                        break;
                }
            }
        }
        else
        {
            // Everything else is a regular token
            if (!LexNext(state))
            {
                break;
            }
        }
    } while (!Buffer.IsEOF);

    state.EndPreprocessor();

    PushToken(CppTokenPool.Make(CppTokenKind.PreprocessorEnd, new TextRange(Buffer.TextPosition, 0), true));

    return (true);
}
/// <summary>
/// Lexes a numeric literal (integer, octal, hex, binary, float or hexadecimal float)
/// at the current buffer position.
/// </summary>
/// <returns>
/// The detected literal kind. The result is incomplete when a single quote separator
/// is not preceded by digits, or when no digits follow a leading dot.
/// </returns>
/// <remarks>
/// The buffer must be positioned on a numeric character or on a dot (asserted in debug builds).
/// Missing digits are reported through <c>AddError</c>.
/// </remarks>
private LexResult LexNumber()
{
    Debug.Assert(SyntaxUtils.IsNumeric(Buffer.Peek()) || Buffer.Peek() == '.');
    CppTokenKind kind;
    char first = Buffer.Peek(0);
    char second = Buffer.Peek(1);
    bool dotSeen = false;
    if (first == '0')
    {
        if (second == 'x' || second == 'X')
        {
            // Hex
            kind = CppTokenKind.HexLiteral;
            Buffer.AdvanceColumns(2); // Skip 0[xX]
        }
        else if (second == 'b' || second == 'B')
        {
            // Binary
            kind = CppTokenKind.BinaryLiteral;
            Buffer.AdvanceColumns(2); // Skip 0[bB]
        }
        else
        {
            // Octal
            kind = CppTokenKind.OctalLiteral;
        }
    }
    else if (first == '.')
    {
        Debug.Assert(SyntaxUtils.IsNumeric(second));
        kind = CppTokenKind.IntegerFloatLiteral;
        Buffer.AdvanceColumn();
        dotSeen = true;
    }
    else
    {
        Debug.Assert(SyntaxUtils.IsNumeric(first));
        kind = CppTokenKind.IntegerLiteral;
    }

    // @NOTE(final): We never set the DecimalHexLiteral kind initially,
    // as every hex decimal always starts as a normal hex literal!
    Debug.Assert(kind != CppTokenKind.HexadecimalFloatLiteral);

    // First number part
    int firstLiteralPos = Buffer.TextPosition.Index;
    bool readNextLiteral = false;
    do
    {
        readNextLiteral = false;
        int s = Buffer.TextPosition.Index;
        switch (kind)
        {
            case CppTokenKind.IntegerLiteral:
            case CppTokenKind.IntegerFloatLiteral:
                if (SyntaxUtils.IsNumeric(Buffer.Peek()))
                {
                    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsNumeric);
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Expect integer literal, but got '{Buffer.Peek()}'", kind.ToString());
                }
                break;

            case CppTokenKind.OctalLiteral:
                if (SyntaxUtils.IsOctal(Buffer.Peek()))
                {
                    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsOctal);
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Expect octal literal, but got '{Buffer.Peek()}'", kind.ToString());
                }
                break;

            case CppTokenKind.HexLiteral:
                if (SyntaxUtils.IsHex(Buffer.Peek()))
                {
                    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsHex);
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Expect hex literal, but got '{Buffer.Peek()}'", kind.ToString());
                }
                break;

            case CppTokenKind.BinaryLiteral:
                if (SyntaxUtils.IsBinary(Buffer.Peek()))
                {
                    Buffer.AdvanceColumnsWhile(SyntaxUtils.IsBinary);
                }
                else
                {
                    AddError(Buffer.TextPosition, $"Expect binary literal, but got '{Buffer.Peek()}'", kind.ToString());
                }
                break;

            default:
                AddError(Buffer.TextPosition, $"Unsupported token kind '{kind}' for integer literal on {Buffer}", kind.ToString());
                break;
        }
        bool hadIntegerLiteral = Buffer.TextPosition.Index > s;
        if (kind != CppTokenKind.IntegerFloatLiteral && kind != CppTokenKind.HexadecimalFloatLiteral)
        {
            // @NOTE(final): Single quotes (') are allowed as separators for any non-decimal literal
            char check0 = Buffer.Peek();
            if (check0 == '\'')
            {
                if (!hadIntegerLiteral)
                {
                    AddError(Buffer.TextPosition, $"Too many single quote escape in integer literal, expect any integer literal but got '{Buffer.Peek()}'", kind.ToString());
                    return (new LexResult(kind, false));
                }
                Buffer.AdvanceColumn();
                readNextLiteral = true;
            }
        }
    } while (!Buffer.IsEOF && readNextLiteral);

    // Validate any literal after starting dot
    if (dotSeen)
    {
        if (firstLiteralPos == Buffer.TextPosition.Index)
        {
            AddError(Buffer.TextPosition, $"Expect any integer literal after starting dot, but got '{Buffer.Peek()}'", kind.ToString());
            return (new LexResult(kind, false));
        }
    }

    // Dot separator
    if ((!dotSeen) &&
        ((kind == CppTokenKind.IntegerLiteral) ||
        (kind == CppTokenKind.HexLiteral) ||
        (kind == CppTokenKind.OctalLiteral)
        ))
    {
        char check0 = Buffer.Peek();
        if (check0 == '.')
        {
            dotSeen = true;
            Buffer.AdvanceColumn();
            if (kind == CppTokenKind.IntegerLiteral || kind == CppTokenKind.OctalLiteral)
            {
                kind = CppTokenKind.IntegerFloatLiteral;
            }
            else
            {
                Debug.Assert(kind == CppTokenKind.HexLiteral);
                kind = CppTokenKind.HexadecimalFloatLiteral;
            }
        }
        else if (SyntaxUtils.IsExponentPrefix(check0))
        {
            // An exponent without a dot turns the literal into a float as well
            if (kind == CppTokenKind.IntegerLiteral || kind == CppTokenKind.OctalLiteral)
            {
                kind = CppTokenKind.IntegerFloatLiteral;
            }
            else
            {
                Debug.Assert(kind == CppTokenKind.HexLiteral);
                kind = CppTokenKind.HexadecimalFloatLiteral;
            }
        }
    }

    // Decimal after dot separator
    if ((kind != CppTokenKind.IntegerFloatLiteral) &&
        (kind != CppTokenKind.HexadecimalFloatLiteral))
    {
        // Integer suffix
        if (SyntaxUtils.IsIntegerSuffix(Buffer.Peek()))
        {
            Buffer.AdvanceColumnsWhile(SyntaxUtils.IsIntegerSuffix, 3);
        }
    }
    else
    {
        if (kind == CppTokenKind.IntegerFloatLiteral)
        {
            // Float decimal
            if (SyntaxUtils.IsNumeric(Buffer.Peek()))
            {
                Buffer.AdvanceColumnsWhile(SyntaxUtils.IsNumeric);
            }
            if (Buffer.Peek() == 'e' || Buffer.Peek() == 'E')
            {
                AdvanceExponent('e');
            }
        }
        else
        {
            // Hex decimal
            Debug.Assert(kind == CppTokenKind.HexadecimalFloatLiteral);
            if (SyntaxUtils.IsHex(Buffer.Peek()))
            {
                Buffer.AdvanceColumnsWhile(SyntaxUtils.IsHex);
            }
            if (Buffer.Peek() == 'p' || Buffer.Peek() == 'P')
            {
                // The exponent marker of a hexadecimal float is 'p', passing 'e' would never match here
                AdvanceExponent('p');
            }
        }

        // Float suffix
        if (SyntaxUtils.IsFloatSuffix(Buffer.Peek()))
        {
            Buffer.AdvanceColumn();
        }
    }
    return (new LexResult(kind, true));
}
/// <summary>
/// Lexes a string or character literal whose opening quote is the current buffer character.
/// A single quote produces <c>CppTokenKind.CharLiteral</c>, a double quote <c>CppTokenKind.StringLiteral</c>.
/// Every escape sequence counts as one character. A character literal must contain exactly one
/// character, otherwise an error is added (the result is still reported as complete).
/// </summary>
/// <param name="typeName">Name of the literal type; inserted into the error messages and passed on to AddError.</param>
/// <returns>
/// A <c>LexResult</c> holding the literal kind and a flag that is true only when the closing quote
/// was found before a line break or the end of the stream.
/// </returns>
private LexResult LexString(string typeName)
{
    Debug.Assert(Buffer.Peek(0) == '"' || Buffer.Peek(0) == '\'');
    char quoteChar = Buffer.Peek();
    Buffer.AdvanceColumn();

    bool isComplete = false;
    CppTokenKind kind = quoteChar == '\'' ? CppTokenKind.CharLiteral : CppTokenKind.StringLiteral;

    // Character literals hold exactly one character, string literals are unbounded (-1 = no maximum)
    int maxCount = (kind == CppTokenKind.CharLiteral) ? 1 : -1;
    int minCount = (kind == CppTokenKind.CharLiteral) ? 1 : 0;
    int count = 0;

    while (!Buffer.IsEOF)
    {
        char first = Buffer.Peek();
        char second = Buffer.Peek(1);
        if (first == quoteChar)
        {
            // Closing quote found, it is consumed after the loop
            isComplete = true;
            break;
        }
        else if (first == '\\')
        {
            // Every switch section advances the buffer itself and continues the outer loop
            switch (second)
            {
                case '\'':
                case '"':
                case '?':
                case '\\':
                case 'a':
                case 'b':
                case 'f':
                case 'n':
                case 'e':
                case 'r':
                case 't':
                case 'v':
                    // Simple two-character escape sequence
                    Buffer.AdvanceColumns(2);
                    ++count;
                    continue;

                case 'x':
                case 'X':
                case 'u':
                case 'U':
                    // Hex/unicode escape: prefix followed by at least one hex digit
                    Buffer.AdvanceColumns(2);
                    if (SyntaxUtils.IsHex(Buffer.Peek()))
                    {
                        while (!Buffer.IsEOF && SyntaxUtils.IsHex(Buffer.Peek()))
                        {
                            Buffer.AdvanceColumn();
                        }
                    }
                    else
                    {
                        AddError(Buffer.TextPosition, $"Unsupported hex escape character '{Buffer.Peek()}'!", typeName);
                    }
                    // The prefix is already consumed, so it counts as one character either way
                    ++count;
                    continue;

                default:
                    if (SyntaxUtils.IsOctal(second))
                    {
                        // Octal escape: skip the backslash, then all following octal digits
                        Buffer.AdvanceColumn();
                        while (!Buffer.IsEOF && SyntaxUtils.IsOctal(Buffer.Peek()))
                        {
                            Buffer.AdvanceColumn();
                        }
                        ++count;
                    }
                    else
                    {
                        // Unsupported escape: report the offending character at the backslash position.
                        // The backslash MUST be consumed here, otherwise the loop would see the same
                        // backslash again and never terminate. The character after it is not counted
                        // here; the next iteration lexes it as an ordinary character (or stops at a
                        // line break / end of stream).
                        AddError(Buffer.TextPosition, $"Not supported escape character '{second}'!", typeName);
                        Buffer.AdvanceColumn();
                    }
                    continue;
            }
        }
        else if (SyntaxUtils.IsLineBreak(first))
        {
            // Literals must not span lines, leave the line break for the caller
            break;
        }
        else if (char.IsWhiteSpace(first))
        {
            Buffer.AdvanceManual(first, second);
        }
        else
        {
            Buffer.AdvanceColumn();
        }
        ++count;
    }

    if (!isComplete)
    {
        AddError(Buffer.LexemeStart, $"Unterminated {typeName} literal!", typeName);
        return(new LexResult(kind, false));
    }

    // Skip over quote char
    Debug.Assert(Buffer.Peek() == quoteChar);
    Buffer.AdvanceColumn();

    if (minCount > 0 && count < minCount)
    {
        AddError(Buffer.LexemeStart, $"Not enough characters for {typeName} literal, expect {minCount} but got {count}!", typeName);
    }
    else if (maxCount > -1 && (count > maxCount))
    {
        AddError(Buffer.LexemeStart, $"Too many characters for {typeName} literal, expect {maxCount} but got {count}!", typeName);
    }
    return(new LexResult(kind, true));
}
/// <summary>
/// Lexes one tag that starts at the current '&lt;' character and pushes its tokens:
/// a meta token for the whole tag, the tag characters, an optional tag name,
/// optional attribute name / '=' / quoted value tokens and the closing '&gt;'.
/// The meta token is pushed first and gets its final kind and length patched in afterwards.
/// </summary>
private void LexTag()
{
    Debug.Assert(Buffer.Peek() == '<');

    // Meta token for the entire tag, created up-front and completed at the end
    Buffer.StartLexeme();
    HtmlToken tagToken = HtmlTokenPool.Make(HtmlTokenKind.MetaTagStart, Buffer.LexemeRange, false);
    PushToken(tagToken);

    // Consume '<' and an optional '/' directly behind it (closing tag)
    Buffer.AdvanceColumn();
    bool isClosingTag = Buffer.Peek() == '/';
    if (isClosingTag)
    {
        tagToken.ChangeKind(HtmlTokenKind.MetaTagClose);
        Buffer.AdvanceColumn();
    }
    PushToken(HtmlTokenPool.Make(HtmlTokenKind.TagChars, Buffer.LexemeRange, true));

    // Tag name
    if (SyntaxUtils.IsIdentStart(Buffer.Peek()))
    {
        Buffer.StartLexeme();
        while (!Buffer.IsEOF && SyntaxUtils.IsIdentPart(Buffer.Peek()))
        {
            Buffer.AdvanceColumn();
        }
        PushToken(HtmlTokenPool.Make(HtmlTokenKind.TagName, Buffer.LexemeRange, true));
    }

    // Attributes are only lexed for tags that did not start with "</"
    if (!isClosingTag)
    {
        while (!Buffer.IsEOF)
        {
            Buffer.SkipAllWhitespaces();
            if (!SyntaxUtils.IsIdentStart(Buffer.Peek()))
            {
                break;
            }

            // Attribute name
            Buffer.StartLexeme();
            while (!Buffer.IsEOF && SyntaxUtils.IsIdentPart(Buffer.Peek()))
            {
                Buffer.AdvanceColumn();
            }
            PushToken(HtmlTokenPool.Make(HtmlTokenKind.AttrName, Buffer.LexemeRange, true));

            // Whitespaces are allowed before '='; a name without '=' ends the attribute list
            Buffer.SkipAllWhitespaces();
            if (Buffer.Peek() != '=')
            {
                break;
            }

            Buffer.StartLexeme();
            Buffer.AdvanceColumn();
            PushToken(HtmlTokenPool.Make(HtmlTokenKind.AttrChars, Buffer.LexemeRange, true));

            // Whitespaces are allowed after '='; only quoted values produce a value token
            Buffer.SkipAllWhitespaces();
            char delimiter = Buffer.Peek();
            if (delimiter != '"' && delimiter != '\'')
            {
                continue;
            }

            // Quoted value: runs until the matching quote, a '\n' or the end of the stream
            Buffer.StartLexeme();
            Buffer.AdvanceColumn();
            while (!Buffer.IsEOF)
            {
                char current = Buffer.Peek();
                if (current == delimiter || current == '\n')
                {
                    break;
                }
                Buffer.AdvanceColumn();
            }
            if (Buffer.Peek() == delimiter)
            {
                Buffer.AdvanceColumn();
            }
            PushToken(HtmlTokenPool.Make(HtmlTokenKind.AttrValue, Buffer.LexemeRange, true));
        }
    }

    // Optional '/' before the closing '>' (whitespaces allowed on both sides)
    Buffer.SkipAllWhitespaces();
    if (Buffer.Peek() == '/')
    {
        tagToken.ChangeKind(HtmlTokenKind.MetaTagStartAndClose);
        Buffer.AdvanceColumn();
        Buffer.SkipAllWhitespaces();
    }

    // Skip ahead to the closing '>' and emit it when present
    Buffer.SkipUntil('>');
    if (Buffer.Peek() == '>')
    {
        Buffer.StartLexeme();
        Buffer.AdvanceColumn();
        PushToken(HtmlTokenPool.Make(HtmlTokenKind.TagChars, Buffer.LexemeRange, true));
    }

    // Patch the meta token so it spans everything consumed since the tag start
    tagToken.ChangeLength(Buffer.StreamPosition - tagToken.Index);
}