/// <summary>
/// Consumes characters from <paramref name="span"/> for as long as they are
/// whitespace (as defined by <see cref="char.IsWhiteSpace(char)"/>).
/// </summary>
/// <param name="span">The span to start consuming from.</param>
/// <returns>
/// The result of consuming the first non-whitespace character, or the empty
/// result produced when no further character could be consumed.
/// </returns>
protected static Result<char> SkipWhiteSpace(TextSpan span)
{
    for (var current = span.ConsumeChar(); ; current = current.Remainder.ConsumeChar())
    {
        // Stop on the first result that is either empty or not whitespace.
        if (!current.HasValue || !char.IsWhiteSpace(current.Value))
        {
            return current;
        }
    }
}
/// <summary>
/// Consumes whitespace characters from <paramref name="span"/>, but treats
/// a line feed (<c>'\n'</c>) as significant and stops on it.
/// </summary>
/// <param name="span">The span to start consuming from.</param>
/// <returns>
/// The result of consuming the first character that is a line feed or is not
/// whitespace, or the empty result produced when no further character could
/// be consumed.
/// </returns>
static Result<char> SkipInsignificant(TextSpan span)
{
    var current = span.ConsumeChar();
    while (true)
    {
        if (!current.HasValue)
        {
            return current;
        }

        var ch = current.Value;
        if (ch == '\n' || !char.IsWhiteSpace(ch))
        {
            return current;
        }

        current = current.Remainder.ConsumeChar();
    }
}
/// <summary>
/// Splits the input into semicolon, whitespace and
/// directive-name-or-source-expression tokens.
/// </summary>
/// <param name="remainder">The span that is still to be tokenized.</param>
/// <param name="state">Tokenization state supplied by the caller; not read by this method.</param>
/// <returns>
/// One result per recognized token. If a character cannot be tokenized, a
/// single empty result positioned at that character is produced and the
/// sequence ends.
/// </returns>
protected override IEnumerable<Result<CspTokenType>> Tokenize(TextSpan remainder, TokenizationState<CspTokenType> state)
{
    while (true)
    {
        var next = remainder.ConsumeChar();
        if (!next.HasValue)
        {
            yield break;
        }

        if (next.Value == ';')
        {
            yield return Result.Value(CspTokenType.Semicolon, next.Location, next.Remainder);
            remainder = next.Remainder;
        }
        else if (next.Value == ' ')
        {
            var consumed = WhitespaceParser(remainder);
            yield return Result.Value(CspTokenType.Whitespace, consumed.Location, consumed.Remainder);
            remainder = consumed.Remainder;
        }
        else if (IsValidFirstCharForDirectiveNameOrSourceExpression(next.Value))
        {
            var consumed = DirectiveNameOrSourceExpressionParser(remainder);
            yield return Result.Value(CspTokenType.DirectiveNameOrSourceExpression, consumed.Location, consumed.Remainder);
            remainder = consumed.Remainder;
        }
        else
        {
            // Couldn't parse. Report the failure once and stop: `remainder`
            // is not advanced here, so looping again would yield the same
            // failure forever for any consumer that keeps enumerating.
            yield return Result.Empty<CspTokenType>(remainder);
            yield break;
        }
    }
}
/// <summary>
/// Test tokenizer that emits one token per character, using the character's
/// zero-based index as the token kind, and asserts along the way that
/// <paramref name="state"/> tracks the previously emitted token.
/// </summary>
/// <param name="span">The span to tokenize.</param>
/// <param name="state">
/// Tokenization state; expected to have no previous token before the first
/// token is produced and the token of kind <c>i - 1</c> before token <c>i</c>.
/// </param>
/// <returns>A result of kind <c>i</c> for the <c>i</c>-th consumed character.</returns>
protected override IEnumerable<Result<int>> Tokenize(TextSpan span, TokenizationState<int> state)
{
    Assert.NotNull(state);
    Assert.Null(state.Previous);

    // The first token is produced before any "previous token" exists.
    var current = span.ConsumeChar();
    yield return Result.Value(0, current.Location, current.Remainder);

    var index = 1;
    while (index < span.Length)
    {
        // By now the state must expose the token yielded in the prior step.
        Assert.NotNull(state.Previous);
        Assert.Equal(index - 1, state.Previous!.Value.Kind);

        current = current.Remainder.ConsumeChar();
        yield return Result.Value(index, current.Location, current.Remainder);

        ++index;
    }
}
/// <summary>
/// Splits C/C++ source text into tokens: comments (plain and documentation
/// flavoured), string and character literals, numeric literals, identifiers
/// and keywords, and the fixed tokens listed in <c>_tokenValues</c>.
/// Whitespace between tokens is skipped via <c>SkipWhiteSpace</c>.
/// </summary>
/// <param name="span">The source text to tokenize.</param>
/// <returns>
/// One result per token. Text that matches nothing is reported one character
/// at a time as <c>CppTokenKind.Unknown</c>.
/// </returns>
/// <exception cref="ParseException">
/// Thrown when text in the numeric-literal branch is not accepted by any of
/// the number parsers.
/// </exception>
protected override IEnumerable<Result<CppTokenKind>> Tokenize(TextSpan span)
{
    var next = SkipWhiteSpace(span);
    if (!next.HasValue)
    {
        yield break;
    }

    do
    {
        char ch = next.Value;
        var prevLocation = next.Location;
        switch (ch)
        {
            case '/':
                {
                    // Look at the character after the slash. It may not exist
                    // (a lone '/' at the end of the input), so HasValue is
                    // checked before Value is read.
                    var tmp = next.Remainder.ConsumeChar();
                    if (tmp.HasValue && tmp.Value == '*')
                    {
                        CppTokenKind kind = CppTokenKind.MultiLineComment;
                        var content = Comment.CStyle(next.Location);
                        if (content.HasValue)
                        {
                            // The character right after the two-character
                            // opener decides whether this is a doc comment.
                            TextSpan s = content.Location.Skip(2);
                            var r = s.ConsumeChar();
                            if (r.HasValue && DoxygenSyntax.MultiLineDocChars.Contains(r.Value))
                            {
                                kind = CppTokenKind.MultiLineCommentDoc;
                            }
                        }
                        yield return Result.Value(kind, content.Location, content.Remainder);
                        next = content.Remainder.ConsumeChar();
                    }
                    else if (tmp.HasValue && tmp.Value == '/')
                    {
                        CppTokenKind kind = CppTokenKind.SingleLineComment;
                        var content = Comment.CPlusPlusStyle(next.Location);
                        if (content.HasValue)
                        {
                            // Same check as above, against the single-line
                            // documentation markers.
                            TextSpan s = content.Location.Skip(2);
                            var r = s.ConsumeChar();
                            if (r.HasValue && DoxygenSyntax.SingleLineDocChars.Contains(r.Value))
                            {
                                kind = CppTokenKind.SingleLineCommentDoc;
                            }
                        }
                        yield return Result.Value(kind, content.Location, content.Remainder);
                        next = content.Remainder.ConsumeChar();
                    }
                    else
                    {
                        // Not a comment: let the token table handle the slash.
                        goto default;
                    }
                }
                break;

#if false
            case '#':
                {
                    var content = PreprocessorParser(next.Location);
                    yield return(Result.Value(CTokenKind.Preprocessor, content.Location, content.Remainder));
                    next = content.Remainder.ConsumeChar();
                }
                break;
#endif

            case '"':
                {
                    var content = CStringParser(next.Location);
                    yield return Result.Value(CppTokenKind.StringLiteral, content.Location, content.Remainder);
                    next = content.Remainder.ConsumeChar();
                }
                break;

            case '\'':
                {
                    var content = CCharParser(next.Location);
                    yield return Result.Value(CppTokenKind.CharLiteral, content.Location, content.Remainder);
                    next = content.Remainder.ConsumeChar();
                }
                break;

            case '.':
                {
                    // A dot followed by a digit starts a number; any other dot
                    // (including one at the very end of the input) is handled
                    // by the token table.
                    var tmp = next.Remainder.ConsumeChar();
                    if (tmp.HasValue && tmp.Value >= '0' && tmp.Value <= '9')
                    {
                        goto case '0';
                    }
                    goto default;
                }

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    // The number parsers are tried in a fixed order; the first
                    // one that accepts the text determines the token kind.

                    // Hex
                    var content = CHexNumberParser(next.Location);
                    if (content.HasValue)
                    {
                        var result = Result.Value(CppTokenKind.HexLiteral, next.Location, content.Remainder);
                        yield return result;
                        next = content.Remainder.ConsumeChar();
                        break;
                    }

                    // Octal
                    content = COctalNumberParser(next.Location);
                    if (content.HasValue)
                    {
                        var result = Result.Value(CppTokenKind.OctalLiteral, next.Location, content.Remainder);
                        yield return result;
                        next = content.Remainder.ConsumeChar();
                        break;
                    }

                    // Decimal-Float
                    content = CDecimalFloatNumberParser(next.Location);
                    if (content.HasValue)
                    {
                        var result = Result.Value(CppTokenKind.IntegerFloatLiteral, next.Location, content.Remainder);
                        yield return result;
                        next = content.Remainder.ConsumeChar();
                        break;
                    }

                    // Hex-Float
                    content = CDecimalHexNumberParser(next.Location);
                    if (content.HasValue)
                    {
                        var result = Result.Value(CppTokenKind.HexadecimalFloatLiteral, next.Location, content.Remainder);
                        yield return result;
                        next = content.Remainder.ConsumeChar();
                        break;
                    }

                    // Integer
                    content = CIntegerNumberParser(next.Location);
                    if (content.HasValue)
                    {
                        var result = Result.Value(CppTokenKind.IntegerLiteral, next.Location, content.Remainder);
                        yield return result;
                        next = content.Remainder.ConsumeChar();
                        break;
                    }

                    throw new ParseException($"Unknown number format for '{next.Location}'");
                }

            case char n when ((n >= 'a' && n <= 'z') || (n >= 'A' && n <= 'Z') || (n == '_')):
                {
                    // Ident
                    CppTokenKind kind = CppTokenKind.IdentLiteral;
                    var content = IdentParser(next.Location);
                    if (content.HasValue)
                    {
                        // Promote the identifier to a keyword kind when its
                        // text is found in one of the keyword sets.
                        string value = content.Value.ToStringValue();
                        if (CppLexer.ReservedKeywords.Contains(value))
                        {
                            kind = CppTokenKind.ReservedKeyword;
                        }
                        else if (CppLexer.GlobalClassKeywords.Contains(value) || CppLexer.TypeKeywords.Contains(value))
                        {
                            kind = CppTokenKind.TypeKeyword;
                        }
                    }
                    yield return Result.Value(kind, content.Location, content.Remainder);
                    next = content.Remainder.ConsumeChar();
                }
                break;

            default:
                {
                    CppTokenKind foundTokenKind = CppTokenKind.Unknown;
                    string foundTokenValue = null;
                    string source = next.Location.Source;
                    int index = next.Location.Position.Absolute;

                    // Take the first table entry that matches the source text
                    // at the current position. The comparison is ordinal:
                    // token texts must match character for character,
                    // independent of the current culture.
                    foreach (var tokenValue in _tokenValues)
                    {
                        if (string.CompareOrdinal(tokenValue, 0, source, index, tokenValue.Length) == 0)
                        {
                            foundTokenKind = _valueToTokenMap[tokenValue];
                            foundTokenValue = tokenValue;
                            break;
                        }
                    }

                    if (foundTokenKind != CppTokenKind.Unknown)
                    {
                        var content = Span.EqualTo(foundTokenValue)(next.Location);
                        Debug.Assert(content.HasValue && content.Value.Length > 0);
                        yield return Result.Value(foundTokenKind, content.Location, content.Remainder);
                        next = content.Remainder.ConsumeChar();
                    }
                    else
                    {
                        // Report a single unknown character. `next` is left
                        // untouched here; the stalled-cursor check below
                        // moves past the character.
                        var tmp = next.Location.ConsumeChar();
                        Debug.Assert(tmp.HasValue);
                        yield return Result.Value(CppTokenKind.Unknown, tmp.Location, tmp.Remainder);
                    }
                }
                break;
        }

        if (next.Location.Equals(prevLocation))
        {
            // ERROR: Consume next character while cursor not changed
            next = next.Remainder.ConsumeChar();
        }

        next = SkipWhiteSpace(next.Location);
    } while (next.HasValue);
}
/// <summary>
/// Tokenizes text made of <c>X</c>, <c>Y</c>, <c>:</c> and space characters.
/// At the start of the input and after every line feed, a run of
/// <c>X</c>/<c>Y</c> characters immediately followed by <c>:</c> is reported
/// as a single header token instead of individual tokens.
/// </summary>
/// <param name="input">The text to tokenize.</param>
/// <returns>
/// One result per token. Carriage returns and line feeds produce no token.
/// Any other unrecognized character produces an empty result carrying an
/// error message, after which tokenization continues with the next character.
/// </returns>
protected override IEnumerable<Result<Tokens>> Tokenize(TextSpan input)
{
    var atLineStart = true;
    var current = input.ConsumeChar();

    while (current.HasValue)
    {
        if (atLineStart)
        {
            // Buffer the leading X/Y run: whether it is a header is only
            // known once the character following the run has been seen.
            var lineStart = current.Location;
            var pending = new List<Result<Tokens>>();

            while (current.HasValue && (current.Value == 'X' || current.Value == 'Y'))
            {
                var kind = current.Value == 'X' ? Tokens.X : Tokens.Y;
                pending.Add(Result.Value(kind, current.Location, current.Remainder));
                current = current.Remainder.ConsumeChar();
            }

            if (pending.Count > 0)
            {
                if (current.HasValue && current.Value == ':')
                {
                    // Header: one token starting at the beginning of the
                    // run, with the remainder positioned after the colon.
                    yield return Result.Value(Tokens.Header, lineStart, current.Remainder);
                    current = current.Remainder.ConsumeChar();
                }
                else
                {
                    // Not a header: release the buffered tokens unchanged.
                    foreach (var queued in pending)
                    {
                        yield return queued;
                    }
                }
            }

            if (!current.HasValue)
            {
                yield break;
            }
        }

        atLineStart = false;

        switch (current.Value)
        {
            case '\r':
                // Carriage returns are dropped without producing a token.
                break;

            case '\n':
                // A line feed produces no token, but the next line may
                // start with a header.
                atLineStart = true;
                break;

            case 'X':
                yield return Result.Value(Tokens.X, current.Location, current.Remainder);
                break;

            case 'Y':
                yield return Result.Value(Tokens.Y, current.Location, current.Remainder);
                break;

            case ':':
                yield return Result.Value(Tokens.Colon, current.Location, current.Remainder);
                break;

            case ' ':
                yield return Result.Value(Tokens.Space, current.Location, current.Remainder);
                break;

            default:
                yield return Result.Empty<Tokens>(current.Location, $"unrecognized `{current.Value}`");
                break;
        }

        // Every branch, including the error branch, moves on by one character.
        current = current.Remainder.ConsumeChar();
    }
}