Example No. 1
        private static void SaveCurrentCharToNewToken(TokenizationState state, SqlTokenType tokenType)
        {
            char charToSave = state.CurrentChar;

            state.HasUnprocessedCurrentCharacter = false; //because we're using it now!
            SaveToken(state, tokenType, charToSave.ToString());
        }
Example No. 2
        private static TextParser<TokenType> GetParserToTry(TokenizationState<TokenType> state)
        {
            switch (state?.Previous?.Kind)
            {
            case null:
                return(StartOfFileParser);

            case TokenType.KeyChars:
            case TokenType.KeyEscapeSequence:
            case TokenType.KeyPhysicalNewLine:
                return(AfterKeyComponentParser);

            case TokenType.Separator:
                return(AfterSeparatorParser);

            case TokenType.Value:
                return(AfterValueComponentParser);

            case TokenType.Comment:
                return(AfterCommentParser);

            case TokenType.Whitespace:
                return(AfterWhitespaceParser);

            default:
                throw new ArgumentOutOfRangeException();
            }
        }
Example No. 3
        /// <summary>
        /// Tokenize <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The source to tokenize.</param>
        /// <returns>A result with the list of tokens or an error.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ParseException">The tokenizer could not correctly perform tokenization.</exception>
        public Result<TokenList<TKind>> TryTokenize(string source)
        {
            if (source == null) throw new ArgumentNullException(nameof(source));

            var state = new TokenizationState<TKind>();

            var sourceSpan = new TextSpan(source);
            var remainder = sourceSpan;
            var results = new List<Token<TKind>>();
            foreach (var result in Tokenize(sourceSpan, state))
            {
                if (!result.HasValue)
                    return Result.CastEmpty<TKind, TokenList<TKind>>(result);

                if (result.Remainder == remainder) // Broken parser, not a failed parsing.
                    throw new ParseException($"Zero-width tokens are not supported; token {Presentation.FormatExpectation(result.Value)} at position {result.Location.Position}.");

                remainder = result.Remainder;
                var token = new Token<TKind>(result.Value, result.Location.Until(result.Remainder));
                state.Previous = token;
                results.Add(token);
            }

            var value = new TokenList<TKind>(results.ToArray());
            return Result.Value(value, sourceSpan, remainder);
        }
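A minimal usage sketch for the method above, assuming the Superpower-style Result/Token API used throughout these examples; the MyTokenizer subclass and MyTokenKind enum are hypothetical names that do not appear in the source:

        // Hypothetical caller; MyTokenizer is assumed to derive from the stateful tokenizer base above.
        var tokenizer = new MyTokenizer();
        Result<TokenList<MyTokenKind>> result = tokenizer.TryTokenize("a = 1");
        if (result.HasValue)
        {
            foreach (Token<MyTokenKind> token in result.Value)
            {
                Console.WriteLine($"{token.Kind}: \"{token.ToStringValue()}\"");
            }
        }
        else
        {
            Console.WriteLine(result.ToString());   // a failed Result describes what went wrong and where
        }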
Example No. 4
        protected override IEnumerable<Result<int>> Tokenize(TextSpan span, TokenizationState<int> state)
        {
            Assert.NotNull(state);
            Assert.Null(state.Previous);
            var next = span.ConsumeChar();

            yield return(Result.Value(0, next.Location, next.Remainder));

            for (var i = 1; i < span.Length; ++i)
            {
                Assert.NotNull(state.Previous);
                Assert.Equal(i - 1, state.Previous!.Value.Kind);
                next = next.Remainder.ConsumeChar();
                yield return(Result.Value(i, next.Location, next.Remainder));
            }
        }
Example No. 5
        private static void SaveToken(TokenizationState state, SqlTokenType tokenType, string tokenValue)
        {
            var foundToken = new Token(tokenType, tokenValue);

            state.TokenContainer.Add(foundToken);

            long positionOfLastCharacterInToken = state.InputReader.LastCharacterPosition - (state.HasUnprocessedCurrentCharacter ? 1 : 0);

            if (state.RequestedMarkerPosition != null &&
                state.TokenContainer.MarkerToken == null &&
                state.RequestedMarkerPosition <= positionOfLastCharacterInToken
                )
            {
                state.TokenContainer.MarkerToken = foundToken;
                //TODO: this is wrong for container types, as commented elsewhere. the marker position will be too high.
                var rawPositionInToken = foundToken.Value.Length - (positionOfLastCharacterInToken - state.RequestedMarkerPosition);
                // temporarily bypass overflow issues without fixing underlying problem
                state.TokenContainer.MarkerPosition = rawPositionInToken > foundToken.Value.Length ? foundToken.Value.Length : rawPositionInToken;
            }
        }
Example No. 6
        protected override IEnumerable<Result<SqlToken>> Tokenize(
            TextSpan stringSpan,
            TokenizationState<SqlToken> tokenizationState)
        {
            var next = SkipWhiteSpace(stringSpan);

            if (!next.HasValue)
            {
                yield break;
            }

            do
            {
                if (char.IsDigit(next.Value))
                {
                    var result = OneOf(
                        (SqlToken.HexNumber, SqlParser.HexInteger),
                        (SqlToken.Number, SqlParser.Real)
                        )(next);

                    yield return(result.parseResult);

                    next = result.next;
                }
                else if (char.IsLetter(next.Value) || next.Value == '_')
                {
                    var beginIdentifier = next.Location;
                    do
                    {
                        next = next.Remainder.ConsumeChar();
                    } while (next.HasValue && (char.IsLetterOrDigit(next.Value) || next.Value == '_'));

                    if (TryGetKeyword(beginIdentifier.Until(next.Location), out var keyword))
                    {
                        yield return(Result.Value(keyword, beginIdentifier, next.Location));
                    }
                    else
                    {
                        yield return(Result.Value(SqlToken.Identifier, beginIdentifier, next.Location));
                    }
                }
                else
                {
                    var beginSymbol = next.Location;
                    do
                    {
                        next = next.Remainder.ConsumeChar();
                    } while (next.HasValue && !IsIdent(next));

                    var symbolText = beginSymbol.Until(next.Location).ToStringValue().TrimSpaces();
                    if (TryGetSymbol(symbolText, out var symbol))
                    {
                        yield return(Result.Value(symbol, beginSymbol, next.Location));
                    }
                    else
                    {
                        yield return(Result.Value(SqlToken.Unknown, beginSymbol, next.Location));
                    }
                }

                next = SkipWhiteSpace(next.Location);
            } while (next.HasValue);
        }
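The OneOf helper invoked in the digit branch above is not part of the excerpt. Below is a hedged sketch of what such a helper could look like, treating its signature and behaviour as assumptions (try each (kind, parser) pair at the current location and take the first success):

        // Hypothetical illustration only; the real OneOf is not shown in these examples.
        private static Func<Result<char>, (Result<SqlToken> parseResult, Result<char> next)> OneOf(
            params (SqlToken kind, TextParser<TextSpan> parser)[] options)
        {
            return current =>
            {
                foreach (var (kind, parser) in options)
                {
                    var attempt = parser(current.Location);
                    if (attempt.HasValue)
                    {
                        // Report the matched span under the requested token kind and advance past it.
                        return (Result.Value(kind, attempt.Location, attempt.Remainder),
                                attempt.Remainder.ConsumeChar());
                    }
                }

                // Nothing matched: surface an empty result and move on by one character.
                return (Result.Empty<SqlToken>(current.Location), current.Remainder.ConsumeChar());
            };
        }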
Example No. 7
        private static void CompleteToken(TokenizationState state, bool nextCharRead)
        {
            if (state.CurrentTokenizationType == null)
            {
                throw new Exception("Cannot complete Token, as there is no current Tokenization Type");
            }

            switch (state.CurrentTokenizationType)
            {
            case SqlTokenizationType.BlockComment:
                SaveToken(state, SqlTokenType.MultiLineComment, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.OtherNode:
                SaveToken(state, SqlTokenType.OtherNode, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.PseudoName:
                SaveToken(state, SqlTokenType.PseudoName, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleLineComment:
                SaveToken(state, SqlTokenType.SingleLineComment, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleLineCommentCStyle:
                SaveToken(state, SqlTokenType.SingleLineCommentCStyle, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleHyphen:
                SaveToken(state, SqlTokenType.OtherOperator, "-");
                break;

            case SqlTokenizationType.SingleDollar:
                SaveToken(state, SqlTokenType.MonetaryValue, "$");
                break;

            case SqlTokenizationType.SingleSlash:
                SaveToken(state, SqlTokenType.OtherOperator, "/");
                break;

            case SqlTokenizationType.WhiteSpace:
                SaveToken(state, SqlTokenType.WhiteSpace, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleN:
                SaveToken(state, SqlTokenType.OtherNode, "N");
                break;

            case SqlTokenizationType.SingleExclamation:
                SaveToken(state, SqlTokenType.OtherNode, "!");
                break;

            case SqlTokenizationType.SinglePipe:
                SaveToken(state, SqlTokenType.OtherNode, "|");
                break;

            case SqlTokenizationType.SingleGT:
                SaveToken(state, SqlTokenType.OtherOperator, ">");
                break;

            case SqlTokenizationType.SingleLT:
                SaveToken(state, SqlTokenType.OtherOperator, "<");
                break;

            case SqlTokenizationType.NString:
                SaveToken(state, SqlTokenType.NationalString, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.String:
                SaveToken(state, SqlTokenType.String, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.QuotedString:
                SaveToken(state, SqlTokenType.QuotedString, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.BracketQuotedName:
                SaveToken(state, SqlTokenType.BracketQuotedName, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.OtherOperator:
            case SqlTokenizationType.SingleOtherCompoundableOperator:
                SaveToken(state, SqlTokenType.OtherOperator, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleZero:
                SaveToken(state, SqlTokenType.Number, "0");
                break;

            case SqlTokenizationType.SinglePeriod:
                SaveToken(state, SqlTokenType.Period, ".");
                break;

            case SqlTokenizationType.SingleAsterisk:
                SaveToken(state, SqlTokenType.Asterisk, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.SingleEquals:
                SaveToken(state, SqlTokenType.EqualsSign, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.Number:
            case SqlTokenizationType.DecimalValue:
            case SqlTokenizationType.FloatValue:
                SaveToken(state, SqlTokenType.Number, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.BinaryValue:
                SaveToken(state, SqlTokenType.BinaryValue, state.CurrentTokenValue.ToString());
                break;

            case SqlTokenizationType.MonetaryValue:
                SaveToken(state, SqlTokenType.MonetaryValue, state.CurrentTokenValue.ToString());
                break;

            default:
                throw new Exception("Unrecognized SQL Node Type");
            }

            state.CurrentTokenizationType = null;
        }
Example No. 8
        private static void ProcessOrOpenToken(TokenizationState state)
        {
            if (state.CurrentTokenizationType != null)
            {
                throw new Exception("Cannot start a new Token: existing Tokenization Type is not null");
            }

            if (!state.HasUnprocessedCurrentCharacter)
            {
                throw new Exception("Cannot start a new Token: no (outstanding) current character specified!");
            }

            //start a new value.
            state.CurrentTokenValue.Length = 0;

            if (IsWhitespace(state.CurrentChar))
            {
                state.CurrentTokenizationType = SqlTokenizationType.WhiteSpace;
                state.ConsumeCurrentCharacterIntoToken();
            }
            else if (state.CurrentChar == '-')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleHyphen;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '$')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleDollar;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '/')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleSlash;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == 'N')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleN;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later except N-string case
            }
            else if (state.CurrentChar == '\'')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.String;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing
            }
            else if (state.CurrentChar == '"')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.QuotedString;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing
            }
            else if (state.CurrentChar == '[')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.BracketQuotedName;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing
            }
            else if (state.CurrentChar == '(')
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.OpenParens);
            }
            else if (state.CurrentChar == ')')
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.CloseParens);
            }
            else if (state.CurrentChar == ',')
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.Comma);
            }
            else if (state.CurrentChar == '.')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SinglePeriod;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '0')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleZero;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar >= '1' && state.CurrentChar <= '9')
            {
                state.CurrentTokenizationType = SqlTokenizationType.Number;
                state.ConsumeCurrentCharacterIntoToken();
            }
            else if (IsCurrencyPrefix(state.CurrentChar))
            {
                state.CurrentTokenizationType = SqlTokenizationType.MonetaryValue;
                state.ConsumeCurrentCharacterIntoToken();
            }
            else if (state.CurrentChar == ';')
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.Semicolon);
            }
            else if (state.CurrentChar == ':')
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.Colon);
            }
            else if (state.CurrentChar == '*')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleAsterisk;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '=')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleEquals;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '<')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleLT;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '>')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleGT;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '!')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SingleExclamation;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (state.CurrentChar == '|')
            {
                state.CurrentTokenizationType        = SqlTokenizationType.SinglePipe;
                state.HasUnprocessedCurrentCharacter = false; //purposefully swallowing, will be reinserted later
            }
            else if (IsCompoundableOperatorCharacter(state.CurrentChar))
            {
                state.CurrentTokenizationType = SqlTokenizationType.SingleOtherCompoundableOperator;
                state.ConsumeCurrentCharacterIntoToken();
            }
            else if (IsOperatorCharacter(state.CurrentChar))
            {
                SaveCurrentCharToNewToken(state, SqlTokenType.OtherOperator);
            }
            else
            {
                state.CurrentTokenizationType = SqlTokenizationType.OtherNode;
                state.ConsumeCurrentCharacterIntoToken();
            }
        }
Example No. 9
 private static void SwallowOutstandingCharacterAndCompleteToken(TokenizationState state)
 {
     //this is for cases where we *know* we are swallowing the "current character" (not putting it in the output)
     state.HasUnprocessedCurrentCharacter = false;
     CompleteToken(state, false);
 }
Example No. 10
 private static void AppendCharAndCompleteToken(TokenizationState state)
 {
     state.ConsumeCurrentCharacterIntoToken();
     CompleteToken(state, false);
 }
Example No. 11
 private static void CompleteTokenAndProcessNext(TokenizationState state)
 {
     CompleteToken(state, true);
     ProcessOrOpenToken(state);
 }
Example No. 12
        public ITokenList TokenizeSQL(string inputSQL, long? requestedMarkerPosition)
        {
            var state = new TokenizationState(inputSQL, requestedMarkerPosition);

            state.ReadNextCharacter();
            while (state.HasUnprocessedCurrentCharacter)
            {
                if (state.CurrentTokenizationType == null)
                {
                    ProcessOrOpenToken(state);
                    state.ReadNextCharacter();
                    continue;
                }

                switch (state.CurrentTokenizationType.Value)
                {
                case SqlTokenizationType.WhiteSpace:
                    if (IsWhitespace(state.CurrentChar))
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SinglePeriod:
                    if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                        state.CurrentTokenValue.Append('.');
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        state.CurrentTokenValue.Append('.');
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleZero:
                    if (state.CurrentChar == 'x' || state.CurrentChar == 'X')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.BinaryValue;
                        state.CurrentTokenValue.Append('0');
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.Number;
                        state.CurrentTokenValue.Append('0');
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar == '.')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                        state.CurrentTokenValue.Append('0');
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        state.CurrentTokenValue.Append('0');
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.Number:
                    if (state.CurrentChar == 'e' || state.CurrentChar == 'E')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.FloatValue;
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar == '.')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.DecimalValue:
                    if (state.CurrentChar == 'e' || state.CurrentChar == 'E')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.FloatValue;
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.FloatValue:
                    if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if ((state.CurrentChar == '-' || state.CurrentChar == '+') && state.CurrentTokenValue.ToString().ToUpper().EndsWith("E"))
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.BinaryValue:
                    if ((state.CurrentChar >= '0' && state.CurrentChar <= '9') ||
                        (state.CurrentChar >= 'A' && state.CurrentChar <= 'F') ||
                        (state.CurrentChar >= 'a' && state.CurrentChar <= 'f')
                        )
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleDollar:
                    state.CurrentTokenValue.Append('$');

                    if ((state.CurrentChar >= 'A' && state.CurrentChar <= 'Z') ||
                        (state.CurrentChar >= 'a' && state.CurrentChar <= 'z')
                        )
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.PseudoName;
                    }
                    else
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.MonetaryValue;
                    }

                    state.ConsumeCurrentCharacterIntoToken();
                    break;

                case SqlTokenizationType.MonetaryValue:
                    if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar == '-' && state.CurrentTokenValue.Length == 1)
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else if (state.CurrentChar == '.' && !state.CurrentTokenValue.ToString().Contains("."))
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleHyphen:
                    if (state.CurrentChar == '-')
                    {
                        state.CurrentTokenizationType        = SqlTokenizationType.SingleLineComment;
                        state.HasUnprocessedCurrentCharacter = false;     //DISCARDING the hyphen because of weird standard
                    }
                    else if (state.CurrentChar == '=')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        state.CurrentTokenValue.Append('-');
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        state.CurrentTokenValue.Append('-');
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleSlash:
                    if (state.CurrentChar == '*')
                    {
                        state.CurrentTokenizationType        = SqlTokenizationType.BlockComment;
                        state.HasUnprocessedCurrentCharacter = false;     //DISCARDING the asterisk because of weird standard
                        state.CommentNesting++;
                    }
                    else if (state.CurrentChar == '/')
                    {
                        state.CurrentTokenizationType        = SqlTokenizationType.SingleLineCommentCStyle;
                        state.HasUnprocessedCurrentCharacter = false;     //DISCARDING the slash because of weird standard
                    }
                    else if (state.CurrentChar == '=')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        state.CurrentTokenValue.Append('/');
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        state.CurrentTokenValue.Append('/');
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleLineComment:
                case SqlTokenizationType.SingleLineCommentCStyle:
                    if (state.CurrentChar == (char)13 || state.CurrentChar == (char)10)
                    {
                        int nextCharInt = state.InputReader.Peek();
                        if (state.CurrentChar == (char)13 && nextCharInt == 10)
                        {
                            state.ConsumeCurrentCharacterIntoToken();
                            state.ReadNextCharacter();
                        }
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    break;

                case SqlTokenizationType.BlockComment:
                    if (state.CurrentChar == '*')
                    {
                        if (state.InputReader.Peek() == (int)'/')
                        {
                            state.CommentNesting--;
                            if (state.CommentNesting > 0)
                            {
                                state.ConsumeCurrentCharacterIntoToken();
                                state.ReadNextCharacter();
                                state.ConsumeCurrentCharacterIntoToken();
                            }
                            else
                            {
                                state.HasUnprocessedCurrentCharacter = false;     //discarding the asterisk
                                state.ReadNextCharacter();
                                //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                                SwallowOutstandingCharacterAndCompleteToken(state);
                            }
                        }
                        else
                        {
                            state.ConsumeCurrentCharacterIntoToken();
                        }
                    }
                    else
                    {
                        if (state.CurrentChar == '/' && state.InputReader.Peek() == (int)'*')
                        {
                            state.ConsumeCurrentCharacterIntoToken();
                            state.ReadNextCharacter();
                            state.ConsumeCurrentCharacterIntoToken();
                            state.CommentNesting++;
                        }
                        else
                        {
                            state.ConsumeCurrentCharacterIntoToken();
                        }
                    }
                    break;

                case SqlTokenizationType.OtherNode:
                case SqlTokenizationType.PseudoName:
                    if (IsNonWordCharacter(state.CurrentChar))
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    break;

                case SqlTokenizationType.SingleN:
                    if (state.CurrentChar == '\'')
                    {
                        state.CurrentTokenizationType        = SqlTokenizationType.NString;
                        state.HasUnprocessedCurrentCharacter = false;     //DISCARDING the apostrophe because of weird standard
                    }
                    else
                    {
                        if (IsNonWordCharacter(state.CurrentChar))
                        {
                            CompleteTokenAndProcessNext(state);
                        }
                        else
                        {
                            state.CurrentTokenizationType = SqlTokenizationType.OtherNode;
                            state.CurrentTokenValue.Append('N');
                            state.ConsumeCurrentCharacterIntoToken();
                        }
                    }
                    break;

                case SqlTokenizationType.NString:
                case SqlTokenizationType.String:
                    if (state.CurrentChar == '\'')
                    {
                        if (state.InputReader.Peek() == (int)'\'')
                        {
                            //add the character (once)
                            state.ConsumeCurrentCharacterIntoToken();

                            //throw away the second character... because (for some reason?) we're storing the "effective value" rather than the raw token...
                            state.DiscardNextCharacter();
                        }
                        else
                        {
                            //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                            SwallowOutstandingCharacterAndCompleteToken(state);
                        }
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    break;

                case SqlTokenizationType.QuotedString:
                    if (state.CurrentChar == '"')
                    {
                        if (state.InputReader.Peek() == (int)'"')
                        {
                            //add the character (once)
                            state.ConsumeCurrentCharacterIntoToken();

                            //throw away the second character... because (for some reason?) we're storing the "effective value" rather than the raw token...
                            state.DiscardNextCharacter();
                        }
                        else
                        {
                            //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                            SwallowOutstandingCharacterAndCompleteToken(state);
                        }
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    break;

                case SqlTokenizationType.BracketQuotedName:
                    if (state.CurrentChar == ']')
                    {
                        if (state.InputReader.Peek() == (int)']')
                        {
                            //add the character (once)
                            state.ConsumeCurrentCharacterIntoToken();

                            //throw away the second character... because (for some reason?) we're storing the "effective value" rather than the raw token...
                            state.DiscardNextCharacter();
                        }
                        else
                        {
                            //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                            SwallowOutstandingCharacterAndCompleteToken(state);
                        }
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                    break;

                case SqlTokenizationType.SingleLT:
                    state.CurrentTokenValue.Append('<');
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    if (state.CurrentChar == '=' || state.CurrentChar == '>' || state.CurrentChar == '<')
                    {
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleGT:
                    state.CurrentTokenValue.Append('>');
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    if (state.CurrentChar == '=' || state.CurrentChar == '>')
                    {
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleAsterisk:
                    state.CurrentTokenValue.Append('*');
                    if (state.CurrentChar == '=')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleOtherCompoundableOperator:
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    if (state.CurrentChar == '=')
                    {
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SinglePipe:
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    state.CurrentTokenValue.Append('|');
                    if (state.CurrentChar == '=' || state.CurrentChar == '|')
                    {
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleEquals:
                    state.CurrentTokenValue.Append('=');
                    if (state.CurrentChar == '=')
                    {
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                case SqlTokenizationType.SingleExclamation:
                    state.CurrentTokenValue.Append('!');
                    if (state.CurrentChar == '=' || state.CurrentChar == '<' || state.CurrentChar == '>')
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                        AppendCharAndCompleteToken(state);
                    }
                    else
                    {
                        state.CurrentTokenizationType = SqlTokenizationType.OtherNode;
                        CompleteTokenAndProcessNext(state);
                    }
                    break;

                default:
                    throw new Exception("In-progress node unrecognized!");
                }

                state.ReadNextCharacter();
            }


            if (state.CurrentTokenizationType != null)
            {
                if (state.CurrentTokenizationType.Value == SqlTokenizationType.BlockComment ||
                    state.CurrentTokenizationType.Value == SqlTokenizationType.String ||
                    state.CurrentTokenizationType.Value == SqlTokenizationType.NString ||
                    state.CurrentTokenizationType.Value == SqlTokenizationType.QuotedString ||
                    state.CurrentTokenizationType.Value == SqlTokenizationType.BracketQuotedName
                    )
                {
                    state.TokenContainer.HasUnfinishedToken = true;
                }

                SwallowOutstandingCharacterAndCompleteToken(state);
            }

            return(state.TokenContainer);
        }
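A minimal usage sketch for TokenizeSQL. The hosting class name (TSqlStandardTokenizer here), the Type/Value members on the returned tokens, and the assumption that ITokenList is enumerable are all inferred from the code above rather than shown in it:

        var tokenizer = new TSqlStandardTokenizer();    // hypothetical hosting class
        ITokenList tokens = tokenizer.TokenizeSQL("SELECT Amount * 1.5 -- note", requestedMarkerPosition: null);
        foreach (var token in tokens)
        {
            Console.WriteLine($"{token.Type}: \"{token.Value}\"");
        }
        // Expected output, roughly: OtherNode "SELECT", WhiteSpace, OtherNode "Amount", WhiteSpace,
        // Asterisk "*", WhiteSpace, Number "1.5", WhiteSpace, SingleLineComment " note".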
Example No. 13
        protected override IEnumerable<Result<FilterExpressionToken>> Tokenize(
            TextSpan stringSpan,
            TokenizationState<FilterExpressionToken> tokenizationState)
        {
            var next = SkipWhiteSpace(stringSpan);

            if (!next.HasValue)
            {
                yield break;
            }

            do
            {
                if (char.IsDigit(next.Value))
                {
                    var hex = FilterExpressionTextParsers.HexInteger(next.Location);
                    if (hex.HasValue)
                    {
                        next = hex.Remainder.ConsumeChar();
                        yield return(Result.Value(FilterExpressionToken.HexNumber, hex.Location, hex.Remainder));
                    }
                    else
                    {
                        var real = FilterExpressionTextParsers.Real(next.Location);
                        if (!real.HasValue)
                        {
                            yield return(Result.CastEmpty<TextSpan, FilterExpressionToken>(real));
                        }
                        else
                        {
                            yield return(Result.Value(FilterExpressionToken.Number, real.Location, real.Remainder));
                        }

                        next = real.Remainder.ConsumeChar();
                    }

                    if (!IsDelimiter(next))
                    {
                        yield return(Result.Empty<FilterExpressionToken>(next.Location, new[] { "digit" }));
                    }
                }
                else if (next.Value == '\'')
                {
                    var str = FilterExpressionTextParsers.SqlString(next.Location);
                    if (!str.HasValue)
                    {
                        yield return(Result.CastEmpty<string, FilterExpressionToken>(str));
                    }

                    next = str.Remainder.ConsumeChar();

                    yield return(Result.Value(FilterExpressionToken.String, str.Location, str.Remainder));
                }
                else if (next.Value == '@')
                {
                    var beginIdentifier = next.Location;
                    var startOfName     = next.Remainder;
                    do
                    {
                        next = next.Remainder.ConsumeChar();
                    } while (next.HasValue && char.IsLetterOrDigit(next.Value));

                    if (next.Remainder == startOfName)
                    {
                        yield return(Result.Empty<FilterExpressionToken>(startOfName, new[] { "built-in identifier name" }));
                    }
                    else
                    {
                        yield return(Result.Value(FilterExpressionToken.BuiltInIdentifier, beginIdentifier, next.Location));
                    }
                }
                else if (char.IsLetter(next.Value) || next.Value == '_')
                {
                    var beginIdentifier = next.Location;
                    do
                    {
                        next = next.Remainder.ConsumeChar();
                    } while (next.HasValue && (char.IsLetterOrDigit(next.Value) || next.Value == '_'));

                    FilterExpressionToken keyword;
                    if (TryGetKeyword(beginIdentifier.Until(next.Location), out keyword))
                    {
                        yield return(Result.Value(keyword, beginIdentifier, next.Location));
                    }
                    else
                    {
                        yield return(Result.Value(FilterExpressionToken.Identifier, beginIdentifier, next.Location));
                    }
                }
                else if (next.Value == '/' &&
                         (!tokenizationState.Previous.HasValue ||
                          PreRegexTokens.Contains(tokenizationState.Previous.Value.Kind)))
                {
                    var regex = FilterExpressionTextParsers.RegularExpression(next.Location);
                    if (!regex.HasValue)
                    {
                        yield return(Result.CastEmpty<Unit, FilterExpressionToken>(regex));
                    }

                    yield return(Result.Value(FilterExpressionToken.RegularExpression, next.Location, regex.Remainder));

                    next = regex.Remainder.ConsumeChar();
                }
                else
                {
                    var compoundOp = FilterExpressionTextParsers.CompoundOperator(next.Location);
                    if (compoundOp.HasValue)
                    {
                        yield return(Result.Value(compoundOp.Value, compoundOp.Location, compoundOp.Remainder));

                        next = compoundOp.Remainder.ConsumeChar();
                    }
                    else if (next.Value < SimpleOps.Length && SimpleOps[next.Value] != FilterExpressionToken.None)
                    {
                        yield return(Result.Value(SimpleOps[next.Value], next.Location, next.Remainder));

                        next = next.Remainder.ConsumeChar();
                    }
                    else
                    {
                        yield return(Result.Empty<FilterExpressionToken>(next.Location));

                        next = next.Remainder.ConsumeChar();
                    }
                }

                next = SkipWhiteSpace(next.Location);
            } while (next.HasValue);
        }
Example No. 14
        protected override IEnumerable<Result<TokenType>> Tokenize(TextSpan remainder, TokenizationState<TokenType> state)
        {
            // Parsing of the properties file syntax has to be stateful: any string can appear inside the value, so
            // without knowing what the last token was, you don't know how to parse the remaining text. That is why
            // we override the overload that takes the state.

            while (!remainder.IsAtEnd)
            {
                var parserToTry = GetParserToTry(state);
                var parseResult = parserToTry(remainder);
                if (parseResult.HasValue)
                {
                    remainder = parseResult.Remainder;
                    yield return(parseResult);
                }
                else
                {
                    yield return(Result.Empty<TokenType>(remainder));
                }
            }
        }
Example No. 15
        protected override IEnumerable<Result<CspTokenType>> Tokenize(TextSpan remainder, TokenizationState<CspTokenType> state)
        {
            while (true)
            {
                var next = remainder.ConsumeChar();
                if (!next.HasValue)
                {
                    yield break;
                }

                if (next.Value == ';')
                {
                    yield return(Result.Value(
                                     CspTokenType.Semicolon,
                                     next.Location,
                                     next.Remainder
                                     ));

                    remainder = next.Remainder;
                }
                else if (next.Value == ' ')
                {
                    var consumed = WhitespaceParser(remainder);
                    yield return(Result.Value(
                                     CspTokenType.Whitespace,
                                     consumed.Location,
                                     consumed.Remainder
                                     ));

                    remainder = consumed.Remainder;
                }
                else if (IsValidFirstCharForDirectiveNameOrSourceExpression(next.Value))
                {
                    var consumed = DirectiveNameOrSourceExpressionParser(remainder);
                    yield return(Result.Value(
                                     CspTokenType.DirectiveNameOrSourceExpression,
                                     consumed.Location,
                                     consumed.Remainder
                                     ));

                    remainder = consumed.Remainder;
                }
                else
                {
                    // Couldn't parse.
                    yield return(Result.Empty<CspTokenType>(remainder));
                }
            }
        }
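A short sketch of driving this tokenizer over a Content-Security-Policy value. The CspTokenizer class name and the TryTokenize entry point are assumptions based on the shared base class shown in these examples:

        var tokenizer = new CspTokenizer();             // hypothetical subclass hosting the Tokenize override above
        var result = tokenizer.TryTokenize("default-src 'self'; img-src *");
        if (result.HasValue)
        {
            foreach (var token in result.Value)
            {
                // Kinds will be some mix of DirectiveNameOrSourceExpression, Whitespace and Semicolon,
                // with the exact split depending on the helper parsers not shown here.
                Console.WriteLine($"{token.Kind}: \"{token.ToStringValue()}\"");
            }
        }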
Example No. 16
 /// <summary>
 /// Subclasses should override this method to perform tokenization when the
 /// last-produced token needs to be tracked.
 /// </summary>
 /// <param name="span">The input span to tokenize.</param>
 /// <param name="state">The tokenization state maintained during the operation.</param>
 /// <returns>A sequence of parse results, one per token.</returns>
 protected virtual IEnumerable<Result<TKind>> Tokenize(TextSpan span, TokenizationState<TKind> state)
 {
     return Tokenize(span);
 }