/// <summary>
/// Consumes the pending character into the token being built and then
/// completes that token.
/// </summary>
/// <param name="state">The tokenization state holding the pending character and the in-progress token.</param>
private static void AppendCharAndCompleteToken(TokenizationState state)
{
    // The pending character belongs to the token being closed, so it is
    // consumed first and only then is the token completed.
    state.ConsumeCurrentCharacterIntoToken();

    // The second argument is passed as false, exactly as the original call did.
    CompleteToken(state, false);
}
/// <summary>
/// Starts a new token from the pending character. Depending on that character
/// the method either emits a complete single-character token right away, or
/// sets <c>state.CurrentTokenizationType</c> so that following characters can
/// decide what the token turns out to be.
/// </summary>
/// <param name="state">
/// The tokenization state. It must have no tokenization type in progress and
/// must hold an unprocessed current character.
/// </param>
/// <exception cref="Exception">
/// Thrown when a tokenization type is already in progress, or when there is no
/// unprocessed current character.
/// </exception>
private static void ProcessOrOpenToken(TokenizationState state)
{
    if (state.CurrentTokenizationType != null)
    {
        throw new Exception("Cannot start a new Token: existing Tokenization Type is not null");
    }

    if (!state.HasUnprocessedCurrentCharacter)
    {
        throw new Exception("Cannot start a new Token: no (outstanding) current character specified!");
    }

    // Start a new value.
    state.CurrentTokenValue.Length = 0;

    var pending = state.CurrentChar;

    // Whitespace is tested before any literal character.
    if (IsWhitespace(pending))
    {
        BeginTokenWithCurrentChar(state, SqlTokenizationType.WhiteSpace);
        return;
    }

    // First group of literal characters; these take priority over the
    // currency-prefix test further down.
    switch (pending)
    {
        case '-':
            BeginDeferredToken(state, SqlTokenizationType.SingleHyphen);
            return;
        case '$':
            BeginDeferredToken(state, SqlTokenizationType.SingleDollar);
            return;
        case '/':
            BeginDeferredToken(state, SqlTokenizationType.SingleSlash);
            return;
        case 'N':
            // Reinserted later, except in the N-string case.
            BeginDeferredToken(state, SqlTokenizationType.SingleN);
            return;
        case '\'':
            BeginDeferredToken(state, SqlTokenizationType.String);
            return;
        case '"':
            BeginDeferredToken(state, SqlTokenizationType.QuotedString);
            return;
        case '[':
            BeginDeferredToken(state, SqlTokenizationType.BracketQuotedName);
            return;
        case '(':
            SaveCurrentCharToNewToken(state, SqlTokenType.OpenParens);
            return;
        case ')':
            SaveCurrentCharToNewToken(state, SqlTokenType.CloseParens);
            return;
        case ',':
            SaveCurrentCharToNewToken(state, SqlTokenType.Comma);
            return;
        case '.':
            BeginDeferredToken(state, SqlTokenizationType.SinglePeriod);
            return;
        case '0':
            BeginDeferredToken(state, SqlTokenizationType.SingleZero);
            return;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            BeginTokenWithCurrentChar(state, SqlTokenizationType.Number);
            return;
    }

    if (IsCurrencyPrefix(pending))
    {
        BeginTokenWithCurrentChar(state, SqlTokenizationType.MonetaryValue);
        return;
    }

    // Second group of literal characters; these are only reached when the
    // character is not a currency prefix.
    switch (pending)
    {
        case ';':
            SaveCurrentCharToNewToken(state, SqlTokenType.Semicolon);
            return;
        case ':':
            SaveCurrentCharToNewToken(state, SqlTokenType.Colon);
            return;
        case '*':
            BeginDeferredToken(state, SqlTokenizationType.SingleAsterisk);
            return;
        case '=':
            BeginDeferredToken(state, SqlTokenizationType.SingleEquals);
            return;
        case '<':
            BeginDeferredToken(state, SqlTokenizationType.SingleLT);
            return;
        case '>':
            BeginDeferredToken(state, SqlTokenizationType.SingleGT);
            return;
        case '!':
            BeginDeferredToken(state, SqlTokenizationType.SingleExclamation);
            return;
        case '|':
            BeginDeferredToken(state, SqlTokenizationType.SinglePipe);
            return;
    }

    if (IsCompoundableOperatorCharacter(pending))
    {
        BeginTokenWithCurrentChar(state, SqlTokenizationType.SingleOtherCompoundableOperator);
        return;
    }

    if (IsOperatorCharacter(pending))
    {
        SaveCurrentCharToNewToken(state, SqlTokenType.OtherOperator);
        return;
    }

    // Anything else opens a generic node.
    BeginTokenWithCurrentChar(state, SqlTokenizationType.OtherNode);
}

/// <summary>
/// Opens a token of the given tokenization type and purposefully swallows the
/// pending character: it is marked as processed without being added to the
/// token value.
/// </summary>
private static void BeginDeferredToken(TokenizationState state, SqlTokenizationType tokenizationType)
{
    state.CurrentTokenizationType = tokenizationType;
    state.HasUnprocessedCurrentCharacter = false;
}

/// <summary>
/// Opens a token of the given tokenization type and consumes the pending
/// character into the token value.
/// </summary>
private static void BeginTokenWithCurrentChar(TokenizationState state, SqlTokenizationType tokenizationType)
{
    state.CurrentTokenizationType = tokenizationType;
    state.ConsumeCurrentCharacterIntoToken();
}
/// <summary>
/// Splits the supplied SQL text into tokens by running a character-by-character
/// state machine over it.
/// </summary>
/// <param name="inputSQL">The SQL text to tokenize; passed to the <c>TokenizationState</c> constructor.</param>
/// <param name="requestedMarkerPosition">
/// Passed unchanged to the <c>TokenizationState</c> constructor.
/// NOTE(review): presumably a position in the input to be tracked through
/// tokenization - confirm against <c>TokenizationState</c>.
/// </param>
/// <returns>
/// The state's token container. Its <c>HasUnfinishedToken</c> flag is set when
/// the input ends inside a block comment, string, N-string, quoted string or
/// bracket-quoted name.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the in-progress tokenization type is not handled by the state machine.
/// </exception>
public ITokenList TokenizeSQL(string inputSQL, long? requestedMarkerPosition)
{
    var state = new TokenizationState(inputSQL, requestedMarkerPosition);

    state.ReadNextCharacter();
    while (state.HasUnprocessedCurrentCharacter)
    {
        if (state.CurrentTokenizationType == null)
        {
            // No token in progress: the current character opens one.
            ProcessOrOpenToken(state);
            state.ReadNextCharacter();
            continue;
        }

        switch (state.CurrentTokenizationType.Value)
        {
            case SqlTokenizationType.WhiteSpace:
                if (IsWhitespace(state.CurrentChar))
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SinglePeriod:
                // The swallowed '.' is reinserted in both branches.
                if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                    state.CurrentTokenValue.Append('.');
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    state.CurrentTokenValue.Append('.');
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleZero:
                // The swallowed '0' is reinserted in every branch.
                if (state.CurrentChar == 'x' || state.CurrentChar == 'X')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.BinaryValue;
                    state.CurrentTokenValue.Append('0');
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.Number;
                    state.CurrentTokenValue.Append('0');
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar == '.')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                    state.CurrentTokenValue.Append('0');
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    state.CurrentTokenValue.Append('0');
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.Number:
                if (state.CurrentChar == 'e' || state.CurrentChar == 'E')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.FloatValue;
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar == '.')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.DecimalValue;
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.DecimalValue:
                if (state.CurrentChar == 'e' || state.CurrentChar == 'E')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.FloatValue;
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.FloatValue:
                if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if ((state.CurrentChar == '-' || state.CurrentChar == '+')
                    && state.CurrentTokenValue.ToString().EndsWith("E", StringComparison.OrdinalIgnoreCase))
                {
                    // A sign is accepted only directly after the exponent marker.
                    // The check is ordinal and case-insensitive so it does not
                    // depend on the current culture and needs no upper-cased copy.
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.BinaryValue:
                if ((state.CurrentChar >= '0' && state.CurrentChar <= '9')
                    || (state.CurrentChar >= 'A' && state.CurrentChar <= 'F')
                    || (state.CurrentChar >= 'a' && state.CurrentChar <= 'f'))
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleDollar:
                // Reinsert the swallowed '$'; the character after it decides
                // between a pseudo-name and a monetary value, and is consumed
                // into the token either way.
                state.CurrentTokenValue.Append('$');
                if ((state.CurrentChar >= 'A' && state.CurrentChar <= 'Z')
                    || (state.CurrentChar >= 'a' && state.CurrentChar <= 'z'))
                {
                    state.CurrentTokenizationType = SqlTokenizationType.PseudoName;
                }
                else
                {
                    state.CurrentTokenizationType = SqlTokenizationType.MonetaryValue;
                }
                state.ConsumeCurrentCharacterIntoToken();
                break;

            case SqlTokenizationType.MonetaryValue:
                if (state.CurrentChar >= '0' && state.CurrentChar <= '9')
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar == '-' && state.CurrentTokenValue.Length == 1)
                {
                    // A minus sign is accepted only as the second character of the value.
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else if (state.CurrentChar == '.' && !state.CurrentTokenValue.ToString().Contains("."))
                {
                    // Only one decimal point is accepted per value.
                    state.ConsumeCurrentCharacterIntoToken();
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleHyphen:
                if (state.CurrentChar == '-')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.SingleLineComment;
                    state.HasUnprocessedCurrentCharacter = false; //DISCARDING the hyphen because of weird standard
                }
                else if (state.CurrentChar == '=')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    state.CurrentTokenValue.Append('-');
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    state.CurrentTokenValue.Append('-');
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleSlash:
                if (state.CurrentChar == '*')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.BlockComment;
                    state.HasUnprocessedCurrentCharacter = false; //DISCARDING the asterisk because of weird standard
                    state.CommentNesting++;
                }
                else if (state.CurrentChar == '/')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.SingleLineCommentCStyle;
                    state.HasUnprocessedCurrentCharacter = false; //DISCARDING the slash because of weird standard
                }
                else if (state.CurrentChar == '=')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    state.CurrentTokenValue.Append('/');
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    state.CurrentTokenValue.Append('/');
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleLineComment:
            case SqlTokenizationType.SingleLineCommentCStyle:
                if (state.CurrentChar == (char)13 || state.CurrentChar == (char)10)
                {
                    // A CR immediately followed by LF is kept together inside
                    // the comment token: consume the CR, advance to the LF, and
                    // let the shared call below consume the LF.
                    int nextCharInt = state.InputReader.Peek();
                    if (state.CurrentChar == (char)13 && nextCharInt == 10)
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                        state.ReadNextCharacter();
                    }
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                break;

            case SqlTokenizationType.BlockComment:
                if (state.CurrentChar == '*')
                {
                    if (state.InputReader.Peek() == (int)'/')
                    {
                        state.CommentNesting--;
                        if (state.CommentNesting > 0)
                        {
                            // Closing a nested comment: keep both delimiter
                            // characters in the value.
                            state.ConsumeCurrentCharacterIntoToken();
                            state.ReadNextCharacter();
                            state.ConsumeCurrentCharacterIntoToken();
                        }
                        else
                        {
                            state.HasUnprocessedCurrentCharacter = false; //discarding the asterisk
                            state.ReadNextCharacter();
                            //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                            SwallowOutstandingCharacterAndCompleteToken(state);
                        }
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                }
                else
                {
                    if (state.CurrentChar == '/' && state.InputReader.Peek() == (int)'*')
                    {
                        // Opening a nested comment: keep both delimiter
                        // characters in the value and increase the nesting count.
                        state.ConsumeCurrentCharacterIntoToken();
                        state.ReadNextCharacter();
                        state.ConsumeCurrentCharacterIntoToken();
                        state.CommentNesting++;
                    }
                    else
                    {
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                }
                break;

            case SqlTokenizationType.OtherNode:
            case SqlTokenizationType.PseudoName:
                if (IsNonWordCharacter(state.CurrentChar))
                {
                    CompleteTokenAndProcessNext(state);
                }
                else
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                break;

            case SqlTokenizationType.SingleN:
                if (state.CurrentChar == '\'')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.NString;
                    state.HasUnprocessedCurrentCharacter = false; //DISCARDING the apostrophe because of weird standard
                }
                else
                {
                    if (IsNonWordCharacter(state.CurrentChar))
                    {
                        CompleteTokenAndProcessNext(state);
                    }
                    else
                    {
                        // The swallowed 'N' turns out to start an ordinary word.
                        state.CurrentTokenizationType = SqlTokenizationType.OtherNode;
                        state.CurrentTokenValue.Append('N');
                        state.ConsumeCurrentCharacterIntoToken();
                    }
                }
                break;

            case SqlTokenizationType.NString:
            case SqlTokenizationType.String:
                if (state.CurrentChar == '\'')
                {
                    if (state.InputReader.Peek() == (int)'\'')
                    {
                        // Doubled delimiter: add the character once and throw
                        // the second one away, so the token stores the effective
                        // value, not the raw text.
                        state.ConsumeCurrentCharacterIntoToken();
                        state.DiscardNextCharacter();
                    }
                    else
                    {
                        //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                        SwallowOutstandingCharacterAndCompleteToken(state);
                    }
                }
                else
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                break;

            case SqlTokenizationType.QuotedString:
                if (state.CurrentChar == '"')
                {
                    if (state.InputReader.Peek() == (int)'"')
                    {
                        // Doubled delimiter: add the character once and throw
                        // the second one away, so the token stores the effective
                        // value, not the raw text.
                        state.ConsumeCurrentCharacterIntoToken();
                        state.DiscardNextCharacter();
                    }
                    else
                    {
                        //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                        SwallowOutstandingCharacterAndCompleteToken(state);
                    }
                }
                else
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                break;

            case SqlTokenizationType.BracketQuotedName:
                if (state.CurrentChar == ']')
                {
                    if (state.InputReader.Peek() == (int)']')
                    {
                        // Doubled delimiter: add the character once and throw
                        // the second one away, so the token stores the effective
                        // value, not the raw text.
                        state.ConsumeCurrentCharacterIntoToken();
                        state.DiscardNextCharacter();
                    }
                    else
                    {
                        //TODO: DANGER DANGER why do "contained" token types have this inconsistent handling where the delimiters are not in the value???
                        SwallowOutstandingCharacterAndCompleteToken(state);
                    }
                }
                else
                {
                    state.ConsumeCurrentCharacterIntoToken();
                }
                break;

            case SqlTokenizationType.SingleLT:
                state.CurrentTokenValue.Append('<');
                state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                if (state.CurrentChar == '=' || state.CurrentChar == '>' || state.CurrentChar == '<')
                {
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleGT:
                state.CurrentTokenValue.Append('>');
                state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                if (state.CurrentChar == '=' || state.CurrentChar == '>')
                {
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleAsterisk:
                state.CurrentTokenValue.Append('*');
                if (state.CurrentChar == '=')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    // The tokenization type is left as SingleAsterisk here.
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleOtherCompoundableOperator:
                state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                if (state.CurrentChar == '=')
                {
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SinglePipe:
                state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                state.CurrentTokenValue.Append('|');
                if (state.CurrentChar == '=' || state.CurrentChar == '|')
                {
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleEquals:
                state.CurrentTokenValue.Append('=');
                if (state.CurrentChar == '=')
                {
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    // The tokenization type is left as SingleEquals here.
                    CompleteTokenAndProcessNext(state);
                }
                break;

            case SqlTokenizationType.SingleExclamation:
                state.CurrentTokenValue.Append('!');
                if (state.CurrentChar == '=' || state.CurrentChar == '<' || state.CurrentChar == '>')
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherOperator;
                    AppendCharAndCompleteToken(state);
                }
                else
                {
                    state.CurrentTokenizationType = SqlTokenizationType.OtherNode;
                    CompleteTokenAndProcessNext(state);
                }
                break;

            default:
                throw new Exception("In-progress node unrecognized!");
        }

        state.ReadNextCharacter();
    }

    // The input ended while a token was still being built.
    if (state.CurrentTokenizationType != null)
    {
        if (state.CurrentTokenizationType.Value == SqlTokenizationType.BlockComment
            || state.CurrentTokenizationType.Value == SqlTokenizationType.String
            || state.CurrentTokenizationType.Value == SqlTokenizationType.NString
            || state.CurrentTokenizationType.Value == SqlTokenizationType.QuotedString
            || state.CurrentTokenizationType.Value == SqlTokenizationType.BracketQuotedName)
        {
            // These token types need a closing delimiter that was never seen.
            state.TokenContainer.HasUnfinishedToken = true;
        }

        SwallowOutstandingCharacterAndCompleteToken(state);
    }

    return state.TokenContainer;
}