/// <summary>
/// Extracts the next group of tokens found at the start of <paramref name="charsToEvaluate"/>.
/// </summary>
/// <remarks>
/// When <paramref name="openTokenIfAny"/> is supplied, the characters are handed to the extractor
/// that matches the open token's type (block comment, string or bracket). Otherwise the first
/// classifier that matches wins, in this order: isWhitespace, isLineCommentStart,
/// isBlockCommentStart, isStringStart, isOpenBracket, isPunctuation, SQL keyword lookup, and
/// finally the unknown-token fallback.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number passed through to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>, passed through to the extractors.</param>
/// <param name="openTokenIfAny">A token that is still open from earlier input, or null.</param>
/// <returns>The tokens produced by the selected extractor.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="openTokenIfAny"/> is not a block comment start, string start or open bracket.
/// </exception>
public static IList<SqlToken> ExtractTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny)
{
    if (openTokenIfAny != null)
    {
        if (openTokenIfAny.TokenType == TokenTypes.BlockCommentStart)
        {
            return ExtractBlockCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        if (openTokenIfAny.TokenType == TokenTypes.StringStart)
        {
            return ExtractStringTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        if (openTokenIfAny.TokenType == TokenTypes.OpenBracket)
        {
            // Forward the open token, as the comment and string branches do. Without it the
            // bracket extractor cannot tell a continuation from a new bracket and emits a
            // second OpenBracket token when the continued text starts with '['.
            return ExtractBracketizedTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        throw new InvalidOperationException("Unexpected open token type.");
    }

    if (isWhitespace(charsToEvaluate))
    {
        return ExtractWhitespaceTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isLineCommentStart(charsToEvaluate))
    {
        return ExtractLineCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isBlockCommentStart(charsToEvaluate))
    {
        return ExtractBlockCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isStringStart(charsToEvaluate))
    {
        return ExtractStringTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isOpenBracket(charsToEvaluate))
    {
        return ExtractBracketizedTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isPunctuation(charsToEvaluate))
    {
        return ExtractPunctuationToken(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    // A null or empty lookup result is treated as "not a keyword".
    string keyword = SqlKeyWords.GetSqlKeyWord(charsToEvaluate);
    if (!string.IsNullOrEmpty(keyword))
    {
        return ExtractKeywordToken(oneBasedLineNumber, oneBasedStartCharacterIndex, keyword);
    }

    return ExtractUnknownToken(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
}
/// <summary>
/// Tokenizes a quoted string, or the continuation of a string left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional StringStart, an optional StringBody and, when the closing
/// quote is found, a StringEnd. If no closing quote is found, the StringStart created by this
/// call (if any) is flagged as open.
/// NOTE(review): an escaped quote pair adds one character to the body text although two source
/// characters are consumed - confirm that callers measuring token text length allow for this.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open StringStart when continuing a string, otherwise null.</param>
/// <returns>The tokens extracted from the string.</returns>
private static IList<SqlToken> ExtractStringTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    var result = new List<SqlToken>();
    SqlToken startToken = null;
    int bodyStart = 0;

    // Only emit an opening quote when this call begins a new string rather than continuing one.
    if (isStringStart(charsToEvaluate) && !weAreAlreadyInAnOpenString(openTokenIfAny))
    {
        startToken = new SqlToken(TokenTypes.StringStart, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = QUOTE_TOKEN };
        bodyStart = QUOTE_TOKEN.Length;
        result.Add(startToken);
    }

    // Nothing follows the opening quote: the string stays open.
    if (!thereAreStillCharactersRemaining(charsToEvaluate, bodyStart))
    {
        if (startToken != null)
        {
            startToken.IsOpen = true;
        }

        return result;
    }

    var unescapedBody = new StringBuilder(charsToEvaluate.Length - bodyStart);
    int position = bodyStart;
    while (position < charsToEvaluate.Length)
    {
        if (isEscapedQuote(charsToEvaluate, position))
        {
            // Collapse the escape to a single quote and skip both source characters.
            unescapedBody.Append("'");
            position += 2;
            continue;
        }

        if (isStringEnd(charsToEvaluate, position))
        {
            // An empty body produces no StringBody token.
            if (unescapedBody.Length > 0)
            {
                result.Add(new SqlToken(TokenTypes.StringBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
            }

            result.Add(new SqlToken(TokenTypes.StringEnd, oneBasedLineNumber, oneBasedStartCharacterIndex + position) { Text = QUOTE_TOKEN });
            return result;
        }

        unescapedBody.Append(charsToEvaluate[position]);
        position += 1;
    }

    // No closing quote was found: flag the start token (if this call created one) as open.
    if (startToken != null)
    {
        startToken.IsOpen = true;
    }

    result.Add(new SqlToken(TokenTypes.StringBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
    return result;
}
/// <summary>
/// Tokenizes a square-bracketed section, or the continuation of one left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional OpenBracket, a BracketBody and, when the closing bracket is
/// found, a CloseBracket. When a closing bracket is found the body text has escaped close
/// brackets collapsed to a single "]". When none is found the body is the raw remaining
/// characters and the OpenBracket created by this call (if any) is flagged as open.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open OpenBracket when continuing, otherwise null.</param>
/// <returns>The tokens extracted from the bracketed section.</returns>
private static IList<SqlToken> ExtractBracketizedTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    var result = new List<SqlToken>();
    SqlToken openBracket = null;
    int bodyStart = 0;

    // Only emit an opening bracket when this call begins a new section rather than continuing one.
    if (isOpenBracket(charsToEvaluate) && !weAreAlreadyInAnOpenBracket(openTokenIfAny))
    {
        openBracket = new SqlToken(TokenTypes.OpenBracket, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = "[" };
        bodyStart = "[".Length;
        result.Add(openBracket);
    }

    // Nothing follows the opening bracket: the section stays open.
    if (!thereAreStillCharactersRemaining(charsToEvaluate, bodyStart))
    {
        if (openBracket != null)
        {
            openBracket.IsOpen = true;
        }

        return result;
    }

    var unescapedBody = new StringBuilder(charsToEvaluate.Length - bodyStart);
    int position = bodyStart;
    while (position < charsToEvaluate.Length)
    {
        if (isEscapedCloseBracket(charsToEvaluate, position))
        {
            // Collapse the escape to a single bracket and skip both source characters.
            unescapedBody.Append("]");
            position += 2;
            continue;
        }

        if (isCloseBracket(charsToEvaluate, position))
        {
            // The body token is emitted even when it is empty.
            result.Add(new SqlToken(TokenTypes.BracketBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
            result.Add(new SqlToken(TokenTypes.CloseBracket, oneBasedLineNumber, oneBasedStartCharacterIndex + position) { Text = "]" });
            return result;
        }

        unescapedBody.Append(charsToEvaluate[position]);
        position += 1;
    }

    // No closing bracket was found: flag the opening bracket (if this call created one) as open.
    if (openBracket != null)
    {
        openBracket.IsOpen = true;
    }

    // The unterminated body keeps the raw characters, escapes included.
    result.Add(new SqlToken(TokenTypes.BracketBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = new String(charsToEvaluate, bodyStart, charsToEvaluate.Length - bodyStart) });
    return result;
}
/// <summary>
/// Tokenizes a block comment, or the continuation of one left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional BlockCommentStart, a BlockCommentBody and, when the end
/// marker is found, a BlockCommentEnd. If no end marker is found, the BlockCommentStart created
/// by this call (if any) is flagged as open.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open BlockCommentStart when continuing, otherwise null.</param>
/// <returns>The tokens extracted from the comment.</returns>
private static IList<SqlToken> ExtractBlockCommentTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    List<SqlToken> tokens = new List<SqlToken>();
    SqlToken startToken = null;
    int offset = 0;

    // Only emit a start marker when this call begins a new comment rather than continuing one.
    if (isBlockCommentStart(charsToEvaluate) && !weAreAlreadyInAnOpenBlockComment(openTokenIfAny))
    {
        startToken = new SqlToken(TokenTypes.BlockCommentStart, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = BLOCK_COMMENT_START };
        offset += BLOCK_COMMENT_START.Length;
        tokens.Add(startToken);
    }

    if (charsToEvaluate.Length > offset)
    {
        // The scan stops one character short of the end, presumably because the end marker
        // spans two characters.
        for (int i = offset; i < charsToEvaluate.Length - 1; i += 1)
        {
            if (isBlockCommentEnd(charsToEvaluate, i))
            {
                tokens.Add(new SqlToken(TokenTypes.BlockCommentBody, oneBasedLineNumber, oneBasedStartCharacterIndex + offset) { Text = new String(charsToEvaluate, offset, i - offset) });

                // The end marker's column is relative to where charsToEvaluate starts on the
                // line. The previous "i + 1" was only right when the segment began at column 1.
                tokens.Add(new SqlToken(TokenTypes.BlockCommentEnd, oneBasedLineNumber, oneBasedStartCharacterIndex + i) { Text = BLOCK_COMMENT_END });
                return tokens;
            }
        }

        // No end marker was found: flag the start token (if this call created one) as open.
        if (startToken != null)
        {
            startToken.IsOpen = true;
        }

        tokens.Add(new SqlToken(TokenTypes.BlockCommentBody, oneBasedLineNumber, oneBasedStartCharacterIndex + offset) { Text = new String(charsToEvaluate, offset, charsToEvaluate.Length - offset) });
        return tokens;
    }

    // Nothing follows the start marker: the comment stays open.
    if (startToken != null)
    {
        startToken.IsOpen = true;
    }

    return tokens;
}
/// <summary>
/// Reports whether <paramref name="openTokenIfAny"/> is a StringStart token that is flagged as open.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>True only for a non-null StringStart token whose IsOpen flag is set.</returns>
private static bool weAreAlreadyInAnOpenString(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null)
    {
        return false;
    }

    bool startsString = openTokenIfAny.TokenType == TokenTypes.StringStart;
    return startsString && openTokenIfAny.IsOpen;
}
/// <summary>
/// Reports whether <paramref name="openTokenIfAny"/> is an OpenBracket token that is flagged as open.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>True only for a non-null OpenBracket token whose IsOpen flag is set.</returns>
private static bool weAreAlreadyInAnOpenBracket(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null)
    {
        return false;
    }

    bool opensBracket = openTokenIfAny.TokenType == TokenTypes.OpenBracket;
    return opensBracket && openTokenIfAny.IsOpen;
}
/// <summary>
/// Reports whether the supplied token is an OpenBracket token whose IsOpen flag is set.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>False for null or for any other token type; otherwise the token's IsOpen flag.</returns>
private static bool weAreAlreadyInAnOpenBracket(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null || openTokenIfAny.TokenType != TokenTypes.OpenBracket)
    {
        return false;
    }

    return openTokenIfAny.IsOpen;
}
/// <summary>
/// Builds a one-character token for the punctuation at the start of <paramref name="charsToEvaluate"/>.
/// </summary>
/// <param name="charsToEvaluate">The characters to tokenize; only the first one is used.</param>
/// <param name="oneBasedLineNumber">Line number assigned to the token.</param>
/// <param name="oneBasedStartCharacterIndex">Column assigned to the token.</param>
/// <returns>A list holding the single punctuation token.</returns>
/// <exception cref="ArgumentException">punctuationTokenType returns no token type for the first character.</exception>
private static IList<SqlToken> ExtractPunctuationToken(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex)
{
    TokenTypes? punctuationType = punctuationTokenType(charsToEvaluate[0]);
    if (punctuationType == null)
    {
        throw new ArgumentException("Called Extract Punctuation Token without passing punctuation.");
    }

    var punctuation = new SqlToken(punctuationType.Value, oneBasedLineNumber, oneBasedStartCharacterIndex)
    {
        Text = new String(charsToEvaluate, 0, 1)
    };

    return new List<SqlToken> { punctuation };
}
/// <summary>
/// Wraps <paramref name="keyword"/> in a single Keyword token.
/// </summary>
/// <param name="oneBasedLineNumber">Line number assigned to the token.</param>
/// <param name="oneBasedStartCharacterIndex">Column assigned to the token.</param>
/// <param name="keyword">The text stored on the token.</param>
/// <returns>A list holding the single keyword token.</returns>
private static IList<SqlToken> ExtractKeywordToken(int oneBasedLineNumber, int oneBasedStartCharacterIndex, string keyword)
{
    SqlToken keywordToken = new SqlToken(TokenTypes.Keyword, oneBasedLineNumber, oneBasedStartCharacterIndex)
    {
        Text = keyword
    };

    return new List<SqlToken> { keywordToken };
}
/// <summary>
/// Tokenizes the SQL statements in the stream
/// </summary>
/// <remarks>
/// The stream is read line by line. A token that is left open at the end of a line (block
/// comment, string or bracket) is carried into the following lines until its matching end token
/// is produced. A Newline token with the text "\r\n" is appended after every line except the last.
/// </remarks>
/// <param name="sqlStream">A stream of the SQL statements to parse.</param>
/// <param name="encoding">System.Text.Encoding of the stream.  If not specified, defaults to UTF8.</param>
/// <returns>The IList of tokens in the SQL stream</returns>
public static async Task<IList<SqlToken>> TokenizeAsync(Stream sqlStream, Encoding encoding = null)
{
    if (encoding == null)
    {
        encoding = Encoding.UTF8; //best guess
    }

    List<SqlToken> tokens = new List<SqlToken>();

    // NOTE(review): the reader is never disposed; disposing it would also close sqlStream.
    // Confirm who owns the stream before adding a using block.
    StreamReader sql = new StreamReader(sqlStream, encoding);
    int lineNumber = 0;
    SqlToken openToken = null;
    int openTokenCount = 0;

    do
    {
        string line = await sql.ReadLineAsync();
        if (line == null)
        {
            return tokens;
        }

        lineNumber += 1;
        int charIndex = 0;
        while (charIndex < line.Length)
        {
            List<SqlToken> newTokens = SqlToken.ExtractTokens(line.Substring(charIndex).ToCharArray(), lineNumber, charIndex + 1, openToken).ToList();
            charIndex += LengthOfTokens(newTokens);

            // Pick up any token the extractor flagged as still open.
            foreach (SqlToken t in newTokens)
            {
                if (t.IsOpen)
                {
                    openTokenCount += 1;
                    Debug.Assert(openTokenCount < 2, "There should only ever be 0 or 1 open tokens.");
                    openToken = t;
                }
            }

            if (openTokenCount == 0)
            {
                openToken = null;
            }
            else
            {
                foreach (var t in newTokens)
                {
                    // CloseBracket is handled alongside the comment and string terminators.
                    // Previously an open bracket was never cleared, so everything after a
                    // multi-line bracket kept being tokenized as bracket content.
                    bool closesOpenToken =
                        (t.TokenType == SqlToken.TokenTypes.BlockCommentEnd && openToken.TokenType == SqlToken.TokenTypes.BlockCommentStart)
                        || (t.TokenType == SqlToken.TokenTypes.StringEnd && openToken.TokenType == SqlToken.TokenTypes.StringStart)
                        || (t.TokenType == SqlToken.TokenTypes.CloseBracket && openToken.TokenType == SqlToken.TokenTypes.OpenBracket);

                    if (closesOpenToken)
                    {
                        openTokenCount -= 1;
                        openToken.IsOpen = false;
                        openToken = null;

                        // Stop scanning: openToken is now null and must not be dereferenced again.
                        break;
                    }
                }
            }

            tokens.AddRange(newTokens);
        }

        if (!sql.EndOfStream)
        {
            var newLineToken = new SqlToken(SqlToken.TokenTypes.Newline, lineNumber, line.Length + 1);
            newLineToken.Text = "\r\n";
            tokens.Add(newLineToken);
        }
    } while (!sql.EndOfStream);

    return tokens;
}
/// <summary>
/// Extracts the next group of tokens found at the start of <paramref name="charsToEvaluate"/>.
/// </summary>
/// <remarks>
/// When <paramref name="openTokenIfAny"/> is supplied, the characters are handed to the extractor
/// that matches the open token's type (block comment, string or bracket). Otherwise the first
/// classifier that matches wins, in this order: isWhitespace, isLineCommentStart,
/// isBlockCommentStart, isStringStart, isOpenBracket, isPunctuation, SQL keyword lookup, and
/// finally the unknown-token fallback.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number passed through to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>, passed through to the extractors.</param>
/// <param name="openTokenIfAny">A token that is still open from earlier input, or null.</param>
/// <returns>The tokens produced by the selected extractor.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="openTokenIfAny"/> is not a block comment start, string start or open bracket.
/// </exception>
public static IList<SqlToken> ExtractTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny)
{
    if (openTokenIfAny != null)
    {
        if (openTokenIfAny.TokenType == TokenTypes.BlockCommentStart)
        {
            return ExtractBlockCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        if (openTokenIfAny.TokenType == TokenTypes.StringStart)
        {
            return ExtractStringTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        if (openTokenIfAny.TokenType == TokenTypes.OpenBracket)
        {
            // Forward the open token, as the comment and string branches do. Without it the
            // bracket extractor cannot tell a continuation from a new bracket and emits a
            // second OpenBracket token when the continued text starts with '['.
            return ExtractBracketizedTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex, openTokenIfAny);
        }

        throw new InvalidOperationException("Unexpected open token type.");
    }

    if (isWhitespace(charsToEvaluate))
    {
        return ExtractWhitespaceTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isLineCommentStart(charsToEvaluate))
    {
        return ExtractLineCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isBlockCommentStart(charsToEvaluate))
    {
        return ExtractBlockCommentTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isStringStart(charsToEvaluate))
    {
        return ExtractStringTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isOpenBracket(charsToEvaluate))
    {
        return ExtractBracketizedTokens(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    if (isPunctuation(charsToEvaluate))
    {
        return ExtractPunctuationToken(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
    }

    // A null or empty lookup result is treated as "not a keyword".
    string keyword = SqlKeyWords.GetSqlKeyWord(charsToEvaluate);
    if (!string.IsNullOrEmpty(keyword))
    {
        return ExtractKeywordToken(oneBasedLineNumber, oneBasedStartCharacterIndex, keyword);
    }

    return ExtractUnknownToken(charsToEvaluate, oneBasedLineNumber, oneBasedStartCharacterIndex);
}
/// <summary>
/// Tokenizes a quoted string, or the continuation of a string left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional StringStart, an optional StringBody and, when the closing
/// quote is found, a StringEnd. If no closing quote is found, the StringStart created by this
/// call (if any) is flagged as open.
/// NOTE(review): an escaped quote pair adds one character to the body text although two source
/// characters are consumed - confirm that callers measuring token text length allow for this.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open StringStart when continuing a string, otherwise null.</param>
/// <returns>The tokens extracted from the string.</returns>
private static IList<SqlToken> ExtractStringTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    var result = new List<SqlToken>();
    SqlToken startToken = null;
    int bodyStart = 0;

    // Only emit an opening quote when this call begins a new string rather than continuing one.
    if (isStringStart(charsToEvaluate) && !weAreAlreadyInAnOpenString(openTokenIfAny))
    {
        startToken = new SqlToken(TokenTypes.StringStart, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = QUOTE_TOKEN };
        bodyStart = QUOTE_TOKEN.Length;
        result.Add(startToken);
    }

    // Nothing follows the opening quote: the string stays open.
    if (!thereAreStillCharactersRemaining(charsToEvaluate, bodyStart))
    {
        if (startToken != null)
        {
            startToken.IsOpen = true;
        }

        return result;
    }

    var unescapedBody = new StringBuilder(charsToEvaluate.Length - bodyStart);
    int position = bodyStart;
    while (position < charsToEvaluate.Length)
    {
        if (isEscapedQuote(charsToEvaluate, position))
        {
            // Collapse the escape to a single quote and skip both source characters.
            unescapedBody.Append("'");
            position += 2;
            continue;
        }

        if (isStringEnd(charsToEvaluate, position))
        {
            // An empty body produces no StringBody token.
            if (unescapedBody.Length > 0)
            {
                result.Add(new SqlToken(TokenTypes.StringBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
            }

            result.Add(new SqlToken(TokenTypes.StringEnd, oneBasedLineNumber, oneBasedStartCharacterIndex + position) { Text = QUOTE_TOKEN });
            return result;
        }

        unescapedBody.Append(charsToEvaluate[position]);
        position += 1;
    }

    // No closing quote was found: flag the start token (if this call created one) as open.
    if (startToken != null)
    {
        startToken.IsOpen = true;
    }

    result.Add(new SqlToken(TokenTypes.StringBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
    return result;
}
/// <summary>
/// Tokenizes a square-bracketed section, or the continuation of one left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional OpenBracket, a BracketBody and, when the closing bracket is
/// found, a CloseBracket. When a closing bracket is found the body text has escaped close
/// brackets collapsed to a single "]". When none is found the body is the raw remaining
/// characters and the OpenBracket created by this call (if any) is flagged as open.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open OpenBracket when continuing, otherwise null.</param>
/// <returns>The tokens extracted from the bracketed section.</returns>
private static IList<SqlToken> ExtractBracketizedTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    var result = new List<SqlToken>();
    SqlToken openBracket = null;
    int bodyStart = 0;

    // Only emit an opening bracket when this call begins a new section rather than continuing one.
    if (isOpenBracket(charsToEvaluate) && !weAreAlreadyInAnOpenBracket(openTokenIfAny))
    {
        openBracket = new SqlToken(TokenTypes.OpenBracket, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = "[" };
        bodyStart = "[".Length;
        result.Add(openBracket);
    }

    // Nothing follows the opening bracket: the section stays open.
    if (!thereAreStillCharactersRemaining(charsToEvaluate, bodyStart))
    {
        if (openBracket != null)
        {
            openBracket.IsOpen = true;
        }

        return result;
    }

    var unescapedBody = new StringBuilder(charsToEvaluate.Length - bodyStart);
    int position = bodyStart;
    while (position < charsToEvaluate.Length)
    {
        if (isEscapedCloseBracket(charsToEvaluate, position))
        {
            // Collapse the escape to a single bracket and skip both source characters.
            unescapedBody.Append("]");
            position += 2;
            continue;
        }

        if (isCloseBracket(charsToEvaluate, position))
        {
            // The body token is emitted even when it is empty.
            result.Add(new SqlToken(TokenTypes.BracketBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = unescapedBody.ToString() });
            result.Add(new SqlToken(TokenTypes.CloseBracket, oneBasedLineNumber, oneBasedStartCharacterIndex + position) { Text = "]" });
            return result;
        }

        unescapedBody.Append(charsToEvaluate[position]);
        position += 1;
    }

    // No closing bracket was found: flag the opening bracket (if this call created one) as open.
    if (openBracket != null)
    {
        openBracket.IsOpen = true;
    }

    // The unterminated body keeps the raw characters, escapes included.
    result.Add(new SqlToken(TokenTypes.BracketBody, oneBasedLineNumber, oneBasedStartCharacterIndex + bodyStart) { Text = new String(charsToEvaluate, bodyStart, charsToEvaluate.Length - bodyStart) });
    return result;
}
/// <summary>
/// Tokenizes a block comment, or the continuation of one left open by earlier input.
/// </summary>
/// <remarks>
/// Produces, in order: an optional BlockCommentStart, a BlockCommentBody and, when the end
/// marker is found, a BlockCommentEnd. If no end marker is found, the BlockCommentStart created
/// by this call (if any) is flagged as open.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <param name="openTokenIfAny">The still-open BlockCommentStart when continuing, otherwise null.</param>
/// <returns>The tokens extracted from the comment.</returns>
private static IList<SqlToken> ExtractBlockCommentTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex, SqlToken openTokenIfAny = null)
{
    List<SqlToken> tokens = new List<SqlToken>();
    SqlToken startToken = null;
    int offset = 0;

    // Only emit a start marker when this call begins a new comment rather than continuing one.
    if (isBlockCommentStart(charsToEvaluate) && !weAreAlreadyInAnOpenBlockComment(openTokenIfAny))
    {
        startToken = new SqlToken(TokenTypes.BlockCommentStart, oneBasedLineNumber, oneBasedStartCharacterIndex) { Text = BLOCK_COMMENT_START };
        offset += BLOCK_COMMENT_START.Length;
        tokens.Add(startToken);
    }

    if (charsToEvaluate.Length > offset)
    {
        // The scan stops one character short of the end, presumably because the end marker
        // spans two characters.
        for (int i = offset; i < charsToEvaluate.Length - 1; i += 1)
        {
            if (isBlockCommentEnd(charsToEvaluate, i))
            {
                tokens.Add(new SqlToken(TokenTypes.BlockCommentBody, oneBasedLineNumber, oneBasedStartCharacterIndex + offset) { Text = new String(charsToEvaluate, offset, i - offset) });

                // The end marker's column is relative to where charsToEvaluate starts on the
                // line. The previous "i + 1" was only right when the segment began at column 1.
                tokens.Add(new SqlToken(TokenTypes.BlockCommentEnd, oneBasedLineNumber, oneBasedStartCharacterIndex + i) { Text = BLOCK_COMMENT_END });
                return tokens;
            }
        }

        // No end marker was found: flag the start token (if this call created one) as open.
        if (startToken != null)
        {
            startToken.IsOpen = true;
        }

        tokens.Add(new SqlToken(TokenTypes.BlockCommentBody, oneBasedLineNumber, oneBasedStartCharacterIndex + offset) { Text = new String(charsToEvaluate, offset, charsToEvaluate.Length - offset) });
        return tokens;
    }

    // Nothing follows the start marker: the comment stays open.
    if (startToken != null)
    {
        startToken.IsOpen = true;
    }

    return tokens;
}
/// <summary>
/// Reports whether the supplied token is a StringStart token whose IsOpen flag is set.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>False for null or for any other token type; otherwise the token's IsOpen flag.</returns>
private static bool weAreAlreadyInAnOpenString(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null || openTokenIfAny.TokenType != TokenTypes.StringStart)
    {
        return false;
    }

    return openTokenIfAny.IsOpen;
}
/// <summary>
/// Tokenizes a line comment: a LineCommentStart marker followed by an optional LineCommentBody
/// holding every character after the marker.
/// </summary>
/// <param name="charsToEvaluate">The characters to tokenize; the caller has already established that they begin a line comment.</param>
/// <param name="oneBasedLineNumber">Line number assigned to every token created.</param>
/// <param name="oneBasedStartCharacterIndex">Column of the first character of <paramref name="charsToEvaluate"/>.</param>
/// <returns>The start token, plus a body token when characters follow the marker.</returns>
private static IList<SqlToken> ExtractLineCommentTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex)
{
    List<SqlToken> tokens = new List<SqlToken>();

    // Derive every offset from the marker itself. The body previously started at a hard-coded
    // index of 2 while its length was computed from LINE_COMMENT_TOKEN.Length.
    int markerLength = LINE_COMMENT_TOKEN.Length;

    SqlToken start = new SqlToken(TokenTypes.LineCommentStart, oneBasedLineNumber, oneBasedStartCharacterIndex);
    start.Text = LINE_COMMENT_TOKEN;
    tokens.Add(start);

    if (thereAreStillCharactersRemaining(charsToEvaluate, markerLength))
    {
        SqlToken body = new SqlToken(TokenTypes.LineCommentBody, oneBasedLineNumber, oneBasedStartCharacterIndex + markerLength);
        body.Text = new String(charsToEvaluate, markerLength, charsToEvaluate.Length - markerLength);
        tokens.Add(body);
    }

    return tokens;
}
/// <summary>
/// Builds a single Whitespace token covering the leading run of whitespace in
/// <paramref name="charsToEvaluate"/>.
/// </summary>
/// <remarks>
/// The first character is not re-checked; scanning starts at the second character. When no
/// non-whitespace character is found from there on, the whole input becomes the token text.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to the token.</param>
/// <param name="oneBasedStartCharacterIndex">Column assigned to the token.</param>
/// <returns>A list holding the single whitespace token.</returns>
private static IList<SqlToken> ExtractWhitespaceTokens(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex)
{
    var whitespace = new SqlToken(TokenTypes.Whitespace, oneBasedLineNumber, oneBasedStartCharacterIndex);

    // Measure the run: index 0 is taken as whitespace, so counting starts at 1.
    int runLength = 1;
    while (runLength < charsToEvaluate.Length && isWhitespace(charsToEvaluate[runLength]))
    {
        runLength += 1;
    }

    if (runLength < charsToEvaluate.Length)
    {
        whitespace.Text = new String(charsToEvaluate, 0, runLength);
    }
    else
    {
        whitespace.Text = new String(charsToEvaluate);
    }

    return new List<SqlToken> { whitespace };
}
/// <summary>
/// Builds a single Unknown token from the leading characters of <paramref name="charsToEvaluate"/>.
/// </summary>
/// <remarks>
/// The token ends just before the first position that is whitespace or that starts a line
/// comment, block comment, string or punctuation. When no such position exists, or when it is
/// the very first character, the whole input becomes the token text.
/// </remarks>
/// <param name="charsToEvaluate">The characters to tokenize.</param>
/// <param name="oneBasedLineNumber">Line number assigned to the token.</param>
/// <param name="oneBasedStartCharacterIndex">Column assigned to the token.</param>
/// <returns>A list holding the single unknown token.</returns>
private static IList<SqlToken> ExtractUnknownToken(Char[] charsToEvaluate, int oneBasedLineNumber, int oneBasedStartCharacterIndex)
{
    // Advance to the first position at which some other kind of token begins.
    int boundary = 0;
    while (boundary < charsToEvaluate.Length)
    {
        bool endsToken = isWhitespace(charsToEvaluate[boundary])
            || isLineCommentStart(charsToEvaluate, boundary)
            || isBlockCommentStart(charsToEvaluate, boundary)
            || isStringStart(charsToEvaluate, boundary)
            || isPunctuation(charsToEvaluate, boundary);
        if (endsToken)
        {
            break;
        }

        boundary += 1;
    }

    var unknown = new SqlToken(TokenTypes.Unknown, oneBasedLineNumber, oneBasedStartCharacterIndex);

    // A boundary at index 0 is treated the same as "no boundary": take everything.
    if (boundary == 0)
    {
        unknown.Text = new String(charsToEvaluate);
    }
    else
    {
        unknown.Text = new String(charsToEvaluate, 0, boundary);
    }

    return new List<SqlToken> { unknown };
}
/// <summary>
/// Reports whether <paramref name="openTokenIfAny"/> is a BlockCommentStart token that is flagged as open.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>True only for a non-null BlockCommentStart token whose IsOpen flag is set.</returns>
private static bool weAreAlreadyInAnOpenBlockComment(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null)
    {
        return false;
    }

    bool startsBlockComment = openTokenIfAny.TokenType == TokenTypes.BlockCommentStart;
    return startsBlockComment && openTokenIfAny.IsOpen;
}
/// <summary>
/// Reports whether the supplied token is a BlockCommentStart token whose IsOpen flag is set.
/// </summary>
/// <param name="openTokenIfAny">The candidate open token; may be null.</param>
/// <returns>False for null or for any other token type; otherwise the token's IsOpen flag.</returns>
private static bool weAreAlreadyInAnOpenBlockComment(SqlToken openTokenIfAny = null)
{
    if (openTokenIfAny == null || openTokenIfAny.TokenType != TokenTypes.BlockCommentStart)
    {
        return false;
    }

    return openTokenIfAny.IsOpen;
}