示例#1
0
        /// <summary>
        /// Tokenizes the specified script.
        /// </summary>
        /// <remarks>
        /// Works in stages: delimited constructs are split out by <c>TokenizeDelimited</c>, the remaining
        /// unknown text is broken apart on whitespace and punctuation, empty tokens and "N" string prefixes
        /// are dropped, comments are removed, tokens are categorized, and finally <c>CreateTree</c> nests
        /// the tokens. Each intermediate set is passed to <c>DisplayHTMLParseStep</c>.
        /// </remarks>
        /// <param name="script">The script.</param>
        /// <returns>The token set returned by <c>CreateTree</c> for the script.</returns>
        public static TokenSet Tokenize(string script)
        {
            //NOTE(review): presumably resets state shared with DisplayHTMLParseStep - confirm
            parserFile = -1;

            Token    seed  = new Token(script, TokenType.Unknown, 0);
            TokenSet start = new TokenSet();

            start.Add(seed);
            DisplayHTMLParseStep(start, "Starting set", true);

            //pull out tokens for comments, strings, escaped names, etc
            TokenSet escapedTokens = TokenizeDelimited(seed);

            //pull apart everything else on whitespace
            TokenSet whitespaceTokens = new TokenSet();

            //both delimiter sets are the same for every token, so build them once
            char[] punctuation = (OPERATORS + ",.();").ToCharArray();
            char[] whitespace  = " \t\r\n".ToCharArray();

            foreach (Token token in escapedTokens)
            {
                if (token.Type == TokenType.Unknown)
                {
                    //make life easier by creating optional whitespace
                    string spread = token.Value;
                    foreach (char op in punctuation)
                    {
                        spread = spread.Replace(op.ToString(), " " + op + " ");
                    }
                    int offset = 0;
                    foreach (string piece in spread.Split(whitespace))
                    {
                        if (piece.Length == 0)
                        {
                            continue;
                        }

                        //every piece is a literal substring of the original text, so locate it ordinally;
                        //a culture-sensitive search can match at the wrong position (or not at all) when
                        //the text contains characters the culture treats as ignorable
                        offset = token.Value.IndexOf(piece, offset, StringComparison.Ordinal);
                        whitespaceTokens.Add(new Token(piece, TokenType.Unknown, token.StartIndex + offset));
                        offset += piece.Length;                        //don't find the same text twice if it's repeated
                    }
                }
                else
                {
                    whitespaceTokens.Add(token);
                }
            }

            //remove bogus tokens
            TokenSet        finalTokens = new TokenSet();
            TokenEnumerator enumerator  = whitespaceTokens.GetEnumerator();

            while (enumerator.MoveNext())
            {
                //empty tokens
                if (enumerator.Current.Value == "")
                {
                    continue;
                }

                //bogus unicode string markings (an "N" immediately before a string value)
                if (enumerator.Current.Value == "N" && enumerator.Next != null && enumerator.Next.Type == TokenType.StringValue)
                {
                    continue;
                }

                finalTokens.Add(enumerator.Current);
            }
            DisplayHTMLParseStep(finalTokens, "After empty tokens removed", false);

            //comments gum things up
            RemoveComments(finalTokens);
            DisplayHTMLParseStep(finalTokens, "After comments removed", false);

            //Categorization
            IdentifySpecialTokens(finalTokens);
            DisplayHTMLParseStep(finalTokens, "After identifying special tokens", false);

            IdentifyRemainingTokens(finalTokens);
            DisplayHTMLParseStep(finalTokens, "After identifying remaining tokens", false);

            //associate the tokens with each other
            finalTokens = CreateTree(finalTokens);
            DisplayHTMLParseStep(finalTokens, "After tree creation", false);

            return(finalTokens);
        }
示例#2
0
        /// <summary>
        /// Nests operands beneath their operator tokens, then recurses into every token's children.
        /// </summary>
        /// <param name="tokens">The token set to restructure in place.</param>
        private static void CreateTree_Operator(TokenSet tokens)
        {
            TokenEnumerator cursor = tokens.GetEnumerator();

            while (cursor.MoveNext())
            {
                Token operatorToken = cursor.Current;

                //only an operator that has no children yet starts a new group
                bool startsGroup = operatorToken.Type == TokenType.Operator && operatorToken.Children.Count == 0;
                if (!startsGroup)
                {
                    CreateTree_Operator(operatorToken.Children);
                    continue;
                }

                //"!" and "~" never claim the token in front of them
                Token before = cursor.Previous;
                Token left   = null;
                if (before != null && operatorToken.Value != "!" && operatorToken.Value != "~")
                {
                    left = before;
                }
                Token right = cursor.Next;

                if (left != null)
                {
                    //don't bury keywords in the tree
                    if (left.Type == TokenType.Keyword || left.Type == TokenType.Separator)
                    {
                        CreateTree_Operator(operatorToken.Children);
                        continue;
                    }

                    if (left.Type == TokenType.Comment || left.Type == TokenType.Keyword ||
                        left.Type == TokenType.StringValue)
                    {
                        //these never become a left operand
                        left = null;
                    }
                    else
                    {
                        //attach now but leave it in the set for the moment -
                        //removing it here screws up adding of the next operand
                        operatorToken.Children.Add(left);
                    }
                }

                //pull in the right operand; keep going while the token just taken is itself an operator
                while (right != null)
                {
                    operatorToken.Children.Add(right);
                    cursor.RemoveNext();

                    right = right.Type == TokenType.Operator ? cursor.Next : null;
                }

                //now the left operand can be taken out of the set
                if (left != null)
                {
                    cursor.RemovePrevious();
                }

                //make a tree of those children too
                CreateTree_Operator(operatorToken.Children);
            }
        }
示例#3
0
        /// <summary>
        /// Assigns a type to the still-unknown tokens that can be recognised from their text alone
        /// (operators, variables, punctuation, grouping words, keywords, quotes), corrects two
        /// misclassifications, and merges adjacent operators and dotted names.
        /// </summary>
        /// <remarks>
        /// Text matching is culture-invariant, so keywords such as "IF" or "INDEX" are recognised
        /// regardless of the current thread culture.
        /// </remarks>
        /// <param name="tokens">The token set to classify. Tokens are updated in place and merged tokens are removed from the set.</param>
        private static void IdentifySpecialTokens(TokenSet tokens)
        {
            //identify the tokens
            foreach (Token token in tokens)
            {
                //only work on unidentified tokens
                if (token.Type != TokenType.Unknown)
                {
                    continue;
                }

                //identify the operators (ordinal search so ignorable characters never match)
                if (token.Value.Length == 1 && OPERATORS.IndexOf(token.Value, StringComparison.Ordinal) > -1)
                {
                    token.Type = TokenType.Operator;
                    continue;
                }

                //identify variables
                if (token.Value.StartsWith("@", StringComparison.Ordinal))
                {
                    token.Type = TokenType.Variable;
                    continue;
                }

                //pull other types; invariant lower-casing keeps "I" -> "i" under every culture
                switch (token.Value.ToLowerInvariant())
                {
                case ".":
                    token.Type = TokenType.Dot;
                    break;

                case ";":
                    token.Type = TokenType.Semicolon;
                    break;

                case "begin":
                case "(":
                    token.Type = TokenType.GroupBegin;
                    break;

                case "case":
                    token.Type = TokenType.CaseStatement;
                    break;

                case "end":
                case ")":
                    token.Type = TokenType.GroupEnd;
                    break;

                case "add":
                case "alter":
                case "and":
                case "clustered":
                case "collate":
                case "constraint":
                case "create":
                case "default":
                case "drop":
                case "else":
                case "exists":
                case "for":
                case "from":
                case "function":
                case "go":
                case "identity":
                case "if":
                case "in":
                case "index":
                case "is":
                case "key":
                case "nocheck":
                case "nonclustered":
                case "not":
                case "null":
                case "on":
                case "or":
                case "proc":
                case "procedure":
                case "primary":
                case "select":
                case "table":
                case "tran":
                case "transaction":
                case "trigger":
                case "then":
                case "unique":
                case "view":
                case "when":
                case "where":
                case "with":
                    token.Type = TokenType.Keyword;
                    break;

                case ",":
                    token.Type = TokenType.Separator;
                    break;

                case "'":
                case "\"":
                    token.Type = TokenType.Quote;
                    break;
                }
            }

            //fix misidentified tokens
            TokenEnumerator enumerator = tokens.GetEnumerator();

            while (enumerator.MoveNext())
            {
                Token previous = enumerator.Previous;
                Token current  = enumerator.Current;
                Token next     = enumerator.Next;

                //both corrections need a group opener that has a following token;
                //a trailing "(" or "begin" has no next token and is left as it is
                if (next == null || current.Type != TokenType.GroupBegin)
                {
                    continue;
                }

                if (next.Type == TokenType.Keyword &&
                    string.Equals(current.Value, "begin", StringComparison.OrdinalIgnoreCase) &&
                    next.Value.StartsWith("tran", StringComparison.OrdinalIgnoreCase))
                {
                    //"begin tran[saction]" is a statement, not the start of a block
                    current.Type = TokenType.Keyword;
                }
                else if (previous != null && previous.Type == TokenType.Quote && next.Type == TokenType.Quote)
                {
                    //a group opener sitting between two quotes is string content
                    current.Type = TokenType.StringValue;
                }
            }

            //coalesce operators and things with dots, walking backwards from the end of the set
            enumerator = tokens.GetEnumerator();
            enumerator.MoveLast();
            while (enumerator.MovePrevious())
            {
                Token previous = enumerator.Previous;
                Token current  = enumerator.Current;
                Token next     = enumerator.Next;

                //do the coalesce but don't screw up +5 * -2
                if (next != null && current.Type == TokenType.Operator && next.Type == TokenType.Operator &&
                    next.Value != "-" && next.Value != "+")
                {
                    current.Value = current.Value + next.Value;
                    enumerator.RemoveNext();
                }
                else if (current.Type == TokenType.Dot && previous != null && next != null &&
                         (previous.Type == TokenType.Unknown || previous.Type == TokenType.Identifier || previous.Type == TokenType.Dot) &&
                         (next.Type == TokenType.Unknown || next.Type == TokenType.Identifier || next.Type == TokenType.Dot))
                {
                    //fold "previous . next" into the dot token, which takes over the start position
                    current.StartIndex = previous.StartIndex;
                    current.Value      = previous.FlattenTree() + "." + next.FlattenTree();
                    enumerator.RemovePrevious();
                    enumerator.RemoveNext();
                }
            }
        }
示例#4
0
        /// <summary>
        /// Moves the tokens that follow each group opener out of the set and into that opener's children,
        /// tracking nested groups until the matching group end has been consumed.
        /// </summary>
        /// <param name="tokens">The token set to restructure in place.</param>
        /// <exception cref="ApplicationException">The set ends while a group is still open.</exception>
        private static void CreateTree_Grouping(TokenSet tokens)
        {
            TokenEnumerator cursor = tokens.GetEnumerator();

            while (cursor.MoveNext())
            {
                Token opener = cursor.Current;
                if (opener.Type == TokenType.GroupBegin)
                {
                    //innermost open group is on top
                    Stack <Token> openGroups = new Stack <Token>();
                    openGroups.Push(opener);

                    //for functions, the group hangs off the childless name in front of it
                    Token owner = cursor.Previous;
                    if (owner != null && owner.Children.Count == 0 &&
                        (owner.Type == TokenType.Identifier || owner.Type == TokenType.Unknown))
                    {
                        owner.Children.Add(opener);
                        cursor.RemoveCurrent();
                    }

                    //consume tokens until every open group has seen its end
                    while (openGroups.Count > 0)
                    {
                        cursor.MoveNext();

                        Token member = cursor.IsValid ? cursor.Current : null;
                        if (member == null)
                        {
                            throw new ApplicationException("Unclosed " + openGroups.Peek().Value);
                        }
                        cursor.RemoveCurrent();

                        bool opensNested = member.Type == TokenType.GroupBegin ||
                                           member.Type == TokenType.CaseStatement;

                        Token container = openGroups.Peek();
                        Token tail      = null;
                        if (container.Children.Count > 0)
                        {
                            tail = container.Children.Last;
                        }

                        //a nested group following a childless name belongs to that name (function call)
                        bool tailTakesGroup = tail != null && tail.Children.Count == 0 &&
                                              (tail.Type == TokenType.Identifier || tail.Type == TokenType.Unknown) &&
                                              opensNested;

                        if (tailTakesGroup)
                        {
                            tail.Children.Add(member);
                        }
                        else
                        {
                            container.Children.Add(member);
                        }

                        if (opensNested)
                        {
                            openGroups.Push(member);
                        }
                        else if (member.Type == TokenType.GroupEnd)
                        {
                            openGroups.Pop();
                        }
                    }
                }
            }
        }