Пример #1
0
        /// <summary>
        /// Translate the raw tokens describing a single declared variable into a DimVariable. The first token is taken as the
        /// variable name. A lone name results in a null dimension set, a name followed by "(" and ")" results in an empty
        /// dimension set and, for anything longer, each entry returned by getEntryList becomes one dimension Expression.
        /// </summary>
        /// <param name="tokens">The tokens for the variable; must be non-null, non-empty and free of null references, with
        /// every entry being an AtomToken, DateLiteralToken or StringToken</param>
        private DimStatement.DimVariable translateRawVariableData(List<IToken> tokens)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException("tokens");
            }
            if (tokens.Count == 0)
            {
                throw new ArgumentException("zero tokens - invalid");
            }

            // Every entry must be present and must be one of the supported token types
            for (var position = 0; position < tokens.Count; position++)
            {
                var candidate = tokens[position];
                if (candidate == null)
                {
                    throw new Exception("Invalid token - null");
                }
                var isSupportedType = (candidate is AtomToken) || (candidate is DateLiteralToken) || (candidate is StringToken);
                if (!isSupportedType)
                {
                    throw new Exception("Invalid token - not AtomToken or StringToken");
                }
            }

            // The name always comes first - if that is all there is then there are no dimensions to describe
            var nameToken  = tokens[0];
            var tokenCount = tokens.Count;
            if (tokenCount == 1)
            {
                return new DimStatement.DimVariable(new NameToken(nameToken.Content, nameToken.LineIndex), null);
            }

            // Anything after the name must be wrapped in brackets, which requires at least an opening and a closing token
            var isWrappedInBrackets =
                (tokenCount > 2) &&
                (tokens[1].Content == "(") &&
                (tokens[tokenCount - 1].Content == ")");
            if (!isWrappedInBrackets)
            {
                throw new Exception("Invalid token sequence");
            }

            // Name, "(" and ")" only - brackets were specified but they contain no dimensions
            if (tokenCount == 3)
            {
                return new DimStatement.DimVariable(new NameToken(nameToken.Content, nameToken.LineIndex), new List<Expression>());
            }

            // Let base.getEntryList split the bracketed content (starting after the opening bracket) into one token set
            // per dimension and wrap each of those sets in an Expression
            var dimensions = new List<Expression>();
            List<List<IToken>> dimensionTokenSets = base.getEntryList(tokens, 2, AtomToken.GetNewToken(")", nameToken.LineIndex));
            for (var position = 0; position < dimensionTokenSets.Count; position++)
            {
                dimensions.Add(new Expression(dimensionTokenSets[position]));
            }

            return new DimStatement.DimVariable(new NameToken(nameToken.Content, nameToken.LineIndex), dimensions);
        }
Пример #2
0
        // =======================================================================================
        // VBScript BASE SOURCE RE-GENERATION
        // =======================================================================================
        /// <summary>
        /// Rebuild VBScript source equivalent to this value-setting statement. The returned content is whatever the
        /// Statement class renders for the combined tokens, which should not end with a line return.
        /// </summary>
        public string GenerateBaseSource(SourceRendering.ISourceIndentHandler indenter)
        {
            // Rendering rules (such as where whitespace does and does not appear between tokens) live in the Statement
            // class, so the tokens are assembled as "[Set] target = expression" and handed to a Statement rather than
            // duplicating any of that logic here
            var targetTokens    = ValueToSet.Tokens;
            var equalsToken     = AtomToken.GetNewToken("=", targetTokens.Last().LineIndex);
            var statementTokens = targetTokens
                .Concat(new[] { equalsToken })
                .Concat(Expression.Tokens)
                .ToList();

            var needsSetKeyword = (ValueSetType == ValueSetTypeOptions.Set);
            if (needsSetKeyword)
            {
                var setKeyword = AtomToken.GetNewToken("Set", targetTokens.First().LineIndex);
                statementTokens.Insert(0, setKeyword);
            }

            var wrapperStatement = new Statement(statementTokens, Statement.CallPrefixOptions.Absent);
            return wrapperStatement.GenerateBaseSource(indenter);
        }
Пример #3
0
        // =======================================================================================
        // VBScript BASE SOURCE RE-GENERATION
        // =======================================================================================
        /// <summary>
        /// Re-generate equivalent VBScript source code for this block - there should not be a line return at the end of
        /// the content. The indenter's Indent value is written first, then each token's content; string tokens are wrapped
        /// in double quotes (with any quotes inside the content doubled-up) and date literal tokens are wrapped in hashes.
        /// A single space separates tokens except around member accessors, after an opening brace and before argument
        /// separators and braces. Trailing whitespace is trimmed from the result.
        /// </summary>
        public string GenerateBaseSource(SourceRendering.ISourceIndentHandler indenter)
        {
            var tokensList = Tokens.ToList();

            if (CallPrefix == CallPrefixOptions.Present)
            {
                tokensList.Insert(0, AtomToken.GetNewToken("Call", tokensList[0].LineIndex));
            }

            var output = new StringBuilder();

            output.Append(indenter.Indent);
            for (int index = 0; index < tokensList.Count; index++)
            {
                var token = tokensList[index];
                if (token is StringToken)
                {
                    // VBScript has no escape character for string literals, a quote inside a string must be written as two
                    // quotes. String token content appears to be stored with such pairs already collapsed to a single quote,
                    // so they must be doubled-up again here or the regenerated literal would be terminated early.
                    output.Append("\"" + (token.Content ?? "").Replace("\"", "\"\"") + "\"");
                }
                else if (token is DateLiteralToken)
                {
                    output.Append("#" + token.Content + "#");
                }
                else
                {
                    output.Append(token.Content);
                }

                // Nothing follows the final token, so no separator is required
                var nextToken = (index < (tokensList.Count - 1)) ? tokensList[index + 1] : null;
                if (nextToken == null)
                {
                    continue;
                }

                // These token combinations are rendered tight against each other (eg. "a.b", "f(x, y)")
                if ((token is MemberAccessorOrDecimalPointToken) ||
                    (token is OpenBrace) ||
                    (nextToken is MemberAccessorOrDecimalPointToken) ||
                    (nextToken is ArgumentSeparatorToken) ||
                    (nextToken is OpenBrace) ||
                    (nextToken is CloseBrace))
                {
                    continue;
                }

                output.Append(" ");
            }
            return output.ToString().TrimEnd();
        }
Пример #4
0
        /// <summary>
        /// Rewrite a token stream in two passes. Firstly, each run of consecutive addition / subtraction operators is
        /// condensed into a single token (or dropped entirely where the condensed token is redundant given the token that
        /// preceded the run). Secondly, adjacent comparison tokens are merged so that "&lt;", "&gt;" becomes "&lt;&gt;",
        /// "&lt;", "=" becomes "&lt;=" and "&gt;", "=" becomes "&gt;=". The work is performed eagerly, not lazily.
        /// </summary>
        /// <param name="tokens">The tokens to process; must not be null and must not contain any null references</param>
        /// <exception cref="ArgumentNullException">Thrown if tokens is null</exception>
        /// <exception cref="ArgumentException">Thrown if tokens contains a null reference</exception>
        public static IEnumerable<IToken> Combine(IEnumerable<IToken> tokens)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException("tokens");
            }

            // Handle +/- sign combinations
            var additionSubtractionRewrittenTokens = new List<IToken>();
            var buffer             = new List<OperatorToken>();
            var previousTokenIfAny = (IToken)null;

            foreach (var token in tokens)
            {
                if (token == null)
                {
                    throw new ArgumentException("Null reference encountered in tokens set");
                }

                // Addition / subtraction operators are held back until the run that they are part of comes to an end
                var combinableOperator = TryToGetAsAdditionOrSubtractionToken(token);
                if (combinableOperator != null)
                {
                    buffer.Add(combinableOperator);
                    continue;
                }

                var bufferHadContentThatWasReducedToNothing = false;
                if (buffer.Any())
                {
                    var condensedToken = CondenseNegations(buffer);
                    if (IsTokenRedundant(condensedToken, previousTokenIfAny))
                    {
                        // If this is a "+" and the last token was an OperatorToken, then this one is redundant (eg. "1 * +1")
                        bufferHadContentThatWasReducedToNothing = true;
                    }
                    else
                    {
                        additionSubtractionRewrittenTokens.Add(condensedToken);
                    }
                    buffer.Clear();
                }

                // When a minus-sign/addition-sign buffer is flattened and can be reduced to nothing, if the next token is a numeric value then we
                // need to apply a bit of a dirty hack since VBScript gives numeric literals special treatment in some cases but does not consider
                // --1 to be a numeric literal (for example). So we can not replace --1 with 1 since it would change the meaning of some code. To
                // illustrate, consider the following:
                //   If ("a" = 1) Then
                //   If ("a" = --1) Then
                //   If ("a" = +-1) Then
                // The first example will result in a Type Mismatch since the numeric literal forces the "a" to be parsed as a number (which fails).
                // However, the second and third examples return false since their right hand side values are not considered to be numeric literals
                // and so the left hand sides need not be parsed as numeric values. The workaround is to identify these situations and to wrap the
                // number in a CInt/CLng/CDbl call. So long as the appropriate function is used, this will not affect the numeric value but it will
                // prevent it from being identified as a numeric literal later on (this is important to the StatementTranslator). Note: This is why
                // the NumberRebuilder must have done its work before we get here, since ++1.2 must be recognised as "+", "+", "1.2" so that it can
                // be translated into "CDbl(1.2)", rather than still being "+", "+", "1", ".", "2", which would be translated into "CDbl(1).2",
                // which would be invalid.
                var numericValueToken             = token as NumericValueToken;
                var wrapTokenInNumberFunctionCall = bufferHadContentThatWasReducedToNothing && (numericValueToken != null);
                if (wrapTokenInNumberFunctionCall)
                {
                    additionSubtractionRewrittenTokens.Add(new BuiltInFunctionToken(numericValueToken.GetSafeWrapperFunctionName(), token.LineIndex));
                    additionSubtractionRewrittenTokens.Add(new OpenBrace(token.LineIndex));
                }
                additionSubtractionRewrittenTokens.Add(token);
                if (wrapTokenInNumberFunctionCall)
                {
                    additionSubtractionRewrittenTokens.Add(new CloseBrace(token.LineIndex));
                }
                previousTokenIfAny = token;
            }
            if (buffer.Any())
            {
                // Note: We don't need to copy all of the logic from above - in fact we can't, since we don't have a current token reference
                var condensedToken = CondenseNegations(buffer);
                if (!IsTokenRedundant(condensedToken, previousTokenIfAny))
                {
                    additionSubtractionRewrittenTokens.Add(condensedToken);
                }
            }

            // Handle comparison token combinations (eg. ">", "=" to ">=")
            // NOTE(review): the pairs are identified purely by token content, regardless of token type - confirm that this is intended
            var comparisonRewrittenTokens = new List<IToken>();

            for (var index = 0; index < additionSubtractionRewrittenTokens.Count; index++)
            {
                var token = additionSubtractionRewrittenTokens[index];
                if (index == (additionSubtractionRewrittenTokens.Count - 1))
                {
                    // The final token has nothing after it that it could be combined with
                    comparisonRewrittenTokens.Add(token);
                    continue;
                }

                var nextToken     = additionSubtractionRewrittenTokens[index + 1];
                var combineTokens = (
                    ((token.Content == "<") && (nextToken.Content == ">")) ||
                    ((token.Content == ">") && (nextToken.Content == "=")) ||
                    ((token.Content == "<") && (nextToken.Content == "="))
                    );
                if (combineTokens)
                {
                    // Both tokens are replaced by a single one, so the next token must be skipped over
                    comparisonRewrittenTokens.Add(AtomToken.GetNewToken(token.Content + nextToken.Content, token.LineIndex));
                    index++;
                    continue;
                }
                comparisonRewrittenTokens.Add(token);
            }
            return comparisonRewrittenTokens;
        }
Пример #5
0
        /// <summary>
        /// Break down scriptContent into a combination of StringToken, DateLiteralToken, CommentToken, InlineCommentToken, UnprocessedContentToken,
        /// EndOfStatementSameLineToken and (for square-bracket-escaped names) AtomToken instances - the end of statement tokens will not have been
        /// comprehensively handled. Line returns are normalised to "\n" before processing. This will never return null nor a set containing any
        /// null references.
        /// </summary>
        /// <param name="scriptContent">The VBScript source to segment; must not be null</param>
        /// <exception cref="ArgumentNullException">Thrown if scriptContent is null</exception>
        /// <exception cref="ArgumentException">Thrown if a string, escaped variable name or date literal is still open when the content ends or if
        /// a date literal's content is rejected by the limited date parser</exception>
        /// <exception cref="Exception">Thrown if a line return is encountered within a string, escaped variable name or date literal</exception>
        public static IEnumerable<IToken> SegmentString(string scriptContent)
        {
            if (scriptContent == null)
            {
                throw new ArgumentNullException("scriptContent");
            }

            // Normalise line returns
            scriptContent = scriptContent.Replace("\r\n", "\n").Replace('\r', '\n');

            var index        = 0;
            var tokenContent = "";
            var tokens       = new List<IToken>();
            var lineIndex    = 0;
            var lineIndexForStartOfContent = 0;

            while (index < scriptContent.Length)
            {
                var chr = scriptContent.Substring(index, 1);

                // Check for comment
                bool isComment;
                if (chr == "'")
                {
                    isComment = true;
                }
                else if (index <= (scriptContent.Length - 3))
                {
                    var threeChars = scriptContent.Substring(index, 3);
                    var fourthChar = (index == scriptContent.Length - 3) ? (char?)null : scriptContent[index + 3];

                    // "REM" only starts a comment if it is a word in its own right - if it directly follows a letter, digit or underscore then
                    // it is the tail end of some other name (eg. "theorem = 1") and must not be mistaken for a comment marker
                    var previousChar = (index == 0) ? (char?)null : scriptContent[index - 1];
                    var followsNameCharacter = (previousChar != null) && (char.IsLetterOrDigit(previousChar.Value) || (previousChar.Value == '_'));
                    if (!followsNameCharacter &&
                        threeChars.Equals("REM", StringComparison.InvariantCultureIgnoreCase) &&
                        ((fourthChar == null) || _whiteSpaceCharsExceptLineReturn.Contains(fourthChar.Value)))
                    {
                        isComment = true;
                        index    += 2;
                    }
                    else
                    {
                        isComment = false;
                    }
                }
                else
                {
                    isComment = false;
                }
                if (isComment)
                {
                    // Store any previous token content
                    bool isInlineComment;
                    if (tokenContent != "")
                    {
                        // If there has been any content on the same line as this comment, then this is an inline comment
                        var contentAfterLastLineReturn = tokenContent.Split('\n').Last();
                        isInlineComment = (contentAfterLastLineReturn.Trim() != "");
                        tokens.Add(new UnprocessedContentToken(tokenContent, lineIndexForStartOfContent));
                        tokenContent = "";
                    }
                    else
                    {
                        isInlineComment = false;
                    }

                    // Move past comment marker and look for end of comment (end of the line) then store in a CommentToken instance
                    // - Note: Always want an EndOfStatementNewLineToken to appear before comments, so ensure this is the case (if the previous token was
                    //   a Comment it doesn't matter, if the previous statement was a String we'll definitely need an end-of-statement, if the previous
                    //   was Unprocessed, we only need end-of-statement if the content didn't end with a line-return)
                    lineIndexForStartOfContent = lineIndex;
                    index++;
                    int breakPoint = scriptContent.IndexOf("\n", index);
                    if (breakPoint == -1)
                    {
                        breakPoint = scriptContent.Length;
                    }
                    if (tokens.Count > 0)
                    {
                        var prevToken = tokens[tokens.Count - 1];
                        if (prevToken is UnprocessedContentToken)
                        {
                            // UnprocessedContentToken MAY conclude with end-of-statement content, we'll need to check
                            if (!prevToken.Content.TrimEnd(_whiteSpaceCharsExceptLineReturn).EndsWith("\n"))
                            {
                                tokens.RemoveAt(tokens.Count - 1);
                                var unprocessedContentToRecord = prevToken.Content.TrimEnd('\t', ' ');
                                if (unprocessedContentToRecord != "")
                                {
                                    tokens.Add(new UnprocessedContentToken(unprocessedContentToRecord, prevToken.LineIndex));
                                    tokens.Add(new EndOfStatementSameLineToken(prevToken.LineIndex));
                                }
                            }
                        }
                    }
                    if (tokens.Any() && ((tokens.Last() is DateLiteralToken) || (tokens.Last() is StringToken)))
                    {
                        // Quoted literals (ie. string or date) CAN'T contain end-of-statement content so we'll definitely need an EndOfStatementNewLineToken
                        // Note: This has to be done after the above work in case there was a literal token then some whitespace (which is removed above)
                        // then a Comment. If the work above wasn't done before this check then "prevToken" would not be a StringToken, it would be the
                        // whitespace - but that would be removed and then the literal would be arranged right next to the Comment, without an end-
                        // of-statement token between them!
                        tokens.Add(new EndOfStatementSameLineToken(lineIndexForStartOfContent));
                    }
                    var commentContent = scriptContent.Substring(index, breakPoint - index);
                    if (isInlineComment)
                    {
                        tokens.Add(new InlineCommentToken(commentContent, lineIndexForStartOfContent));
                    }
                    else
                    {
                        tokens.Add(new CommentToken(commentContent, lineIndexForStartOfContent));
                    }
                    index = breakPoint;
                    lineIndex++;
                    lineIndexForStartOfContent = lineIndex;
                }

                // Check for string content
                else if (chr == "\"")
                {
                    // Store any previous token content
                    if (tokenContent != "")
                    {
                        tokens.Add(new UnprocessedContentToken(tokenContent, lineIndexForStartOfContent));
                        tokenContent = "";
                    }

                    // Try to grab string content
                    lineIndexForStartOfContent = lineIndex;
                    var indexString = index + 1;
                    while (true)
                    {
                        if (indexString >= scriptContent.Length)
                        {
                            // The content ran out before the closing quote was found - report this explicitly rather than allowing the
                            // Substring call below to fail with an out-of-range error that says nothing about the script
                            throw new ArgumentException("Encountered end of content in string content around line " + (lineIndexForStartOfContent + 1));
                        }
                        chr = scriptContent.Substring(indexString, 1);
                        if (chr == "\n")
                        {
                            throw new Exception("Encountered line return in string content around line " + (lineIndexForStartOfContent + 1));
                        }
                        if (chr != "\"")
                        {
                            tokenContent += chr;
                        }
                        else
                        {
                            // Quote character - is it doubled (ie. escaped quote)?
                            string chrNext;
                            if (indexString < (scriptContent.Length - 1))
                            {
                                chrNext = scriptContent.Substring(indexString + 1, 1);
                            }
                            else
                            {
                                chrNext = null;
                            }
                            if (chrNext == "\"")
                            {
                                // Escaped quote: push past and add single chr to content
                                indexString++;
                                tokenContent += "\"";
                            }
                            else
                            {
                                // Non-escaped quote: string end
                                tokens.Add(new StringToken(tokenContent, lineIndexForStartOfContent));
                                tokenContent = "";
                                lineIndexForStartOfContent = lineIndex;
                                index = indexString;
                                break;
                            }
                        }
                        indexString++;
                    }
                }

                // Check for crazy VBScript escaped-name variable content
                // - It's acceptable to name a variable pretty much anything if it's wrapped in square brackets; seems to be any character other than
                //   line returns and a closing square bracket (since there is no support for escaping the closing bracket). This includes single and
                //   double quotes, whitespace, colons, numbers, underscores, anything - in fact a valid variable name is [ ], meaning a single space
                //   wrapped in square brackets! This is a little-known feature but it shouldn't be too hard to parse out at this point.
                else if (chr == "[")
                {
                    // Store any previous token content
                    if (tokenContent != "")
                    {
                        tokens.Add(new UnprocessedContentToken(tokenContent, lineIndexForStartOfContent));
                    }

                    lineIndexForStartOfContent = lineIndex;
                    tokenContent = "[";
                    var indexString = index + 1;
                    while (true)
                    {
                        if (indexString >= scriptContent.Length)
                        {
                            // The content ran out before the closing bracket was found
                            throw new ArgumentException("Encountered end of content in escaped-content variable name around line " + (lineIndexForStartOfContent + 1));
                        }
                        chr = scriptContent.Substring(indexString, 1);
                        if (chr == "\n")
                        {
                            throw new Exception("Encountered line return in escaped-content variable name");
                        }
                        tokenContent += chr;
                        if (chr == "]")
                        {
                            tokens.Add(AtomToken.GetNewToken(tokenContent, lineIndexForStartOfContent));
                            tokenContent = "";
                            lineIndexForStartOfContent = lineIndex;
                            index = indexString;
                            break;
                        }
                        indexString++;
                    }
                }

                // VBScript supports date literals, wrapped in hashes. These introduce a range of complications - such as literal comparisons requiring
                // special logic, as string and number literals do - eg. ("a" = #2015-5-27#) will fail at runtime as "a" must be parse-able as a date,
                // and it isn't. It also has complications around culture - so the value #1 5 2015# must be parsed as 2015-5-1 in the UK when the
                // translated output is executed but as 2015-1-5 in the US. On top of that, VBScript is very flexible in its acceptance of date formats -
                // amongst these problems is that the year is optional and so #1 5# means 1st of May or 5th of January (depending upon culture) in the
                // current year - however, once a date literal has had a default year set for a given request it must stick to that year; so if the request
                // is unfortunate enough to be slow and cross years, a given date literal must consistently stick to using the year from when the request
                // started. When a new request starts, however, if the year has changed then that new request must default to that new year, it would be no
                // good if the year was determined once (at translation time) and then never changed, since this would be inconsistent with VBScript's behaviour
                // of treating each request as a whole new start-up / serve / tear-down process. This means that the value #29 2# will change by year, being
                // the 29th of February if the current year is a leap year and the 1st of February 2029 if not (since #29 2# will be interpreted as year 29
                // and month 2 since 29 could not be a valid month - and then 29 will be treated as a two-digit year which must be bumped up to 2029). Also
                // note that even in the US #29 2# will be interpreted as the 29th of February (or 1st of February 2029) since there is no way to parse that
                // as a month-then-day format).
                // - Note: This gets the lowest priority in terms of wrapping characters, so [#1 1#] is a variable name and not something containing a
                //   date, likewise "#1 1#" is a string and nothing to do with a date. There are no escape characters. If the wrapped value can not
                //   possibly be valid then an exception will be raised at this point.
                else if (chr == "#")
                {
                    // Store any previous token content
                    if (tokenContent != "")
                    {
                        tokens.Add(new UnprocessedContentToken(tokenContent, lineIndexForStartOfContent));
                    }

                    lineIndexForStartOfContent = lineIndex;
                    tokenContent = "";
                    var indexString = index + 1;
                    while (true)
                    {
                        if (indexString >= scriptContent.Length)
                        {
                            // The content ran out before the closing hash was found
                            throw new ArgumentException("Encountered end of content in date literal content around line " + (lineIndexForStartOfContent + 1));
                        }
                        chr = scriptContent.Substring(indexString, 1);
                        if (chr == "\n")
                        {
                            throw new Exception("Encountered line return in date literal content");
                        }
                        if (chr == "#")
                        {
                            // We can only catch certain kinds of invalid date literal format here since some formats are culture-dependent (eg. "1 May 2010" is
                            // valid in English but not in French) and I don't want to assume that translated programs are running with the same culture as the
                            // translation process. The "limitedDateParser" can catch some invalid formats, which is better than nothing, but others will have
                            // to be checked at runtime (see the notes around the instantiation of the limitedDateParser).
                            try
                            {
                                _limitedDateParser.Parse(tokenContent);
                            }
                            catch (Exception e)
                            {
                                // Line indexes are zero-based internally but are reported one-based, as they are for invalid string content
                                throw new ArgumentException("Invalid date literal content encountered on line " + (lineIndex + 1) + ": #" + tokenContent + "#", e);
                            }
                            tokens.Add(new DateLiteralToken(tokenContent, lineIndexForStartOfContent));
                            tokenContent = "";
                            lineIndexForStartOfContent = lineIndex;
                            index = indexString;
                            break;
                        }
                        else
                        {
                            tokenContent += chr;
                        }
                        indexString++;
                    }
                }

                // Mustn't be neither comment, string, date nor VBScript-escaped-variable-name..
                else
                {
                    tokenContent += chr;
                }

                // Move to next character (if any)..
                index++;
                if (chr == "\n")
                {
                    lineIndex++;
                }
            }

            // Don't let any unhandled content get away!
            if (tokenContent != "")
            {
                tokens.Add(new UnprocessedContentToken(tokenContent, lineIndexForStartOfContent));
            }

            return tokens;
        }
Пример #6
0
        /// <summary>
        /// Split the content of an UnprocessedContentToken into individual tokens (created via AtomToken.GetNewToken), breaking on whitespace
        /// (which is discarded, other than line returns) and on the characters in TokenBreakChars (which are kept as tokens of their own). The
        /// resulting set is passed through handleLineReturnCancels before being returned. This will never return null nor a set containing any
        /// null references.
        /// </summary>
        public static IEnumerable<IToken> BreakUnprocessedToken(UnprocessedContentToken token)
        {
            if (token == null)
            {
                throw new ArgumentNullException("token");
            }

            var currentLine = token.LineIndex;
            var pending     = "";
            var source      = token.Content;
            var results     = new List<IToken>();

            for (var position = 0; position < source.Length; position++)
            {
                var current      = source.Substring(position, 1);
                var isLineReturn = (current == "\n");

                // Whitespace (other than a line return) ends whatever token was being built up and is itself thrown away
                if (!isLineReturn && char.IsWhiteSpace(current, 0))
                {
                    if (pending != "")
                    {
                        results.Add(AtomToken.GetNewToken(pending, currentLine));
                    }
                    pending = "";
                    continue;
                }

                // Known break characters always split. An underscore only splits when it follows whitespace (where it is a line continuation),
                // otherwise it must be part of a name and so belongs with the token being built up.
                bool splitsToken;
                if (TokenBreakChars.IndexOf(current) != -1)
                {
                    splitsToken = true;
                }
                else
                {
                    splitsToken = (current == "_") && (position > 0) && char.IsWhiteSpace(source, position - 1);
                }

                // An ampersand could be a string concatenation or it could be the start of a hex number (eg. "&h001") - in the latter case the
                // ampersand must stay with the characters that follow it, rather than being broken out as a token in its own right
                // NOTE(review): only a decimal digit is accepted directly after the "h", so "&hFF" is not recognised here - confirm intended
                if (splitsToken && (current == "&") && ((position + 2) < source.Length))
                {
                    var prefixLetter = source.Substring(position + 1, 1);
                    var leadingDigit = source.Substring(position + 2, 1);
                    if (prefixLetter.Equals("H", StringComparison.InvariantCultureIgnoreCase) && ("0123456789".IndexOf(leadingDigit) != -1))
                    {
                        splitsToken = false;
                    }
                }

                if (splitsToken)
                {
                    // Unlike with whitespace, the break character is retained - so record whatever preceded it and then the character itself
                    if (pending != "")
                    {
                        results.Add(AtomToken.GetNewToken(pending, currentLine));
                    }
                    results.Add(AtomToken.GetNewToken(current, currentLine));
                    pending = "";
                }
                else
                {
                    pending += current;
                }

                // Content after a line return belongs to the next line
                if (isLineReturn)
                {
                    currentLine++;
                }
            }
            if (pending != "")
            {
                results.Add(AtomToken.GetNewToken(pending, currentLine));
            }

            // Handle ignore-line-return / end-of-statement combinations
            return handleLineReturnCancels(results);
        }