Example #1
0
        /// <summary>
        /// Wraps <paramref name="code"/> in a new <see cref="CodeSourceUnit"/>, parses it and closes the unit.
        /// </summary>
        /// <param name="code">Source code to be parsed.</param>
        /// <param name="filePath">Source file path passed to the created unit (used for error reporting).</param>
        /// <param name="factory">
        /// Nodes factory. When <c>null</c>, a <see cref="BasicNodesFactory"/> bound to the new unit is created.
        /// </param>
        /// <param name="errors">
        /// Error sink. When <c>null</c>, the factory itself is used if it implements the error sink interface;
        /// otherwise an <c>EmptyErrorSink</c> is created.
        /// </param>
        /// <param name="recovery">Error recovery, forwarded to the parser as is. Can be <c>null</c>.</param>
        /// <param name="features">Optional. Language features.</param>
        /// <param name="initialState">
        /// Optional. Initial parser state.
        /// This allows e.g. to parse PHP code without encapsulating the code into opening and closing tags.
        /// </param>
        /// <returns>The parsed and closed <see cref="CodeSourceUnit"/>.</returns>
        public static SourceUnit /*!*/ ParseCode(string code, string filePath,
                                                 INodesFactory<LangElement, Span> factory = null,
                                                 Errors.IErrorSink<Span> errors = null,
                                                 Errors.IErrorRecovery recovery = null,
                                                 LanguageFeatures features = LanguageFeatures.Basic,
                                                 Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL)
        {
            var sourceUnit = new CodeSourceUnit(code, filePath, Encoding.UTF8, initialState, features);

            // Fall back to the default factory; it needs the unit, so it can only be created here.
            INodesFactory<LangElement, Span> nodes = factory ?? new BasicNodesFactory(sourceUnit);

            // Explicit sink first, then the factory acting as a sink, then a sink that ignores everything.
            Errors.IErrorSink<Span> sink =
                errors ?? (nodes as Errors.IErrorSink<Span>) ?? new EmptyErrorSink<Span>();

            sourceUnit.Parse(nodes, sink, recovery);
            sourceUnit.Close();

            return sourceUnit;
        }
Example #2
0
 /// <summary>
 /// Creates a source unit over in-memory <paramref name="code"/> that belongs to <paramref name="sourceFile"/>.
 /// </summary>
 /// <param name="code">Source text; stored by the unit and used to compute the line breaks given to the base class.</param>
 /// <param name="sourceFile">Source file forwarded to the base constructor.</param>
 /// <param name="encoding">Encoding forwarded to the base constructor.</param>
 /// <param name="initialState">Lexical state stored as the unit's initial state.</param>
 public CodeSourceUnit(
     string /*!*/ code,
     PhpSourceFile /*!*/ sourceFile,
     Encoding /*!*/ encoding,
     Lexer.LexicalStates initialState)
     : base(sourceFile, encoding, Text.LineBreaks.Create(code))
 {
     this.initialState = initialState;
     this.code = code;
 }
Example #3
0
 /// <summary>
 /// Creates a source unit over in-memory <paramref name="code"/> identified by <paramref name="filePath"/>.
 /// </summary>
 /// <param name="code">Source text; stored by the unit and used to compute the line breaks given to the base class.</param>
 /// <param name="filePath">File path forwarded to the base constructor.</param>
 /// <param name="encoding">Encoding forwarded to the base constructor.</param>
 /// <param name="initialState">Lexical state stored as the unit's initial state.</param>
 public CodeSourceUnit(
     string /*!*/ code,
     string /*!*/ filePath,
     Encoding /*!*/ encoding,
     Lexer.LexicalStates initialState)
     : base(filePath, encoding, Text.LineBreaks.Create(code))
 {
     this.initialState = initialState;
     this.code = code;
 }
Example #4
0
 /// <summary>
 /// Creates a test source unit; all arguments are forwarded to the base constructor and
 /// <paramref name="features"/> is additionally remembered locally.
 /// </summary>
 /// <param name="code">Source text forwarded to the base constructor.</param>
 /// <param name="filePath">File path forwarded to the base constructor.</param>
 /// <param name="encoding">Encoding forwarded to the base constructor.</param>
 /// <param name="initialState">Optional. Initial lexical state forwarded to the base constructor.</param>
 /// <param name="features">Optional. Language features; forwarded to the base constructor and kept in this instance.</param>
 public TestSourceUnit(
     string /*!*/ code,
     string /*!*/ filePath,
     Encoding /*!*/ encoding,
     Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL,
     LanguageFeatures features = LanguageFeatures.Basic)
     : base(code, filePath, encoding, initialState, features)
 {
     // Local copy of the features in addition to whatever the base class keeps.
     this._features = features;
 }
Example #5
0
        /// <summary>
        /// Creates a source unit for a piece of in-memory code located at a given position.
        /// </summary>
        /// <param name="compilationUnit">Compilation unit forwarded to the base constructor.</param>
        /// <param name="code">Source text stored by the unit.</param>
        /// <param name="sourceFile">Source file forwarded to the base constructor.</param>
        /// <param name="encoding">Encoding forwarded to the base constructor.</param>
        /// <param name="line">Line stored by the unit.</param>
        /// <param name="column">Column stored by the unit.</param>
        public SourceCodeUnit(
            CompilationUnitBase /*!*/ compilationUnit,
            string /*!*/ code,
            PhpSourceFile /*!*/ sourceFile,
            Encoding /*!*/ encoding,
            int line,
            int column)
            : base(compilationUnit, sourceFile, encoding)
        {
            // The lexer always starts in scripting state for this kind of unit.
            this.initialState = Lexer.LexicalStates.ST_IN_SCRIPTING;

            this.column = column;
            this.line = line;
            this.code = code;
        }
Example #6
0
 /// <summary>
 /// Creates a source unit over in-memory <paramref name="code"/> identified by <paramref name="filePath"/>.
 /// </summary>
 /// <param name="code">Source text; stored by the unit and used to compute the line breaks given to the base class.</param>
 /// <param name="filePath">File path forwarded to the base constructor.</param>
 /// <param name="encoding">Encoding forwarded to the base constructor.</param>
 /// <param name="initialState">Optional. Lexical state stored as the unit's initial state.</param>
 /// <param name="features">Optional. Language features stored by the unit.</param>
 public CodeSourceUnit(
     string /*!*/ code,
     string /*!*/ filePath,
     Encoding /*!*/ encoding,
     Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL,
     LanguageFeatures features = LanguageFeatures.Basic)
     : base(filePath, encoding, Text.LineBreaks.Create(code))
 {
     this.features = features;
     this.initialState = initialState;
     this.code = code;
 }
Example #7
0
        /// <summary>
        /// Creates a source unit for a piece of in-memory code located at a given position.
        /// </summary>
        /// <param name="compilationUnit">Compilation unit forwarded to the base constructor.</param>
        /// <param name="code">Source text; stored by the unit and used to compute the line breaks given to the base class.</param>
        /// <param name="sourceFile">Source file forwarded to the base constructor.</param>
        /// <param name="encoding">Encoding forwarded to the base constructor.</param>
        /// <param name="line">Line stored by the unit.</param>
        /// <param name="column">Column stored by the unit.</param>
        public SourceCodeUnit(
            CompilationUnitBase /*!*/ compilationUnit,
            string /*!*/ code,
            PhpSourceFile /*!*/ sourceFile,
            Encoding /*!*/ encoding,
            int line,
            int column)
            : base(compilationUnit, sourceFile, encoding, Text.LineBreaks.Create(code))
        {
            // Opening and closing script tags are not present, so lexing starts directly in scripting state.
            this.initialState = Lexer.LexicalStates.ST_IN_SCRIPTING;

            this.column = column;
            this.line = line;
            this.code = code;
        }
Example #8
0
        /// <summary>
        /// Wraps <paramref name="code"/> in a new <see cref="CodeSourceUnit"/>, parses it and closes the unit.
        /// </summary>
        /// <param name="code">Source code to be parsed.</param>
        /// <param name="sourceFile">Source file used for error reporting.</param>
        /// <param name="errors">
        /// Errors sink, forwarded to the unit's parse call.
        /// NOTE(review): annotated non-null (<c>/*!*/</c>) in the signature; whether <c>null</c> is tolerated
        /// depends on the parse implementation - confirm.
        /// </param>
        /// <param name="reductionsSink">
        /// Reduction sink, forwarded to the unit's parse call. Defaults to <c>null</c>.
        /// NOTE(review): the <c>/*!*/</c> annotation conflicts with the <c>null</c> default - confirm which is intended.
        /// </param>
        /// <param name="features">Optional. Language features.</param>
        /// <param name="initialState">
        /// Optional. Initial parser state.
        /// This allows e.g. to parse PHP code without encapsulating the code into opening and closing tags.
        /// </param>
        /// <returns>The parsed and closed <see cref="CodeSourceUnit"/>.</returns>
        public static SourceUnit /*!*/ ParseCode(string code, PhpSourceFile sourceFile,
                                                 ErrorSink /*!*/ errors,
                                                 IReductionsSink /*!*/ reductionsSink = null,
                                                 LanguageFeatures features = LanguageFeatures.Php5,
                                                 Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL)
        {
            CodeSourceUnit /*!*/ codeUnit = new CodeSourceUnit(code, sourceFile, Encoding.UTF8, initialState);

            codeUnit.Parse(errors, reductionsSink, features);
            codeUnit.Close();

            return codeUnit;
        }
Example #9
0
        /// <summary>
        /// Tells whether <paramref name="state"/> is one of the lexical states this helper treats as
        /// "inside a string": double quotes, backquote, heredoc, in-heredoc, in-string or in-shell.
        /// </summary>
        /// <param name="state">Lexical state to classify.</param>
        /// <returns><c>true</c> for the six states listed above; <c>false</c> for every other state.</returns>
        private static bool IsInString(Lexer.LexicalStates state)
        {
            return state == Lexer.LexicalStates.ST_DOUBLE_QUOTES
                || state == Lexer.LexicalStates.ST_BACKQUOTE
                || state == Lexer.LexicalStates.ST_HEREDOC
                || state == Lexer.LexicalStates.ST_IN_HEREDOC
                || state == Lexer.LexicalStates.ST_IN_STRING
                || state == Lexer.LexicalStates.ST_IN_SHELL;
        }
Example #10
0
        /// <summary>
        /// Parses the text of this unit and stores the resulting tree in <c>ast</c>.
        /// </summary>
        /// <param name="factory">Nodes factory handed to both the syntax provider and the parser.</param>
        /// <param name="errors">Error sink handed to both the lexer and the parser.</param>
        /// <param name="recovery">Optional. Error recovery forwarded to the parser.</param>
        /// <param name="features">Optional. Language features handed to both the lexer and the parser.</param>
        /// <param name="state">Optional. Lexical state the lexer starts in.</param>
        public void Parse(NodesFactory factory, IErrorSink<Span> errors,
                          IErrorRecovery recovery = null,
                          LanguageFeatures features = LanguageFeatures.Basic,
                          Lexer.LexicalStates state = Lexer.LexicalStates.INITIAL)
        {
            var parser = new Parser();

            using (var source = new StringReader(SourceText.ToString()))
            {
                // Token pipeline: lexer -> token provider -> additional syntax provider.
                var lexer = new Lexer(source, Encoding.UTF8, errors, features, 0, state);
                var tokens = new PhpTokenProvider(lexer, this);

                using (var provider = new AdditionalSyntaxProvider(tokens, factory, parser.CreateTypeRef))
                {
                    ast = parser.Parse(provider, factory, features, errors, recovery);
                }
            }
        }
Example #11
0
        /// <summary>
        /// Feeds the data-row file to the lexer one line at a time, carrying the lexical state over from
        /// line to line, and checks that every produced token has a valid span.
        /// </summary>
        public void LexerGetNextTokenByLineTest()
        {
            string path = (string)TestContext.DataRow["files"];

            TestErrorSink errorSink = new TestErrorSink();

            // The reader only serves to construct the lexer (each line is supplied through Initialize below),
            // but it still holds an open file handle and therefore has to be disposed.
            using (var fileReader = new StreamReader(path))
            {
                Lexer lexer = new Lexer(fileReader, Encoding.UTF8, errorSink,
                                        LanguageFeatures.ShortOpenTags, 0, Lexer.LexicalStates.INITIAL);

                Lexer.LexicalStates previousState = Lexer.LexicalStates.INITIAL;
                foreach (var line in File.ReadAllLines(path))
                {
                    // Restart the lexer on this single line, continuing in the state the previous line ended in.
                    lexer.Initialize(new StringReader(line + Environment.NewLine), previousState, true, 0);

                    while (lexer.GetNextToken() != Tokens.EOF)
                    {
                        Assert.IsTrue(lexer.TokenSpan.IsValid);
                    }
                    previousState = lexer.CurrentLexicalState;
                }
            }
        }
Example #12
0
        /// <summary>
        /// Runs the lexer over generated five-character inputs built from three small alphabets, starting in
        /// each of several lexical states, and checks that every token span is valid and that the lexer
        /// reaches EOF within a bounded number of tokens.
        /// </summary>
        public void LexerStringsTest()
        {
            var sink = new TestErrorSink();
            var lexer = new Lexer(new StringReader("\"\""), Encoding.UTF8, sink,
                                  LanguageFeatures.ShortOpenTags, 0, Lexer.LexicalStates.INITIAL);

            char[][] alphabets =
            {
                new[] { '$', '{', 'n', '\0', '\r', '\n', ' ' },
                new[] { '\'', '\\', 'x', 'c', '"', '`', '8', '0' },
                new[] { '/', '*', '?', '>', ';' },
            };

            Lexer.LexicalStates[] startStates =
            {
                Lexer.LexicalStates.ST_DOUBLE_QUOTES,
                Lexer.LexicalStates.ST_SINGLE_QUOTES,
                Lexer.LexicalStates.ST_BACKQUOTE,
                Lexer.LexicalStates.ST_HEREDOC,
                Lexer.LexicalStates.ST_NOWDOC,
                Lexer.LexicalStates.ST_COMMENT,
                Lexer.LexicalStates.ST_DOC_COMMENT,
                Lexer.LexicalStates.INITIAL,
                Lexer.LexicalStates.ST_IN_SCRIPTING,
            };

            // Upper bound on tokens per input; reaching it is reported as a failure (lexer not terminating).
            const int maxTokens = 100;

            // 'word' holds per-position indices into the current alphabet; 'text' is the materialized input.
            // presumably Increment advances 'word' to the next combination and returns false once exhausted,
            // and ToArray maps the indices to characters - verify against the helpers.
            int[] word = new int[5];
            char[] text = new char[word.Length];

            foreach (char[] alphabet in alphabets)
            {
                foreach (Lexer.LexicalStates state in startStates)
                {
                    while (Increment(word, alphabet.Length))
                    {
                        ToArray(word, text, alphabet);
                        string line = new string(text);
                        lexer.Initialize(new StringReader(line), state, true, 0);

                        int count = 0;
                        while (lexer.GetNextToken() != Tokens.EOF && count++ < maxTokens)
                        {
                            Assert.IsTrue(lexer.TokenSpan.IsValid, line);
                            Assert.IsTrue(lexer.TokenSpan.Length >= 0, line + " - " + state.ToString() + " - " + lexer.TokenSpan.Start.ToString());
                        }

                        Assert.IsTrue(count < maxTokens, line);
                    }
                }
            }
        }
Example #13
0
        /// <summary>
        /// Parses the text supplied by <paramref name="reader"/> and returns the resulting AST root.
        /// The parser instance is set up for this run, the base parser is executed, and the per-run
        /// fields are cleared again before returning.
        /// </summary>
        /// <param name="sourceUnit">Source unit being parsed; stored in the parser and passed to the scanner.</param>
        /// <param name="reader">Source text reader. Asserted to be non-null in debug builds.</param>
        /// <param name="errors">Error sink. Asserted to be non-null in debug builds.</param>
        /// <param name="reductionsSink">
        /// Reductions sink. When <c>null</c>, <c>NullReductionSink</c> is stored in the parser instead.
        /// </param>
        /// <param name="initialPosition">Initial position handed to the scanner.</param>
        /// <param name="initialLexicalState">Lexical state the scanner is put into before parsing starts.</param>
        /// <param name="features">Language features; also decides whether Unicode semantics are enabled.</param>
        /// <returns>The value of <c>astRoot</c> after the base parser has run.</returns>
        public GlobalCode Parse(SourceUnit /*!*/ sourceUnit, TextReader /*!*/ reader, ErrorSink /*!*/ errors,
                                IReductionsSink reductionsSink, Parsers.Position initialPosition, Lexer.LexicalStates initialLexicalState,
                                LanguageFeatures features)
        {
            Debug.Assert(reader != null && errors != null);

            // initialization:
            this.sourceUnit     = sourceUnit;
            this.errors         = errors;
            this.features       = features;
            this.reader         = reader;
            this.reductionsSink = reductionsSink ?? NullReductionSink;
            InitializeFields();

            // The scanner gets the raw parameter (not the field): the comments sink is null when no sink was
            // supplied or when the supplied sink does not implement ICommentsSink.
            this.scanner = new Scanner(initialPosition, reader, sourceUnit, errors, reductionsSink as ICommentsSink, features);
            this.scanner.CurrentLexicalState = initialLexicalState;
            this.currentScope = new Scope(1);             // starts assigning scopes from 2 (1 is reserved for prepended inclusion)

            this.unicodeSemantics = (features & LanguageFeatures.UnicodeSemantics) != 0;


            base.Scanner = this.scanner;
            base.Parse();

            // Take the root before ClearFields() runs.
            GlobalCode result = astRoot;

            // clean and let GC collect unused AST and other stuff:
            // (not in a finally block, so this is skipped if base.Parse() throws)
            ClearFields();

            return(result);
        }
Example #14
0
        /// <summary>
        /// Gets category of a token in given lexical context.
        /// </summary>
        /// <remarks>
        /// Most tokens map to a fixed category. For a handful of tokens (brackets, braces, identifiers,
        /// <c>$</c>, <c>-&gt;</c> and variables) the category depends on whether the lexer is currently in one
        /// of the states treated as "inside a string": double quotes, backquote, heredoc, in-string or in-shell.
        /// Tokens that are not listed yield <see cref="TokenCategory.Unknown"/>.
        /// </remarks>
        /// <param name="token">Token.</param>
        /// <param name="CurrentLexicalState">Current lexical state.</param>
        /// <returns>A token category.</returns>
        public static TokenCategory GetTokenCategory(this Tokens token, Lexer.LexicalStates CurrentLexicalState)
        {
            // Only consulted by the "Tokens with Ambiguous Category" cases at the end of the switch.
            bool inString = CurrentLexicalState == Lexer.LexicalStates.ST_DOUBLE_QUOTES ||
                            CurrentLexicalState == Lexer.LexicalStates.ST_BACKQUOTE ||
                            CurrentLexicalState == Lexer.LexicalStates.ST_HEREDOC ||
                            CurrentLexicalState == Lexer.LexicalStates.ST_IN_STRING ||
                            CurrentLexicalState == Lexer.LexicalStates.ST_IN_SHELL;

            switch (token)
            {
                #region Special Keywords

            // Same category as the "Basic Keywords" group below; the two groups differ only in how they are listed.
            case Tokens.T_GOTO:
            case Tokens.T_TRY:
            case Tokens.T_CATCH:
            case Tokens.T_FINALLY:
            case Tokens.T_THROW:
            case Tokens.T_INTERFACE:
            case Tokens.T_IMPLEMENTS:
            case Tokens.T_CLONE:
            case Tokens.T_ABSTRACT:
            case Tokens.T_FINAL:
            case Tokens.T_PRIVATE:
            case Tokens.T_PROTECTED:
            case Tokens.T_PUBLIC:
            case Tokens.T_INSTANCEOF:
            case Tokens.T_NAMESPACE:
            case Tokens.T_USE:
                return(TokenCategory.Keyword);

                #endregion

                #region Basic Keywords

            case Tokens.T_REQUIRE_ONCE:
            case Tokens.T_REQUIRE:
            case Tokens.T_EVAL:
            case Tokens.T_INCLUDE_ONCE:
            case Tokens.T_INCLUDE:
            case Tokens.T_LOGICAL_OR:               // or
            case Tokens.T_LOGICAL_XOR:              // xor
            case Tokens.T_LOGICAL_AND:              // and
            case Tokens.T_PRINT:
            case Tokens.T_NEW:
            case Tokens.T_EXIT:
            case Tokens.T_IF:
            case Tokens.T_ELSEIF:
            case Tokens.T_ELSE:
            case Tokens.T_ENDIF:
            case Tokens.T_ECHO:
            case Tokens.T_DO:
            case Tokens.T_WHILE:
            case Tokens.T_ENDWHILE:
            case Tokens.T_FOR:
            case Tokens.T_ENDFOR:
            case Tokens.T_FOREACH:
            case Tokens.T_ENDFOREACH:
            case Tokens.T_AS:
            case Tokens.T_SWITCH:
            case Tokens.T_ENDSWITCH:
            case Tokens.T_CASE:
            case Tokens.T_DEFAULT:
            case Tokens.T_BREAK:
            case Tokens.T_CONTINUE:
            case Tokens.T_FUNCTION:
            case Tokens.T_CONST:
            case Tokens.T_RETURN:
            case Tokens.T_YIELD:
            case Tokens.T_GLOBAL:
            case Tokens.T_STATIC:
            case Tokens.T_VAR:
            case Tokens.T_UNSET:
            case Tokens.T_ISSET:
            case Tokens.T_EMPTY:
            case Tokens.T_CLASS:
            case Tokens.T_TRAIT:
            case Tokens.T_INSTEADOF:
            case Tokens.T_EXTENDS:
            case Tokens.T_LIST:
            case Tokens.T_ARRAY:
            case Tokens.T_CLASS_C:                  // __CLASS__
            case Tokens.T_TRAIT_C:                  // __TRAIT__
            case Tokens.T_METHOD_C:                 // __METHOD__
            case Tokens.T_FUNC_C:                   // __FUNCTION__
            case Tokens.T_FILE:                     // __FILE__
            case Tokens.T_LINE:                     // __LINE__
            case Tokens.T_DIR:                      // __DIR__
            case Tokens.T_CALLABLE:                 // callable
                return(TokenCategory.Keyword);

                #endregion

                #region Operators

            case Tokens.T_UNSET_CAST:               // (unset)
            case Tokens.T_BOOL_CAST:                // (bool)
            case Tokens.T_OBJECT_CAST:              // (object)
            case Tokens.T_ARRAY_CAST:               // (array)
            case Tokens.T_STRING_CAST:              // (string)
            case Tokens.T_DOUBLE_CAST:              // (double)
            case Tokens.T_INT_CAST:                 // (int)
            case Tokens.T_AT:                       // @
            case Tokens.T_QUESTION:                 // ?
            case Tokens.T_LT:                       // <
            case Tokens.T_GT:                       // >
            case Tokens.T_PERCENT:                  // %
            case Tokens.T_EXCLAM:                   // !
            case Tokens.T_TILDE:                    // ~
            case Tokens.T_EQ:                       // =
            case Tokens.T_SLASH:                    // /
            case Tokens.T_CARET:                    // ^
            case Tokens.T_AMP:                      // &
            case Tokens.T_PLUS:                     // +
            case Tokens.T_MINUS:                    // -
            case Tokens.T_PIPE:                     // |
            case Tokens.T_MUL:                      // *
            case Tokens.T_POW:                      // **
            case Tokens.T_DOT:                      // .
            case Tokens.T_SR_EQUAL:                 // >>=
            case Tokens.T_SL_EQUAL:                 // <<=
            case Tokens.T_XOR_EQUAL:                // ^=
            case Tokens.T_OR_EQUAL:                 // |=
            case Tokens.T_AND_EQUAL:                // &=
            case Tokens.T_MOD_EQUAL:                // %=
            case Tokens.T_CONCAT_EQUAL:             // .=
            case Tokens.T_DIV_EQUAL:                // /=
            case Tokens.T_MUL_EQUAL:                // *=
            case Tokens.T_POW_EQUAL:                // **=
            case Tokens.T_MINUS_EQUAL:              // -=
            case Tokens.T_PLUS_EQUAL:               // +=
            case Tokens.T_BOOLEAN_OR:               // ||
            case Tokens.T_BOOLEAN_AND:              // &&
            case Tokens.T_IS_NOT_IDENTICAL:         // !==
            case Tokens.T_IS_IDENTICAL:             // ===
            case Tokens.T_IS_NOT_EQUAL:             // !=
            case Tokens.T_IS_EQUAL:                 // ==
            case Tokens.T_IS_GREATER_OR_EQUAL:      // >=
            case Tokens.T_IS_SMALLER_OR_EQUAL:      // <=
            case Tokens.T_SR:                       // >>
            case Tokens.T_SL:                       // <<
            case Tokens.T_DEC:                      // --
            case Tokens.T_INC:                      // ++
            case Tokens.T_DOUBLE_COLON:             // ::
            case Tokens.T_COLON:                    // :
            case Tokens.T_DOUBLE_ARROW:             // =>
            case Tokens.T_ELLIPSIS:                 // ...
                return(TokenCategory.Operator);

                #endregion

                #region Others

            case Tokens.T_LPAREN:                           // (
            case Tokens.T_RPAREN:                           // )
            case Tokens.T_SEMI:                             // ;
            case Tokens.T_COMMA:                            // ,
            case Tokens.T_NS_SEPARATOR:                     // \
                return(TokenCategory.Delimiter);

            //case Tokens.T_NAMESPACE_NAME:               // namespace name
            case Tokens.T_STRING_VARNAME:                   // identifier following encapsulated "${"
                return(TokenCategory.Identifier);

            case Tokens.T_DNUMBER:                          // double (or overflown integer) out of string
            case Tokens.T_LNUMBER:                          // integer (or hex integer) out of string
                return(TokenCategory.Number);

            case Tokens.T_DOUBLE_QUOTES:                    // "
            case Tokens.T_BACKQUOTE:                        // `
            case Tokens.T_START_HEREDOC:                    // <<<XXX
            case Tokens.T_END_HEREDOC:                      // XXX
            case Tokens.T_ENCAPSED_AND_WHITESPACE:          // character(s) in string
            case Tokens.T_CONSTANT_ENCAPSED_STRING:         // quoted string not containing '$'
            case Tokens.T_NUM_STRING:                       // number in string
                return(TokenCategory.String);

            case Tokens.T_DOLLAR_OPEN_CURLY_BRACES:         // "${" in string - starts non-string code
            case Tokens.T_CURLY_OPEN:                       // "{$" in string
                return(TokenCategory.StringCode);

            case Tokens.T_WHITESPACE:
                return(TokenCategory.WhiteSpace);

            case Tokens.T_COMMENT:
            case Tokens.T_DOC_COMMENT:
                return(TokenCategory.Comment);

            case Tokens.T_OPEN_TAG:
            case Tokens.T_OPEN_TAG_WITH_ECHO:
            case Tokens.T_CLOSE_TAG:
                return(TokenCategory.ScriptTags);

            case Tokens.T_INLINE_HTML:
                return(TokenCategory.Html);

                #endregion

                #region Tokens with Ambiguous Category

            // The category of the tokens below depends on the 'inString' flag computed above.

            case Tokens.T_LBRACKET:                         // [
            case Tokens.T_RBRACKET:                         // ]
            case Tokens.T_LBRACE:                           // {
                return((inString) ? TokenCategory.String : TokenCategory.Delimiter);

            // Unlike '{', a '}' seen in a string state is classified as StringCode rather than String.
            case Tokens.T_RBRACE:                           // }
                if (inString)
                {
                    // we are in string:
                    return(TokenCategory.StringCode);
                }
                else
                {
                    // part of script:
                    return(TokenCategory.Delimiter);
                }

            case Tokens.T_STRING:                           // identifier
                return((inString) ? TokenCategory.String : TokenCategory.Identifier);

            case Tokens.T_DOLLAR:                           // isolated '$'
            case Tokens.T_OBJECT_OPERATOR:                  // ->
                return((inString) ? TokenCategory.StringCode : TokenCategory.Operator);

            case Tokens.T_VARIABLE:                         // identifier
                return((inString) ? TokenCategory.StringCode : TokenCategory.Variable);

                #endregion

            // Any token not listed above.
            default:
                return(TokenCategory.Unknown);
            }
        }