/// <summary>
/// Builds a <see cref="CodeSourceUnit"/> over <paramref name="code"/>, parses it and closes the unit.
/// </summary>
/// <param name="code">Source code to be parsed.</param>
/// <param name="filePath">Source file used for error reporting.</param>
/// <param name="factory">
/// Nodes factory. When <c>null</c>, a new <see cref="BasicNodesFactory"/> bound to the created unit is used.
/// </param>
/// <param name="errors">
/// Error sink. When <c>null</c>, the factory itself is used if it implements the error sink interface;
/// otherwise an <see cref="EmptyErrorSink{T}"/> is used.
/// </param>
/// <param name="recovery">Error recovery. Can be <c>null</c>.</param>
/// <param name="features">Optional. Language features.</param>
/// <param name="initialState">
/// Optional. Initial parser state.
/// This allows e.g. to parse PHP code without encapsulating the code into opening and closing tags.
/// </param>
/// <returns>The new <see cref="CodeSourceUnit"/>, after it has been parsed and closed.</returns>
public static SourceUnit/*!*/ ParseCode(string code, string filePath,
    INodesFactory<LangElement, Span> factory = null,
    Errors.IErrorSink<Span> errors = null,
    Errors.IErrorRecovery recovery = null,
    LanguageFeatures features = LanguageFeatures.Basic,
    Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL)
{
    CodeSourceUnit unit = new CodeSourceUnit(code, filePath, Encoding.UTF8, initialState, features);

    // Fall back to the default factory when the caller supplied none.
    INodesFactory<LangElement, Span> nodesFactory = factory ?? new BasicNodesFactory(unit);

    // Prefer the explicit sink, then the factory (if it doubles as a sink), then a no-op sink.
    Errors.IErrorSink<Span> errorSink = errors;
    if (errorSink == null)
    {
        errorSink = (nodesFactory as Errors.IErrorSink<Span>) ?? new EmptyErrorSink<Span>();
    }

    unit.Parse(nodesFactory, errorSink, recovery);
    unit.Close();

    return unit;
}
/// <summary>
/// Initializes a source unit backed by the in-memory <paramref name="code"/> and associated with
/// <paramref name="sourceFile"/>.
/// </summary>
/// <param name="code">Source code text; also used to compute the line breaks passed to the base class.</param>
/// <param name="sourceFile">Source file passed to the base class.</param>
/// <param name="encoding">Encoding passed to the base class.</param>
/// <param name="initialState">Lexical state stored as the unit's initial state.</param>
public CodeSourceUnit(
    string/*!*/ code,
    PhpSourceFile/*!*/ sourceFile,
    Encoding/*!*/ encoding,
    Lexer.LexicalStates initialState)
    : base(sourceFile, encoding, Text.LineBreaks.Create(code))
{
    this.initialState = initialState;
    this.code = code;
}
/// <summary>
/// Initializes a source unit backed by the in-memory <paramref name="code"/> and identified by
/// <paramref name="filePath"/>.
/// </summary>
/// <param name="code">Source code text; also used to compute the line breaks passed to the base class.</param>
/// <param name="filePath">File path passed to the base class.</param>
/// <param name="encoding">Encoding passed to the base class.</param>
/// <param name="initialState">Lexical state stored as the unit's initial state.</param>
public CodeSourceUnit(
    string/*!*/ code,
    string/*!*/ filePath,
    Encoding/*!*/ encoding,
    Lexer.LexicalStates initialState)
    : base(filePath, encoding, Text.LineBreaks.Create(code))
{
    this.initialState = initialState;
    this.code = code;
}
/// <summary>
/// Initializes the test source unit, forwarding all arguments to the base constructor and
/// additionally remembering <paramref name="features"/>.
/// </summary>
/// <param name="code">Source code text.</param>
/// <param name="filePath">File path of the unit.</param>
/// <param name="encoding">Encoding of the unit.</param>
/// <param name="initialState">Optional. Initial lexical state.</param>
/// <param name="features">Optional. Language features; also kept in <c>_features</c>.</param>
public TestSourceUnit(
    string/*!*/ code,
    string/*!*/ filePath,
    Encoding/*!*/ encoding,
    Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL,
    LanguageFeatures features = LanguageFeatures.Basic)
    : base(code, filePath, encoding, initialState, features)
{
    this._features = features;
}
/// <summary>
/// Initializes a source unit for a piece of code belonging to <paramref name="compilationUnit"/>.
/// The initial lexical state is always <see cref="Lexer.LexicalStates.ST_IN_SCRIPTING"/>.
/// </summary>
/// <param name="compilationUnit">Compilation unit passed to the base class.</param>
/// <param name="code">Source code text.</param>
/// <param name="sourceFile">Source file passed to the base class.</param>
/// <param name="encoding">Encoding passed to the base class.</param>
/// <param name="line">Line value stored with the unit.</param>
/// <param name="column">Column value stored with the unit.</param>
public SourceCodeUnit(
    CompilationUnitBase/*!*/ compilationUnit,
    string/*!*/ code,
    PhpSourceFile/*!*/ sourceFile,
    Encoding/*!*/ encoding,
    int line,
    int column)
    : base(compilationUnit, sourceFile, encoding)
{
    this.initialState = Lexer.LexicalStates.ST_IN_SCRIPTING;

    this.code = code;
    this.column = column;
    this.line = line;
}
/// <summary>
/// Initializes a source unit backed by the in-memory <paramref name="code"/> and identified by
/// <paramref name="filePath"/>.
/// </summary>
/// <param name="code">Source code text; also used to compute the line breaks passed to the base class.</param>
/// <param name="filePath">File path passed to the base class.</param>
/// <param name="encoding">Encoding passed to the base class.</param>
/// <param name="initialState">Optional. Lexical state stored as the unit's initial state.</param>
/// <param name="features">Optional. Language features stored with the unit.</param>
public CodeSourceUnit(
    string/*!*/ code,
    string/*!*/ filePath,
    Encoding/*!*/ encoding,
    Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL,
    LanguageFeatures features = LanguageFeatures.Basic)
    : base(filePath, encoding, Text.LineBreaks.Create(code))
{
    this.features = features;
    this.initialState = initialState;
    this.code = code;
}
/// <summary>
/// Initializes a source unit for a piece of code belonging to <paramref name="compilationUnit"/>.
/// Line breaks are computed from <paramref name="code"/> and handed to the base class.
/// </summary>
/// <param name="compilationUnit">Compilation unit passed to the base class.</param>
/// <param name="code">Source code text.</param>
/// <param name="sourceFile">Source file passed to the base class.</param>
/// <param name="encoding">Encoding passed to the base class.</param>
/// <param name="line">Line value stored with the unit.</param>
/// <param name="column">Column value stored with the unit.</param>
public SourceCodeUnit(
    CompilationUnitBase/*!*/ compilationUnit,
    string/*!*/ code,
    PhpSourceFile/*!*/ sourceFile,
    Encoding/*!*/ encoding,
    int line,
    int column)
    : base(compilationUnit, sourceFile, encoding, Text.LineBreaks.Create(code))
{
    // The code carries no opening/closing script tags, so lexing starts directly in scripting mode.
    this.initialState = Lexer.LexicalStates.ST_IN_SCRIPTING;

    this.code = code;
    this.column = column;
    this.line = line;
}
/// <summary>
/// Builds a <see cref="CodeSourceUnit"/> over <paramref name="code"/>, parses it and closes the unit.
/// </summary>
/// <param name="code">Source code to be parsed.</param>
/// <param name="sourceFile">Source file used for error reporting.</param>
/// <param name="errors">Errors sink.</param>
/// <param name="reductionsSink">Reduction sink. Can be <c>null</c>.</param>
/// <param name="features">Optional. Language features.</param>
/// <param name="initialState">
/// Optional. Initial parser state.
/// This allows e.g. to parse PHP code without encapsulating the code into opening and closing tags.
/// </param>
/// <returns>The new <see cref="CodeSourceUnit"/>, after it has been parsed and closed.</returns>
public static SourceUnit/*!*/ ParseCode(
    string code,
    PhpSourceFile sourceFile,
    ErrorSink/*!*/ errors,
    IReductionsSink/*!*/ reductionsSink = null,
    LanguageFeatures features = LanguageFeatures.Php5,
    Lexer.LexicalStates initialState = Lexer.LexicalStates.INITIAL)
{
    CodeSourceUnit/*!*/ codeUnit = new CodeSourceUnit(code, sourceFile, Encoding.UTF8, initialState);

    codeUnit.Parse(errors, reductionsSink, features);
    codeUnit.Close();

    return codeUnit;
}
/// <summary>
/// Tells whether <paramref name="state"/> is one of the lexical states treated as
/// "inside a string" (double quotes, backquote, heredoc, in-string and in-shell states).
/// </summary>
/// <param name="state">Lexical state to test.</param>
/// <returns><c>true</c> for the string-like states listed below; otherwise <c>false</c>.</returns>
private static bool IsInString(Lexer.LexicalStates state)
{
    return state == Lexer.LexicalStates.ST_DOUBLE_QUOTES
        || state == Lexer.LexicalStates.ST_BACKQUOTE
        || state == Lexer.LexicalStates.ST_HEREDOC
        || state == Lexer.LexicalStates.ST_IN_HEREDOC
        || state == Lexer.LexicalStates.ST_IN_STRING
        || state == Lexer.LexicalStates.ST_IN_SHELL;
}
/// <summary>
/// Parses the text of <c>SourceText</c> and stores the resulting syntax tree in <c>ast</c>.
/// </summary>
/// <param name="factory">Nodes factory handed to the syntax provider and to the parser.</param>
/// <param name="errors">Error sink handed to the lexer and to the parser.</param>
/// <param name="recovery">Optional. Error recovery handed to the parser. Can be <c>null</c>.</param>
/// <param name="features">Optional. Language features handed to the lexer and to the parser.</param>
/// <param name="state">Optional. Initial lexical state of the lexer.</param>
public void Parse(
    NodesFactory factory,
    IErrorSink<Span> errors,
    IErrorRecovery recovery = null,
    LanguageFeatures features = LanguageFeatures.Basic,
    Lexer.LexicalStates state = Lexer.LexicalStates.INITIAL)
{
    var parser = new Parser();

    using (var source = new StringReader(SourceText.ToString()))
    {
        // Token pipeline: lexer -> PHP token provider -> additional syntax provider.
        var lexer = new Lexer(source, Encoding.UTF8, errors, features, 0, state);
        var tokenProvider = new PhpTokenProvider(lexer, this);

        using (var provider = new AdditionalSyntaxProvider(tokenProvider, factory, parser.CreateTypeRef))
        {
            ast = parser.Parse(provider, factory, features, errors, recovery);
        }
    }
}
/// <summary>
/// Feeds the data-row file to the lexer one line at a time, carrying the lexical state over from
/// line to line, and checks that every produced token has a valid span.
/// </summary>
public void LexerGetNextTokenByLineTest()
{
    string path = (string)TestContext.DataRow["files"];
    TestErrorSink errorSink = new TestErrorSink();

    // The reader only seeds the lexer's constructor; each line is supplied through Initialize below.
    // Dispose it deterministically so the file handle is not left open after the test.
    using (var fileReader = new StreamReader(path))
    {
        Lexer lexer = new Lexer(fileReader, Encoding.UTF8, errorSink, LanguageFeatures.ShortOpenTags, 0, Lexer.LexicalStates.INITIAL);
        Lexer.LexicalStates previousState = Lexer.LexicalStates.INITIAL;

        foreach (var line in File.ReadAllLines(path))
        {
            // Restart the lexer on this line, continuing in the state the previous line ended in.
            lexer.Initialize(new StringReader(line + Environment.NewLine), previousState, true, 0);

            while (lexer.GetNextToken() != Tokens.EOF)
            {
                Assert.IsTrue(lexer.TokenSpan.IsValid);
            }

            previousState = lexer.CurrentLexicalState;
        }
    }
}
/// <summary>
/// Runs the lexer over short strings generated from several character sets, starting from each of
/// a list of lexical states, and checks that every token has a valid, non-negative-length span
/// and that lexing reaches EOF within a bounded number of tokens.
/// </summary>
public void LexerStringsTest()
{
    // Upper bound on tokens per input; hitting it means the lexer did not reach EOF.
    const int MaxTokens = 100;

    TestErrorSink errorSink = new TestErrorSink();
    Lexer lexer = new Lexer(new StringReader("\"\""), Encoding.UTF8, errorSink, LanguageFeatures.ShortOpenTags, 0, Lexer.LexicalStates.INITIAL);

    var alphabets = new[]
    {
        new[] { '$', '{', 'n', '\0', '\r', '\n', ' ' },
        new[] { '\'', '\\', 'x', 'c', '"', '`', '8', '0' },
        new[] { '/', '*', '?', '>', ';' }
    };

    var startStates = new Lexer.LexicalStates[]
    {
        Lexer.LexicalStates.ST_DOUBLE_QUOTES,
        Lexer.LexicalStates.ST_SINGLE_QUOTES,
        Lexer.LexicalStates.ST_BACKQUOTE,
        Lexer.LexicalStates.ST_HEREDOC,
        Lexer.LexicalStates.ST_NOWDOC,
        Lexer.LexicalStates.ST_COMMENT,
        Lexer.LexicalStates.ST_DOC_COMMENT,
        Lexer.LexicalStates.INITIAL,
        Lexer.LexicalStates.ST_IN_SCRIPTING
    };

    // Shared across all alphabets and states, exactly as the counters are never re-created.
    int[] indices = new int[5];
    char[] buffer = new char[indices.Length];

    foreach (var alphabet in alphabets)
    {
        foreach (var startState in startStates)
        {
            // NOTE(review): Increment/ToArray are defined elsewhere; presumably they step the
            // index word to the next combination and map it to characters - confirm.
            while (Increment(indices, alphabet.Length))
            {
                ToArray(indices, buffer, alphabet);
                string input = new string(buffer);

                lexer.Initialize(new StringReader(input), startState, true, 0);

                int tokenCount = 0;
                while (lexer.GetNextToken() != Tokens.EOF && tokenCount++ < MaxTokens)
                {
                    Assert.IsTrue(lexer.TokenSpan.IsValid, input);
                    Assert.IsTrue(
                        lexer.TokenSpan.Length >= 0,
                        input + " - " + startState.ToString() + " - " + lexer.TokenSpan.Start.ToString());
                }

                Assert.IsTrue(tokenCount < MaxTokens, input);
            }
        }
    }
}
/// <summary>
/// Sets up the parser state for <paramref name="sourceUnit"/>, runs the base parser over
/// <paramref name="reader"/> and returns the produced AST root. Parser fields are cleared
/// before returning.
/// </summary>
/// <param name="sourceUnit">Source unit being parsed; also passed to the scanner.</param>
/// <param name="reader">Text reader with the source code. Asserted to be non-null.</param>
/// <param name="errors">Error sink. Asserted to be non-null.</param>
/// <param name="reductionsSink">
/// Reductions sink. When <c>null</c>, <c>NullReductionSink</c> is stored instead. The argument
/// itself is also offered to the scanner as a comments sink.
/// </param>
/// <param name="initialPosition">Initial position passed to the scanner.</param>
/// <param name="initialLexicalState">Lexical state the scanner starts in.</param>
/// <param name="features">Language features.</param>
/// <returns>The value of <c>astRoot</c> after the base parser has run.</returns>
public GlobalCode Parse(
    SourceUnit/*!*/ sourceUnit,
    TextReader/*!*/ reader,
    ErrorSink/*!*/ errors,
    IReductionsSink reductionsSink,
    Parsers.Position initialPosition,
    Lexer.LexicalStates initialLexicalState,
    LanguageFeatures features)
{
    Debug.Assert(reader != null && errors != null);

    // Store the inputs first; InitializeFields() runs only after they are in place.
    this.sourceUnit = sourceUnit;
    this.errors = errors;
    this.features = features;
    this.reader = reader;
    this.reductionsSink = reductionsSink ?? NullReductionSink;

    InitializeFields();

    // The scanner receives the raw argument (not the null-substituted field) as comments sink.
    scanner = new Scanner(initialPosition, reader, sourceUnit, errors, reductionsSink as ICommentsSink, features);
    scanner.CurrentLexicalState = initialLexicalState;

    // Scope numbering starts at 2; 1 is reserved for prepended inclusion.
    currentScope = new Scope(1);

    unicodeSemantics = (features & LanguageFeatures.UnicodeSemantics) != 0;

    base.Scanner = scanner;
    base.Parse();

    // Grab the root before the fields are wiped.
    GlobalCode parsedRoot = astRoot;

    // Release the AST and other parser state so the GC can collect whatever is unused.
    ClearFields();

    return parsedRoot;
}
/// <summary>
/// Maps a token to its <see cref="TokenCategory"/>, taking the lexical context into account for
/// tokens whose category differs inside and outside of a string.
/// </summary>
/// <param name="token">Token.</param>
/// <param name="CurrentLexicalState">Current lexical state.</param>
/// <returns>
/// The token category, or <see cref="TokenCategory.Unknown"/> for tokens not listed here.
/// </returns>
public static TokenCategory GetTokenCategory(this Tokens token, Lexer.LexicalStates CurrentLexicalState)
{
    // Lexical states in which the ambiguous tokens below are treated as part of a string.
    bool inString;
    switch (CurrentLexicalState)
    {
        case Lexer.LexicalStates.ST_DOUBLE_QUOTES:
        case Lexer.LexicalStates.ST_BACKQUOTE:
        case Lexer.LexicalStates.ST_HEREDOC:
        case Lexer.LexicalStates.ST_IN_STRING:
        case Lexer.LexicalStates.ST_IN_SHELL:
            inString = true;
            break;

        default:
            inString = false;
            break;
    }

    switch (token)
    {
        #region Keywords

        // special keywords
        case Tokens.T_GOTO:
        case Tokens.T_TRY:
        case Tokens.T_CATCH:
        case Tokens.T_FINALLY:
        case Tokens.T_THROW:
        case Tokens.T_INTERFACE:
        case Tokens.T_IMPLEMENTS:
        case Tokens.T_CLONE:
        case Tokens.T_ABSTRACT:
        case Tokens.T_FINAL:
        case Tokens.T_PRIVATE:
        case Tokens.T_PROTECTED:
        case Tokens.T_PUBLIC:
        case Tokens.T_INSTANCEOF:
        case Tokens.T_NAMESPACE:
        case Tokens.T_USE:

        // basic keywords
        case Tokens.T_REQUIRE_ONCE:
        case Tokens.T_REQUIRE:
        case Tokens.T_EVAL:
        case Tokens.T_INCLUDE_ONCE:
        case Tokens.T_INCLUDE:
        case Tokens.T_LOGICAL_OR:               // or
        case Tokens.T_LOGICAL_XOR:              // xor
        case Tokens.T_LOGICAL_AND:              // and
        case Tokens.T_PRINT:
        case Tokens.T_NEW:
        case Tokens.T_EXIT:
        case Tokens.T_IF:
        case Tokens.T_ELSEIF:
        case Tokens.T_ELSE:
        case Tokens.T_ENDIF:
        case Tokens.T_ECHO:
        case Tokens.T_DO:
        case Tokens.T_WHILE:
        case Tokens.T_ENDWHILE:
        case Tokens.T_FOR:
        case Tokens.T_ENDFOR:
        case Tokens.T_FOREACH:
        case Tokens.T_ENDFOREACH:
        case Tokens.T_AS:
        case Tokens.T_SWITCH:
        case Tokens.T_ENDSWITCH:
        case Tokens.T_CASE:
        case Tokens.T_DEFAULT:
        case Tokens.T_BREAK:
        case Tokens.T_CONTINUE:
        case Tokens.T_FUNCTION:
        case Tokens.T_CONST:
        case Tokens.T_RETURN:
        case Tokens.T_YIELD:
        case Tokens.T_GLOBAL:
        case Tokens.T_STATIC:
        case Tokens.T_VAR:
        case Tokens.T_UNSET:
        case Tokens.T_ISSET:
        case Tokens.T_EMPTY:
        case Tokens.T_CLASS:
        case Tokens.T_TRAIT:
        case Tokens.T_INSTEADOF:
        case Tokens.T_EXTENDS:
        case Tokens.T_LIST:
        case Tokens.T_ARRAY:
        case Tokens.T_CLASS_C:                  // __CLASS__
        case Tokens.T_TRAIT_C:                  // __TRAIT__
        case Tokens.T_METHOD_C:                 // __METHOD__
        case Tokens.T_FUNC_C:                   // __FUNCTION__
        case Tokens.T_FILE:                     // __FILE__
        case Tokens.T_LINE:                     // __LINE__
        case Tokens.T_DIR:                      // __DIR__
        case Tokens.T_CALLABLE:                 // callable
            return TokenCategory.Keyword;

        #endregion

        #region Operators

        case Tokens.T_UNSET_CAST:               // (unset)
        case Tokens.T_BOOL_CAST:                // (bool)
        case Tokens.T_OBJECT_CAST:              // (object)
        case Tokens.T_ARRAY_CAST:               // (array)
        case Tokens.T_STRING_CAST:              // (string)
        case Tokens.T_DOUBLE_CAST:              // (double)
        case Tokens.T_INT_CAST:                 // (int)
        case Tokens.T_AT:                       // @
        case Tokens.T_QUESTION:                 // ?
        case Tokens.T_LT:                       // <
        case Tokens.T_GT:                       // >
        case Tokens.T_PERCENT:                  // %
        case Tokens.T_EXCLAM:                   // !
        case Tokens.T_TILDE:                    // ~
        case Tokens.T_EQ:                       // =
        case Tokens.T_SLASH:                    // /
        case Tokens.T_CARET:                    // ^
        case Tokens.T_AMP:                      // &
        case Tokens.T_PLUS:                     // +
        case Tokens.T_MINUS:                    // -
        case Tokens.T_PIPE:                     // |
        case Tokens.T_MUL:                      // *
        case Tokens.T_POW:                      // **
        case Tokens.T_DOT:                      // .
        case Tokens.T_SR_EQUAL:                 // >>=
        case Tokens.T_SL_EQUAL:                 // <<=
        case Tokens.T_XOR_EQUAL:                // ^=
        case Tokens.T_OR_EQUAL:                 // |=
        case Tokens.T_AND_EQUAL:                // &=
        case Tokens.T_MOD_EQUAL:                // %=
        case Tokens.T_CONCAT_EQUAL:             // .=
        case Tokens.T_DIV_EQUAL:                // /=
        case Tokens.T_MUL_EQUAL:                // *=
        case Tokens.T_POW_EQUAL:                // **=
        case Tokens.T_MINUS_EQUAL:              // -=
        case Tokens.T_PLUS_EQUAL:               // +=
        case Tokens.T_BOOLEAN_OR:               // ||
        case Tokens.T_BOOLEAN_AND:              // &&
        case Tokens.T_IS_NOT_IDENTICAL:         // !==
        case Tokens.T_IS_IDENTICAL:             // ===
        case Tokens.T_IS_NOT_EQUAL:             // !=
        case Tokens.T_IS_EQUAL:                 // ==
        case Tokens.T_IS_GREATER_OR_EQUAL:      // >=
        case Tokens.T_IS_SMALLER_OR_EQUAL:      // <=
        case Tokens.T_SR:                       // >>
        case Tokens.T_SL:                       // <<
        case Tokens.T_DEC:                      // --
        case Tokens.T_INC:                      // ++
        case Tokens.T_DOUBLE_COLON:             // ::
        case Tokens.T_COLON:                    // :
        case Tokens.T_DOUBLE_ARROW:             // =>
        case Tokens.T_ELLIPSIS:                 // ...
            return TokenCategory.Operator;

        #endregion

        #region Others

        case Tokens.T_LPAREN:                   // (
        case Tokens.T_RPAREN:                   // )
        case Tokens.T_SEMI:                     // ;
        case Tokens.T_COMMA:                    // ,
        case Tokens.T_NS_SEPARATOR:             // namespace separator (backslash)
            return TokenCategory.Delimiter;

        case Tokens.T_STRING_VARNAME:           // identifier following encapsulated "${"
            return TokenCategory.Identifier;

        case Tokens.T_DNUMBER:                  // double (or overflown integer) out of string
        case Tokens.T_LNUMBER:                  // integer (or hex integer) out of string
            return TokenCategory.Number;

        case Tokens.T_DOUBLE_QUOTES:            // "
        case Tokens.T_BACKQUOTE:                // `
        case Tokens.T_START_HEREDOC:            // <<<XXX
        case Tokens.T_END_HEREDOC:              // XXX
        case Tokens.T_ENCAPSED_AND_WHITESPACE:  // character(s) in string
        case Tokens.T_CONSTANT_ENCAPSED_STRING: // quoted string not containing '$'
        case Tokens.T_NUM_STRING:               // number in string
            return TokenCategory.String;

        case Tokens.T_DOLLAR_OPEN_CURLY_BRACES: // "${" in string - starts non-string code
        case Tokens.T_CURLY_OPEN:               // "{$" in string
            return TokenCategory.StringCode;

        case Tokens.T_WHITESPACE:
            return TokenCategory.WhiteSpace;

        case Tokens.T_COMMENT:
        case Tokens.T_DOC_COMMENT:
            return TokenCategory.Comment;

        case Tokens.T_OPEN_TAG:
        case Tokens.T_OPEN_TAG_WITH_ECHO:
        case Tokens.T_CLOSE_TAG:
            return TokenCategory.ScriptTags;

        case Tokens.T_INLINE_HTML:
            return TokenCategory.Html;

        #endregion

        #region Tokens with Ambiguous Category

        case Tokens.T_LBRACKET:                 // [
        case Tokens.T_RBRACKET:                 // ]
        case Tokens.T_LBRACE:                   // {
            return inString ? TokenCategory.String : TokenCategory.Delimiter;

        case Tokens.T_RBRACE:                   // }
            // inside a string the brace is string code; otherwise it is part of the script
            return inString ? TokenCategory.StringCode : TokenCategory.Delimiter;

        case Tokens.T_STRING:                   // identifier
            return inString ? TokenCategory.String : TokenCategory.Identifier;

        case Tokens.T_DOLLAR:                   // isolated '$'
        case Tokens.T_OBJECT_OPERATOR:          // ->
            return inString ? TokenCategory.StringCode : TokenCategory.Operator;

        case Tokens.T_VARIABLE:                 // identifier
            return inString ? TokenCategory.StringCode : TokenCategory.Variable;

        #endregion

        default:
            return TokenCategory.Unknown;
    }
}