Exemplo n.º 1
0
 // Called from the parser at the end of the embedded code.
 // Makes the given tokenizer the current string tokenizer again, so that the next token is
 // requested from it, and then invokes COND_LEXPOP and CMDARG_LEXPOP.
 // NOTE(review): the two pops presumably undo the COND_PUSH(0)/CMDARG_PUSH(0) performed when the
 // embedded code started - confirm against StringEmbeddedCodeBegin.
 internal void StringEmbeddedCodeEnd(StringTokenizer terminator) {
     _currentString = terminator;
     COND_LEXPOP();
     CMDARG_LEXPOP();
 }
Exemplo n.º 2
0
        // Produces the next token.
        //
        // Throws InvalidOperationException if no input has been set.
        //
        // While a string tokenizer is active, the token is requested from it; a StringEnd or RegexpEnd
        // token deactivates the string tokenizer and switches the lexical state to EXPR_END.
        //
        // Otherwise tokens are read in a loop. Comments, whitespace and end-of-line tokens are skipped
        // unless the tokenizer is in verbatim mode, in which case they are returned as well.
        // EndOfFile additionally sets the end-of-file flag. The token span is updated and the token is
        // dumped for every token read, including the skipped ones.
        public Tokens GetNextToken() {
            if (_input == null) {
                throw new InvalidOperationException("Uninitialized");
            }

            Tokens result;

            if (_currentString != null) {
                // TODO:
                RefillBuffer();

                result = _currentString.Tokenize(this);

                bool literalClosed = (result == Tokens.StringEnd) || (result == Tokens.RegexpEnd);
                if (literalClosed) {
                    _currentString = null;
                    _lexicalState = LexicalState.EXPR_END;
                }

                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(result);
                return result;
            }

            bool sawWhitespace = false;
            bool cmdState = _commaStart;
            _commaStart = false;

            do {
                // TODO:
                RefillBuffer();

                result = Tokenize(sawWhitespace, cmdState);

                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(result);

                if (result == Tokens.EndOfFile) {
                    _eofReached = true;
                    return result;
                }

                if (result == Tokens.Whitespace) {
                    sawWhitespace = true;
                } else if (result != Tokens.MultiLineComment &&
                           result != Tokens.SingleLineComment &&
                           result != Tokens.EndOfLine) { // end of line is not considered whitespace
                    // a significant token:
                    return result;
                }

                // an ignored token was read: hand it out only in verbatim mode, otherwise keep reading
            } while (!_verbatim);

            return result;
        }
Exemplo n.º 3
0
 // Suspends the current string tokenizer for the duration of the embedded code:
 // the tokenizer is stored into the value of the StringEmbeddedCodeBegin token and cleared here.
 // It is restored later via a call to StringEmbeddedCodeEnd.
 private Tokens StringEmbeddedCodeBegin() {
     StringTokenizer suspended = _currentString;
     _tokenValue.SetStringTokenizer(suspended);
     _currentString = null;

     // the embedded code starts in the expression-beginning state:
     SetState(LexicalState.EXPR_BEG);
     COND_PUSH(0);
     CMDARG_PUSH(0);

     return Tokens.StringEmbeddedCodeBegin;
 }
Exemplo n.º 4
0
 // Called from the parser at the end of the embedded variable.
 // Makes the given tokenizer the current string tokenizer again, so that the next token is
 // requested from it.
 internal void StringEmbeddedVariableEnd(StringTokenizer stringTokenizer) {
     _currentString = stringTokenizer;
 }
Exemplo n.º 5
0
        // Quotation start:
        //   %[QqWwxrs]?[^:alpha-numeric:]
        //
        // Reads the optional type letter and the delimiter that follow '%', installs a string content
        // tokenizer for the quotation as the current string tokenizer and returns the matching begin token.
        //
        // Returns Tokens.EndOfFile (after reporting UnterminatedQuotedString) if the delimiter read is -1,
        // and (Tokens)'%' (after reporting UnknownQuotedStringType) if the delimiter is a letter or a digit.
        private Tokens TokenizeQuotationStart() {
            StringType kind;
            Tokens beginToken;
            bool hasTypeLetter = true;

            // c is the character following %
            // note that it could be eoln in which case it needs to be normalized:
            int c = ReadNormalizeEndOfLine();
            switch (c) {
                case 'Q':
                    kind = StringType.ExpandsEmbedded;
                    beginToken = Tokens.StringBegin;
                    break;

                case 'q':
                    kind = StringType.Default;
                    beginToken = Tokens.StringBegin;
                    break;

                case 'W':
                    kind = StringType.Words | StringType.ExpandsEmbedded;
                    beginToken = Tokens.WordsBegin;
                    break;

                case 'w':
                    kind = StringType.Words;
                    beginToken = Tokens.VerbatimWordsBegin;
                    break;

                case 'x':
                    kind = StringType.ExpandsEmbedded;
                    beginToken = Tokens.ShellStringBegin;
                    break;

                case 'r':
                    kind = StringType.RegularExpression | StringType.ExpandsEmbedded;
                    beginToken = Tokens.RegexpBegin;
                    break;

                case 's':
                    kind = StringType.Symbol;
                    beginToken = Tokens.SymbolBegin;
                    break;

                default:
                    // no type letter: c itself is the delimiter
                    kind = StringType.ExpandsEmbedded;
                    beginToken = Tokens.StringBegin;
                    hasTypeLetter = false;
                    break;
            }

            // A type letter is followed by the delimiter.
            // For word lists: if the delimiter is a whitespace the end will never be matched and
            // syntax error will be reported.
            int opening = hasTypeLetter ? ReadNormalizeEndOfLine() : c;
            if (c == 's') {
                _lexicalState = LexicalState.EXPR_FNAME;
            }

            if (opening == -1) {
                _unterminatedToken = true;
                MarkSingleLineTokenEnd();
                ReportError(Errors.UnterminatedQuotedString);
                return Tokens.EndOfFile;
            }

            // Brackets are closed by their counterpart and the opening bracket is passed along as well;
            // any other delimiter closes itself and no opening character is passed (0).
            int closing;
            int nested = opening;
            if (opening == '(') {
                closing = ')';
            } else if (opening == '{') {
                closing = '}';
            } else if (opening == '[') {
                closing = ']';
            } else if (opening == '<') {
                closing = '>';
            } else {
                if (IsLetterOrDigit(opening)) {
                    Back(opening);
                    MarkSingleLineTokenEnd();
                    ReportError(Errors.UnknownQuotedStringType);
                    return (Tokens)'%';
                }

                closing = opening;
                nested = 0;
            }

            bool spansLines = closing == '\n';
            if ((kind & StringType.Words) != 0 && SkipWhitespace()) {
                spansLines = true;
            }

            if (spansLines) {
                MarkMultiLineTokenEnd();
            } else {
                MarkSingleLineTokenEnd();
            }

            _currentString = new StringContentTokenizer(kind, (char)closing, (char)nested);
            _tokenValue.SetStringTokenizer(_currentString);
            return beginToken;
        }
Exemplo n.º 6
0
 // Suspends the current string tokenizer for the duration of the embedded variable:
 // the tokenizer is stored into the value of the StringEmbeddedVariableBegin token and cleared here.
 // It is restored later via a call to StringEmbeddedVariableEnd.
 private Tokens StringEmbeddedVariableBegin() {
     StringTokenizer suspended = _currentString;
     _tokenValue.SetStringTokenizer(suspended);
     _currentString = null;

     // the embedded variable starts in the expression-beginning state:
     SetState(LexicalState.EXPR_BEG);

     return Tokens.StringEmbeddedVariableBegin;
 }
Exemplo n.º 7
0
 // String: '...
 // Starts a string of the default type terminated by a single quote: the new content tokenizer
 // becomes the current string tokenizer and is also stored into the token value.
 private Tokens ReadSingleQuote() {
     StringTokenizer content = new StringContentTokenizer(StringType.Default, '\'');

     _currentString = content;
     _tokenValue.SetStringTokenizer(content);

     return Tokens.StringBegin;
 }
Exemplo n.º 8
0
        // Quotation start:
        //   %[QqWwxrs]?[^:alpha-numeric:]
        //
        // Reads the start of a %-quotation; "c" is the character following '%'.
        // Selects the string type and the begin token from the optional type letter, determines the
        // terminator (the closing bracket for '(', '{', '[' and '<', otherwise the delimiter itself),
        // installs a new StringContentTokenizer as the current string tokenizer, stores it into the
        // token value and returns the begin token.
        //
        // Returns Tokens.EndOfFile (after reporting UnterminatedQuotedString) if the delimiter is -1,
        // and (Tokens)'%' (after reporting UnknownQuotedStringType) if the delimiter is a letter or a digit.
        private Tokens ReadQuotationStart(int c) {
            StringType type;
            Tokens token;
            int terminator;

            // c is the character following %
            // If it is a type letter, the delimiter is the next character; otherwise c itself is the delimiter.
            switch (c) {
                case 'Q':
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.StringBeg;
                    terminator = nextc();
                    break;

                case 'q':
                    type = StringType.Default;
                    token = Tokens.StringBeg;
                    terminator = nextc();
                    break;

                case 'W':
                    type = StringType.Words | StringType.ExpandsEmbedded;
                    token = Tokens.WordsBeg;
                    terminator = nextc();
                    // NOTE(review): SkipWhitespace runs before the delimiter is validated below, so it is
                    // also invoked on the error paths - confirm this is intended.
                    SkipWhitespace();
                    break;

                case 'w':
                    type = StringType.Words;
                    token = Tokens.VerbatimWordsBegin;
                    terminator = nextc();
                    // NOTE(review): same ordering as for 'W' above.
                    SkipWhitespace();
                    break;

                case 'x':
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.ShellStringBegin;
                    terminator = nextc();
                    break;

                case 'r':
                    type = StringType.RegularExpression | StringType.ExpandsEmbedded;
                    token = Tokens.RegexpBeg;
                    terminator = nextc();
                    break;

                case 's':
                    type = StringType.Symbol;
                    token = Tokens.Symbeg;
                    terminator = nextc();
                    _lexicalState = LexicalState.EXPR_FNAME;
                    break;

                default:
                    // no type letter: behaves like %Q with c as the delimiter
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.StringBeg;
                    terminator = c;
                    break;
            }

            // "parenthesis" keeps the opening bracket when the delimiter is a bracket and is reset to 0
            // for any other delimiter; "terminator" becomes the character that closes the quotation.
            int parenthesis = terminator;
            switch (terminator) {
                case -1:
                    UnterminatedToken = true;
                    ReportError(Errors.UnterminatedQuotedString);
                    return Tokens.EndOfFile;

                case '(': terminator = ')'; break;
                case '{': terminator = '}'; break;
                case '[': terminator = ']'; break;
                case '<': terminator = '>'; break;

                default:
                    if (IsLetterOrDigit(terminator)) {
                        // NOTE(review): this pushes back c (the character following '%'), not the
                        // terminator that was just examined; the two differ when a type letter was
                        // present - confirm against the semantics of pushback.
                        pushback(c);
                        ReportError(Errors.UnknownQuotedStringType);
                        return (Tokens)'%';
                    }
                    parenthesis = 0; 
                    break;
            }

            _currentString = new StringContentTokenizer(type, (char)terminator, (char)parenthesis);
            _tokenValue.SetStringTokenizer(_currentString);
            return token;
        }
Exemplo n.º 9
0
        // String: `...
        // Operator: `
        //
        // After a function name (EXPR_FNAME) or a dot (EXPR_DOT) the backtick is returned as an operator
        // token and only the lexical state is advanced. In any other state it starts a shell string whose
        // content tokenizer becomes the current string tokenizer.
        private Tokens ReadBacktick(bool cmdState) {
            switch (_lexicalState) {
                case LexicalState.EXPR_FNAME:
                    _lexicalState = LexicalState.EXPR_END;
                    return (Tokens)'`';

                case LexicalState.EXPR_DOT:
                    if (cmdState) {
                        _lexicalState = LexicalState.EXPR_CMDARG;
                    } else {
                        _lexicalState = LexicalState.EXPR_ARG;
                    }
                    return (Tokens)'`';
            }

            StringTokenizer shellString = new StringContentTokenizer(StringType.ExpandsEmbedded, '`');
            _currentString = shellString;
            _tokenValue.SetStringTokenizer(shellString);
            return Tokens.ShellStringBegin;
        }
Exemplo n.º 10
0
 // String: "...
 // Starts a string with embedded expansion terminated by a double quote: the new content tokenizer
 // becomes the current string tokenizer and is also stored into the token value.
 private Tokens ReadDoubleQuote() {
     StringTokenizer content = new StringContentTokenizer(StringType.ExpandsEmbedded, '"');

     _currentString = content;
     _tokenValue.SetStringTokenizer(content);

     return Tokens.StringBegin;
 }
Exemplo n.º 11
0
        // Operators: :: :
        // Literals: :... (symbol start)
        //
        // Decides what a ':' means based on the character that follows it and on the lexical state:
        //  - "::" yields LeadingDoubleColon or SeparatingDoubleColon,
        //  - a ':' at the end of an expression or followed by whitespace yields the ':' operator,
        //  - anything else starts a symbol; a quoted symbol also installs a string content tokenizer.
        private Tokens ReadColon(bool whitespaceSeen) {
            int next = Peek();

            if (next == ':') {
                Skip(next);

                bool leading;
                switch (_lexicalState) {
                    case LexicalState.EXPR_BEG:
                    case LexicalState.EXPR_MID:
                    case LexicalState.EXPR_CLASS:
                        leading = true;
                        break;

                    default:
                        leading = IS_ARG() && whitespaceSeen;
                        break;
                }

                if (leading) {
                    _lexicalState = LexicalState.EXPR_BEG;
                    return Tokens.LeadingDoubleColon;
                }

                _lexicalState = LexicalState.EXPR_DOT;
                return Tokens.SeparatingDoubleColon;
            }

            bool plainColon = _lexicalState == LexicalState.EXPR_END
                || _lexicalState == LexicalState.EXPR_ENDARG
                || IsWhiteSpace(next);

            if (plainColon) {
                _lexicalState = LexicalState.EXPR_BEG;
                return (Tokens)':';
            }

            // symbol start; a quote selects the tokenizer for the quoted symbol content:
            if (next == '\'') {
                Skip(next);
                _currentString = new StringContentTokenizer(StringType.Symbol, '\'');
            } else if (next == '"') {
                Skip(next);
                _currentString = new StringContentTokenizer(StringType.Symbol | StringType.ExpandsEmbedded, '"');
            } else {
                // unquoted symbol: no string tokenizer is expected to be active here
                Debug.Assert(_currentString == null);
            }

            _lexicalState = LexicalState.EXPR_FNAME;
            _tokenValue.SetStringTokenizer(_currentString);
            return Tokens.SymbolBegin;
        }
Exemplo n.º 12
0
        // Operators: /
        // Assignments: /=
        // Literals: /... (regex start)
        //
        // A '/' starts a regular expression in the EXPR_BEG and EXPR_MID states, and also (with an
        // AmbiguousFirstArgument warning) when it follows whitespace in an argument position and is not
        // itself followed by whitespace. "/=" yields an Assignment token. Everything else is the division
        // operator.
        private Tokens ReadSlash(bool whitespaceSeen) {
            bool startsRegex = _lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID;

            if (!startsRegex) {
                int next = Peek();

                if (next == '=') {
                    Skip(next);
                    SetAsciiStringToken(Symbols.Divide);
                    _lexicalState = LexicalState.EXPR_BEG;
                    return Tokens.Assignment;
                }

                if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
                    ReportWarning(Errors.AmbiguousFirstArgument);
                    startsRegex = true;
                }
            }

            if (startsRegex) {
                _currentString = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
                _tokenValue.SetStringTokenizer(_currentString);
                return Tokens.RegexpBegin;
            }

            // division operator:
            if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
                _lexicalState = LexicalState.EXPR_ARG;
            } else {
                _lexicalState = LexicalState.EXPR_BEG;
            }

            return (Tokens)'/';
        }
Exemplo n.º 13
0
        // Produces the next token.
        //
        // While a string tokenizer is active, the token is requested from it; a StringEnd or RegexpEnd
        // token deactivates the string tokenizer and switches the lexical state to EXPR_END.
        //
        // Otherwise tokens are read in a loop: whitespace, end-of-line and invalid-character tokens are
        // always skipped, comments are skipped unless the tokenizer is in verbatim mode. The token span is
        // updated and the token is dumped for every token read, including the skipped ones.
        private Tokens Tokenize() {
            yytext = new StringBuilder();
            bool sawWhitespace = false;

            if (_currentString != null) {
                Tokens stringToken = _currentString.Tokenize(this);

                bool literalClosed = (stringToken == Tokens.StringEnd) || (stringToken == Tokens.RegexpEnd);
                if (literalClosed) {
                    _currentString = null;
                    _lexicalState = LexicalState.EXPR_END;
                }

                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(stringToken);
                return stringToken;
            }

            bool cmdState = _commaStart;
            _commaStart = false;

            for (;;) {
                Tokens current = Tokenize(sawWhitespace, cmdState);

                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(current);

                if (current == Tokens.Whitespace) {
                    sawWhitespace = true;
                    continue;
                }

                // end of line is not considered whitespace
                if (current == Tokens.EndOfLine || current == Tokens.InvalidCharacter) {
                    continue;
                }

                bool isComment = current == Tokens.MultiLineComment || current == Tokens.SingleLineComment;
                if (isComment && !_verbatim) {
                    continue;
                }

                return current;
            }
        }
Exemplo n.º 14
0
 // Stores the given string tokenizer into this value's StringTokenizer member.
 internal void SetStringTokenizer(StringTokenizer value) {
     StringTokenizer = value;
 }
Exemplo n.º 15
0
        // Attempts to read a heredoc label at the current position.
        //
        // An optional '-' selects an indented heredoc. The label is either quoted ('...', "..." or `...`)
        // or a bare identifier; every form except the single-quoted one expands embedded content.
        //
        // On success the rest of the current line is skipped, a HeredocTokenizer created for the label
        // becomes the current string tokenizer, and ShellStringBegin (backtick label) or StringBegin is returned.
        // If no label follows, the read characters are given back via SeekRelative and Tokens.None is returned.
        private Tokens TokenizeHeredocLabel() {
            int term;
            StringType stringType = StringType.Default;

            // prefixWidth is the distance to seek back if it turns out no label follows.
            // NOTE(review): presumably the number of buffer characters consumed by the last read - confirm
            // against ReadNormalizeEndOfLine.
            int prefixWidth;
            int c = ReadNormalizeEndOfLine(out prefixWidth);
            if (c == '-') {
                c = ReadNormalizeEndOfLine(out prefixWidth);
                // account for the '-' as well:
                prefixWidth++;
                stringType = StringType.IndentedHeredoc;
            }

            string label;
            if (c == '\'' || c == '"' || c == '`') {
                if (c != '\'') {
                    stringType |= StringType.ExpandsEmbedded;
                }

                // do not include quotes:
                int start = _bufferPos;
                term = c;

                // read up to the closing quote; end of file and end of line both end the label with an error:
                while (true) {
                    c = Read(); 
                    if (c == -1) {
                        _unterminatedToken = true;
                        ReportError(Errors.UnterminatedHereDocIdentifier);
                        c = term;
                        break;
                    }

                    if (c == term) {
                        break;
                    }

                    // MRI doesn't do this, it continues reading the label and includes \n into it.
                    // The label cannot be matched with the end label (only single-line comparison is done), so it's better to report error here
                    // Allowing \n in label requires the token to be multi-line.
                    // Note we can ignore \r followed by \n here since it will fail in the next iteration.
                    if (c == '\n') {
                        Back('\n');
                        ReportError(Errors.UnterminatedHereDocIdentifier);
                        c = term;
                        break;
                    }
                }

                // The length excludes the last character before the current position (the closing quote
                // when the label was terminated properly).
                // NOTE(review): on the two error paths above no closing quote was consumed; depending on how
                // Read() at end of file and Back() move _bufferPos, this length may cut the label short or
                // become negative for an empty label - verify.
                label = new String(_lineBuffer, start, _bufferPos - start - 1);
            } else if (IsIdentifier(c)) {
                // a bare identifier label is treated like a double-quoted one
                term = '"';
                stringType |= StringType.ExpandsEmbedded;
                
                // the label starts one position back, at the character held in c:
                int start = _bufferPos - 1;
                SkipVariableName();
                label = new String(_lineBuffer, start, _bufferPos - start);
            } else {
                // not a heredoc label: give back what was read and let the caller decide
                SeekRelative(-prefixWidth);
                return Tokens.None;
            }

            // note that if we allow \n in the label we must change this to multi-line token!
            MarkSingleLineTokenEnd();
            
            // skip the rest of the line (the content is stored in heredoc string terminal and tokenized upon restore)
            int resume = _bufferPos;
            _bufferPos = _lineLength;
            _currentString = new HeredocTokenizer(stringType, label, resume, _lineBuffer, _lineLength, _currentLine, _currentLineIndex);
            // the old line buffer was handed to the heredoc tokenizer above; continue with a fresh one
            _lineBuffer = new char[InitialBufferSize];
            _tokenValue.SetStringTokenizer(_currentString);

            return term == '`' ? Tokens.ShellStringBegin : Tokens.StringBegin;
        }
Exemplo n.º 16
0
 // Stores the given string tokenizer into this value's StringTokenizer member and
 // marks the value as holding a string tokenizer (TokenValueType.StringTokenizer).
 internal void SetStringTokenizer(StringTokenizer value) {
     StringTokenizer = value;
     _type = TokenValueType.StringTokenizer;
 }
Exemplo n.º 17
0
        // Attempts to read a heredoc label at the current position.
        //
        // An optional '-' selects an indented heredoc. The label is either quoted ('...', "..." or `...`)
        // or a bare identifier; every form except the single-quoted one expands embedded content.
        //
        // On success the rest of the current line is skipped, a HeredocTokenizer created for the label
        // becomes the current string tokenizer, and ShellStringBegin (backtick label) or StringBeg is returned.
        // If no label follows, the read characters are pushed back and Tokens.None is returned.
        private Tokens TokenizeHeredocLabel() {
            int term;
            StringType stringType = StringType.Default;

            int c = nextc();
            if (c == '-') {
                c = nextc();
                stringType = StringType.IndentedHeredoc;
            }

            string label;
            if (c == '\'' || c == '"' || c == '`') {
                if (c != '\'') {
                    stringType |= StringType.ExpandsEmbedded;
                }

                // do not include quotes:
                int start = _bufferPos;
                term = c;

                // read up to the closing quote; end of file and end of line both end the label with an error:
                while (true) {
                    c = nextc();
                    if (c == -1) {
                        UnterminatedToken = true;
                        ReportError(Errors.UnterminatedHereDocIdentifier);
                        c = term;
                        break;
                    }

                    if (c == term) {
                        break;
                    }

                    // MRI doesn't do this, it continues reading the label and includes \n into it.
                    // The label cannot be matched with the end label (only single-line comparison is done), so it's better to report error here
                    // Allowing \n in label requires the token to be multi-line.
                    if (c == '\n') {
                        pushback(c);
                        ReportError(Errors.UnterminatedHereDocIdentifier);
                        c = term;
                        break;
                    }
                }

                // The length excludes the last character before the current position (the closing quote
                // when the label was terminated properly).
                // NOTE(review): on the two error paths above no closing quote was consumed; depending on how
                // nextc() at end of file and pushback() move _bufferPos, this length may cut the label short
                // or become negative for an empty label - verify.
                label = new String(_lineBuffer, start, _bufferPos - start - 1);
            } else if (IsIdentifier(c)) {
                // a bare identifier label is treated like a double-quoted one
                term = '"';
                stringType |= StringType.ExpandsEmbedded;
                
                // the label starts one position back, at the character held in c:
                int start = _bufferPos - 1;
                SkipVariableName();
                label = new String(_lineBuffer, start, _bufferPos - start);
            } else {
                // not a heredoc label: push back what was read (including the '-' if one was consumed)
                pushback(c);
                if ((stringType & StringType.IndentedHeredoc) != 0) {
                    pushback('-');
                }
                return Tokens.None;
            }

            // note that if we allow \n in the label we must change this to multi-line token!
            MarkSingleLineTokenEnd();
            
            // skip the rest of the line (the content is stored in heredoc string terminal and tokenized upon restore)
            int resume = _bufferPos;
            _bufferPos = _lineBuffer.Length;
            _currentString = new HeredocTokenizer(stringType, label, resume, _lineBuffer, _currentLine, _currentLineIndex);
            _tokenValue.SetStringTokenizer(_currentString);

            return term == '`' ? Tokens.ShellStringBegin : Tokens.StringBeg;
        }