Example #1
0
        // Central dispatch: reads one character and tokenizes the next token based on it.
        // whitespaceSeen: whether whitespace immediately preceded this token; passed down to
        //   helpers that disambiguate operators from unary/literal prefixes (e.g. "foo -1").
        // cmdState: whether a paren-less method call (command) may start at this position;
        //   passed down to the identifier and backtick handlers.
        // Side effects: advances the input position, marks the token start/end, and may update
        //   _lexicalState and the cond/cmdarg stacks.
        private Tokens Tokenize(bool whitespaceSeen, bool cmdState) {
            MarkTokenStart();
            int c = Read();

            switch (c) {
                case '\0':		// null terminates the input
                    // if tokenizer is asked for the next token it returns EOF again:
                    Back('\0');
                    MarkSingleLineTokenEnd();
                    return Tokens.EndOfFile;

                case -1:		// end of stream
                    MarkSingleLineTokenEnd();
                    return Tokens.EndOfFile;

                // whitespace
                case ' ':
                case '\t':
                case '\f':
                    return MarkSingleLineTokenEnd(ReadNonEolnWhiteSpace());

                case '\n':
                    return MarkMultiLineTokenEnd(GetEndOfLineToken());

                case '\r':
                    // "\r\n" is one end-of-line; a lone '\r' is treated as whitespace:
                    if (Read('\n')) {
                        return MarkMultiLineTokenEnd(GetEndOfLineToken());
                    } else {
                        return MarkSingleLineTokenEnd(ReadNonEolnWhiteSpace());
                    }

                case '\\':
                    return TokenizeBackslash();

                case '#':
                    return MarkSingleLineTokenEnd(ReadSingleLineComment());

                case '*':
                    return MarkSingleLineTokenEnd(ReadStar(whitespaceSeen));

                case '!':
                    return MarkSingleLineTokenEnd(ReadBang());

                case '=': 
                    // "=begin ... =end" multi-line comment, otherwise =/==/===/=~/=> operators:
                    if (ReadMultiLineComment()) {
                        MarkMultiLineTokenEnd();
                        return Tokens.MultiLineComment;
                    }

                    return MarkSingleLineTokenEnd(ReadEquals());

                case '<':
                    return TokenizeLessThan(whitespaceSeen);

                case '>':
                    return MarkSingleLineTokenEnd(ReadGreaterThan());

                case '"':
                    return MarkSingleLineTokenEnd(ReadDoubleQuote());

                case '\'':
                    return MarkSingleLineTokenEnd(ReadSingleQuote());

                case '`':
                    return MarkSingleLineTokenEnd(ReadBacktick(cmdState));

                case '?':
                    return TokenizeQuestionmark();

                case '&':
                    return MarkSingleLineTokenEnd(ReadAmpersand(whitespaceSeen));

                case '|':
                    return MarkSingleLineTokenEnd(ReadPipe());

                case '+':
                    return MarkSingleLineTokenEnd(ReadPlus(whitespaceSeen));

                case '-':
                    return MarkSingleLineTokenEnd(ReadMinus(whitespaceSeen));

                case '.':
                    return MarkSingleLineTokenEnd(ReadDot());

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    return MarkSingleLineTokenEnd(ReadUnsignedNumber(c));

                case ':':
                    return MarkSingleLineTokenEnd(ReadColon(whitespaceSeen));

                case '/':
                    return MarkSingleLineTokenEnd(ReadSlash(whitespaceSeen));

                case '^':
                    return MarkSingleLineTokenEnd(ReadCaret());

                case ';':
                    // statement separator resets the state to expression-beginning:
                    _commaStart = true;
                    _lexicalState = LexicalState.EXPR_BEG;
                    MarkSingleLineTokenEnd();
                    return (Tokens)';';

                case ',':
                    _lexicalState = LexicalState.EXPR_BEG;
                    MarkSingleLineTokenEnd();
                    return (Tokens)',';

                case '~':
                    return MarkSingleLineTokenEnd(ReadTilde());

                case '(':
                    _commaStart = true;
                    return MarkSingleLineTokenEnd(ReadLeftParenthesis(whitespaceSeen));

                case '[':
                    return MarkSingleLineTokenEnd(ReadLeftBracket(whitespaceSeen));

                case '{':
                    return MarkSingleLineTokenEnd(ReadLeftBrace());

                case ')':
                case ']':
                case '}':
                    // closing brackets pop the conditional/command-argument stacks pushed by
                    // their matching openers:
                    COND_LEXPOP();
                    CMDARG_LEXPOP();
                    _lexicalState = LexicalState.EXPR_END;
                    MarkSingleLineTokenEnd();
                    return (Tokens)c;

                case '%':
                    return TokenizePercent(whitespaceSeen);

                case '$': 
                    return MarkSingleLineTokenEnd(ReadGlobalVariable());

                case '@':
                    return MarkSingleLineTokenEnd(ReadInstanceOrClassVariable());

                case '_':
                    // "__END__" on its own line terminates the program; the rest is the DATA section:
                    if (was_bol() && LineContentEquals("__END__", false)) {
                        // if tokenizer is asked for the next token it returns EOF again:
                        Back('_');
                        MarkSingleLineTokenEnd();
                        _dataOffset = _currentLineIndex + _lineLength;
                        return Tokens.EndOfFile;
                    }
                    return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));

                default:
                    if (!IsIdentifierInitial(c, _multiByteIdentifier)) {
                        // UTF-8 BOM detection:
                        if (_compatibility == RubyCompatibility.Ruby18 && _currentLineIndex == 0 && _bufferPos == 1 &&
                            (c == 0xEF && Peek() == 0xBB && Peek(1) == 0xBF)) {
                            ReportError(Errors.InvalidUseOfByteOrderMark);
                            // skip BOM and continue parsing as if it was whitespace:
                            Read();
                            Read();
                            MarkSingleLineTokenEnd();
                            return Tokens.Whitespace;
                        } else {
                            ReportError(Errors.InvalidCharacterInExpression, (char)c);
                            MarkSingleLineTokenEnd();
                            return Tokens.InvalidCharacter;
                        }
                    }

                    return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));
            }
        }
Example #2
0
        // Identifiers:
        //   [:alpha:_][:identifier:]+
        // Method names:
        //   [:alpha:_][:identifier:]+[?][^=]
        //   [:alpha:_][:identifier:]+[!][^=]
        //   [:alpha:_][:identifier:]+[=][^=~>]
        //   [:alpha:_][:identifier:]+[=] immediately followed by =>
        // Keywords
        //
        // Reads an identifier, method name or keyword whose first character has already been
        // consumed. Returns the token kind, updates _lexicalState, and stores the identifier
        // text as the current token value.
        private Tokens ReadIdentifier(int firstCharacter, bool cmdState) {
            // the first character already read:
            int start = _bufferPos - 1;
            SkipVariableName();

            // reads token suffix (!, ?, =) and returns the token kind based upon the suffix:
            Tokens result = ReadIdentifierSuffix(firstCharacter);

            // TODO: possible optimization: ~15% are keywords, ~15% are existing local variables -> we can save allocations
            string identifier = new String(_lineBuffer, start, _bufferPos - start);
            
            // keywords are not recognized after '.' (method call target position):
            if (_lexicalState != LexicalState.EXPR_DOT) {
                if (_lexicalState == LexicalState.EXPR_FNAME) {
                    SetStringToken(identifier);
                }

                Tokens keyword = StringToKeyword(identifier);
                if (keyword != Tokens.None) {
                    return keyword;
                }
            }

            // a known local variable ends the expression; an unknown identifier may start
            // a command-argument list:
            if (_lexicalState == LexicalState.EXPR_BEG ||
                _lexicalState == LexicalState.EXPR_MID ||
                _lexicalState == LexicalState.EXPR_DOT ||
                _lexicalState == LexicalState.EXPR_ARG ||
                _lexicalState == LexicalState.EXPR_CMDARG) {

                if (_localVariableResolver.IsLocalVariable(identifier)) {
                    _lexicalState = LexicalState.EXPR_END;
                } else if (cmdState) {
                    _lexicalState = LexicalState.EXPR_CMDARG;
                } else {
                    _lexicalState = LexicalState.EXPR_ARG;
                }
            } else {
                _lexicalState = LexicalState.EXPR_END;
            }

            SetStringToken(identifier);
            return result;
        }
Example #3
0
 // Forces the tokenizer into the given lexical state.
 internal void SetState(LexicalState state) {
     this._lexicalState = state;
 }
Example #4
0
        // INTEGER:
        // [1-9]([0-9_]*[1-9])?
        // 0([0-7_]*[0-7])?
        // 0[xX][0-9a-fA-F]([0-9a-fA-F_]*[0-9a-fA-F])?
        // 0[dD][0-9]([0-9_]*[0-9])?
        // 0[bB][01]([01_]*[01])?
        // 0[oO][0-7]([0-7_]*[0-7])?
        //
        // FLOAT:
        // (0|[1-9]([0-9_]*[0-9])?)[.][0-9_]*[0-9]([eE][+-]?[0-9]([0-9_]*[0-9])?)
        //
        // Takes the first decimal digit of the number.
        //
        // Dispatches on the character after a leading '0' (radix prefix, exponent, fraction
        // or octal digits); any other first digit starts a plain decimal number.
        // Sets _lexicalState to EXPR_END and stores the parsed value in _tokenValue.
        private Tokens ReadUnsignedNumber(int c) {
            _lexicalState = LexicalState.EXPR_END;
           
            if (c == '0') {
                switch (Peek()) {
                    case 'x':
                    case 'X':
                        // 0x... hexadecimal:
                        Skip();
                        return ReadInteger(16, NumericCharKind.None);

                    case 'b':
                    case 'B':
                        // 0b... binary:
                        Skip();
                        return ReadInteger(2, NumericCharKind.None);

                    case 'o':
                    case 'O':
                        // 0o... octal:
                        Skip();
                        return ReadInteger(8, NumericCharKind.None);

                    case 'd':
                    case 'D':
                        // 0d... explicit decimal:
                        Skip();
                        return ReadInteger(10, NumericCharKind.None);

                    case 'e':
                    case 'E': {
                            // 0e[+-]...    
                            int sign;
                            int start = _bufferPos - 1;

                            if (TryReadExponentSign(1, out sign)) {
                                return ReadDoubleExponent(start, sign);
                            }

                            // no digits after the exponent marker -> plain integer zero:
                            _tokenValue.SetInteger(0);
                            return Tokens.Integer;
                        }

                    case '.':
                        // 0.
                        // only a fraction when a digit follows ("0.foo" is integer + method call):
                        if (IsDecimalDigit(Peek(1))) {
                            Skip('.');
                            return ReadDouble(_bufferPos - 2);
                        }

                        _tokenValue.SetInteger(0);
                        return Tokens.Integer;

                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '_':
                        // the previous character is '0' digit:
                        return ReadInteger(8, NumericCharKind.Digit);

                    case '8':
                    case '9':
                        ReportError(Errors.IllegalOctalDigit);
                        // treat the number as decimal
                        return ReadInteger(10, NumericCharKind.Digit);

                    default:
                        // a lone zero:
                        _tokenValue.SetInteger(0);
                        return Tokens.Integer;
                }
            }

            return ReadDecimalNumber(c);
        }
Example #5
0
        // Operators: & &&
        // Assignments: &= &&=
        // The '&' has already been consumed; peeks ahead to classify the operator and
        // updates _lexicalState accordingly.
        private Tokens ReadAmpersand(bool whitespaceSeen) {
            int next = Peek();

            switch (next) {
                case '&':
                    // "&&" or "&&=":
                    Skip(next);
                    _lexicalState = LexicalState.EXPR_BEG;

                    if (Read('=')) {
                        SetAsciiStringToken(Symbols.And);
                        return Tokens.Assignment;
                    }
                    return Tokens.LogicalAnd;

                case '=':
                    // "&=":
                    Skip(next);
                    _lexicalState = LexicalState.EXPR_BEG;
                    SetAsciiStringToken(Symbols.BitwiseAnd);
                    return Tokens.Assignment;
            }

            // classify before the state transition below (IS_ARG reads _lexicalState):
            Tokens result;
            if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
                // "foo &bar" - '&' is taken as a block-argument prefix:
                ReportWarning(Errors.AmpersandInterpretedAsProcArgument);
                result = Tokens.Ampersand;
            } else if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
                result = Tokens.Ampersand;
            } else {
                result = (Tokens)'&';
            }

            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            return result;
        }
Example #6
0
        // Operators: . .. ...
        // Errors: .[:digit:]
        // The '.' has already been consumed; distinguishes member access from range operators.
        private Tokens ReadDot() {
            _lexicalState = LexicalState.EXPR_BEG;

            int next = Peek();
            if (next == '.') {
                // "..", and "..." if a third dot follows:
                Skip(next);
                return Read('.') ? Tokens.Dot3 : Tokens.Dot2;
            }

            if (IsDecimalDigit(next)) {
                // ".5"-style float literals are not allowed:
                ReportError(Errors.NoFloatingLiteral);
            }

            // plain member-access dot; a method name is expected next:
            _lexicalState = LexicalState.EXPR_DOT;
            return (Tokens)'.';
        }
Example #7
0
        // Brackets: {
        // Classifies '{' as a block opener or a hash literal based on the current state.
        private Tokens ReadLeftBrace() {
            // decide the flavor before the state is reset below:
            Tokens result =
                (IS_ARG() || _lexicalState == LexicalState.EXPR_END) ? (Tokens)'{'        // block (primary)
                : (_lexicalState == LexicalState.EXPR_ENDARG) ? Tokens.LbraceArg          // block (expr)
                : Tokens.Lbrace;                                                          // hash

            COND_PUSH(0);
            CMDARG_PUSH(0);
            _lexicalState = LexicalState.EXPR_BEG;
            return result;
        }
Example #8
0
        // String: `...
        // Operator: `
        // In method-name position '`' is the operator itself; elsewhere it opens a shell string.
        private Tokens ReadBacktick(bool cmdState) {
            switch (_lexicalState) {
                case LexicalState.EXPR_FNAME:
                    // e.g. "def `":
                    _lexicalState = LexicalState.EXPR_END;
                    return (Tokens)'`';

                case LexicalState.EXPR_DOT:
                    // e.g. "x.`":
                    _lexicalState = cmdState ? LexicalState.EXPR_CMDARG : LexicalState.EXPR_ARG;
                    return (Tokens)'`';

                default:
                    // start of `...` shell command string content:
                    _currentString = new StringContentTokenizer(StringType.ExpandsEmbedded, '`');
                    _tokenValue.SetStringTokenizer(_currentString);
                    return Tokens.ShellStringBegin;
            }
        }
Example #9
0
        // Global variables: 
        //   $[_~*$?!@/\;,.=:<>"] 
        //   $-[:identifier:] 
        //   $[:identifier:]
        // Match references: 
        //   $[&`'+] 
        //   $[1-9][0-9]+
        // Dollar:
        //   $
        //
        // The leading '$' has already been consumed. Reads the name that follows and returns
        // GlobalVariable / MatchReference, or '$' itself when nothing valid follows.
        // The resulting symbol does not include the '$' prefix.
        private Tokens ReadGlobalVariable() {
            _lexicalState = LexicalState.EXPR_END;

            // start right after $, the resulting symbol doesn't contain $
            int start = _bufferPos;
            
            int c = Read();
            switch (c) {
                case '_':
                    // $_name is a regular global variable; a plain $_ is the last-input-line variable:
                    if (IsIdentifier(Peek())) {
                        SkipVariableName();
                        SetStringToken(start, _bufferPos - start);
                        return Tokens.GlobalVariable;
                    }
                    return GlobalVariableToken(Symbols.LastInputLine);

                // exceptions:
                case '!': return GlobalVariableToken(Symbols.CurrentException);
                case '@': return GlobalVariableToken(Symbols.CurrentExceptionBacktrace);

                // options:
                case '-':
                    // $-x (one identifier character) or a plain $-:
                    if (IsIdentifier(Peek())) {
                        Read();
                        SetStringToken(start, 2);
                    } else {
                        SetAsciiStringToken("-");
                    }
                    return Tokens.GlobalVariable;

                // others:
                case ',': return GlobalVariableToken(Symbols.ItemSeparator);
                case ';': return GlobalVariableToken(Symbols.StringSeparator);
                case '/': return GlobalVariableToken(Symbols.InputSeparator);
                case '\\': return GlobalVariableToken(Symbols.OutputSeparator);
                case '*': return GlobalVariableToken(Symbols.CommandLineArguments);
                case '$': return GlobalVariableToken(Symbols.CurrentProcessId);
                case '?': return GlobalVariableToken(Symbols.ChildProcessExitStatus);
                case '=': return GlobalVariableToken(Symbols.IgnoreCaseComparator);
                case ':': return GlobalVariableToken(Symbols.LoadPath);
                case '"': return GlobalVariableToken(Symbols.LoadedFiles);
                case '<': return GlobalVariableToken(Symbols.InputContent);
                case '>': return GlobalVariableToken(Symbols.OutputStream);
                case '.': return GlobalVariableToken(Symbols.LastInputLineNumber);

                // regex:
                case '~': 
                    return GlobalVariableToken(Symbols.MatchData);
                
                case '&':
                    _tokenValue.SetInteger(RegexMatchReference.EntireMatch);
                    return Tokens.MatchReference;

                case '`':
                    _tokenValue.SetInteger(RegexMatchReference.MatchPrefix);
                    return Tokens.MatchReference;

                case '\'':		
                    _tokenValue.SetInteger(RegexMatchReference.MatchSuffix);
                    return Tokens.MatchReference;

                case '+':
                    _tokenValue.SetInteger(RegexMatchReference.MatchLastGroup);
                    return Tokens.MatchReference;

                case '0':
                    if (IsIdentifier(Peek())) {
                        // $0[A-Za-z0-9_] are invalid:
                        SkipVariableName();
                        // the reported name includes the leading '$': it spans [start - 1, _bufferPos),
                        // i.e. (_bufferPos - start + 1) characters. The previous length of
                        // (_bufferPos - start) dropped the final character of the name.
                        ReportError(Errors.InvalidGlobalVariableName, new String(_lineBuffer, start - 1, _bufferPos - start + 1));
                        SetAsciiStringToken(Symbols.ErrorVariable);
                        return Tokens.GlobalVariable;
                    }

                    return GlobalVariableToken(Symbols.CommandLineProgramPath);

                default:
                    // $1, $2, ... match group references:
                    if (IsDecimalDigit(c)) {
                        return ReadMatchGroupReferenceVariable(c);
                    }

                    // $name:
                    if (IsIdentifier(c)) {
                        SkipVariableName();
                        SetStringToken(start, _bufferPos - start);
                        return Tokens.GlobalVariable;
                    }

                    // nothing valid follows; push the character back and emit '$' alone:
                    Back(c);
                    return (Tokens)'$';
            }
        }
Example #10
0
        // Assignments: %=
        // Operators: %
        // Literals: %{... (quotation start)
        // The '%' has already been consumed; decides between modulo, %= and a quotation opener.
        private Tokens TokenizePercent(bool whitespaceSeen) {
            // at the start of an expression '%' always opens a quotation:
            if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
                return TokenizeQuotationStart();
            }

            int next = Peek();

            if (next == '=') {
                // "%=":
                Skip(next);
                SetAsciiStringToken(Symbols.Mod);
                _lexicalState = LexicalState.EXPR_BEG;
                MarkSingleLineTokenEnd();
                return Tokens.Assignment;
            }

            // "foo %bar" in a command argument is a quotation, not modulo:
            if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
                return TokenizeQuotationStart();
            }

            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            MarkSingleLineTokenEnd();
            return (Tokens)'%';
        }
Example #11
0
        // Instance variables:
        //   @[:alpha:_][:identifier:]*
        // Class variables:
        //   @@[:alpha:_][:identifier:]*
        // At:
        //   @
        //
        // The leading '@' has already been consumed. On success the token value holds the
        // variable name including the @/@@ prefix. On an invalid name only '@' is returned
        // (for '@@' the second '@' is left unconsumed in the input).
        private Tokens ReadInstanceOrClassVariable() {
            Tokens result;

            // start right before @/@@, the resulting symbol starts with @/@@
            int start = _bufferPos - 1;

            int c = Peek(0);
            if (c == '@') {
                // second '@' only peeked here; consumed below once the name is known valid:
                c = Peek(1);
                result = Tokens.ClassVariable;
            } else {
                result = Tokens.InstanceVariable;
            }

            // c follows @ or @@
            if (IsDecimalDigit(c)) {
                // @1, @@1 are invalid variable names:
                ReportError(result == Tokens.InstanceVariable ? Errors.InvalidInstanceVariableName : Errors.InvalidClassVariableName, (char)c);
            } else if (IsIdentifierInitial(c)) {
                if (result == Tokens.ClassVariable) {
                    Skip('@');
                }
                Skip(c);

                SkipVariableName();
                SetStringToken(start, _bufferPos - start);
                _lexicalState = LexicalState.EXPR_END;
                return result;
            }

            return (Tokens)'@';
        }
Example #12
0
        // Brackets: (
        // Classifies '(' (grouping vs. argument-list) based on the state and preceding whitespace.
        private Tokens ReadLeftParenthesis(bool whitespaceSeen) {
            Tokens result;

            switch (_lexicalState) {
                case LexicalState.EXPR_BEG:
                case LexicalState.EXPR_MID:
                    // grouping parenthesis at expression start:
                    result = Tokens.LeftParen;
                    break;

                case LexicalState.EXPR_CMDARG:
                    // "foo (..." - parenthesized first argument of a command:
                    result = whitespaceSeen ? Tokens.LparenArg : (Tokens)'(';
                    break;

                case LexicalState.EXPR_ARG:
                    if (whitespaceSeen) {
                        // "foo (...)" is ambiguous - warn but keep plain '(':
                        ReportWarning(Errors.WhitespaceBeforeArgumentParentheses);
                    }
                    result = (Tokens)'(';
                    break;

                default:
                    result = (Tokens)'(';
                    break;
            }

            COND_PUSH(0);
            CMDARG_PUSH(0);
            _lexicalState = LexicalState.EXPR_BEG;
            return result;
        }
Example #13
0
        // Assignment: =
        // Operators: == === =~ =>
        // The '=' has already been consumed; peeks ahead for the compound forms.
        private Tokens ReadEquals() {
            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            int next = Peek();

            if (next == '=') {
                // "==", and "===" if yet another '=' follows:
                Skip('=');
                return Read('=') ? Tokens.Eqq : Tokens.Eq;
            }

            if (next == '~') {
                // "=~" match operator:
                Skip('~');
                return Tokens.Match;
            }

            if (next == '>') {
                // "=>" hash association:
                Skip('>');
                return Tokens.Assoc;
            }

            return (Tokens)'=';
        }
Example #14
0
        // Returns the flavor of the "do" keyword appropriate for the current context
        // (loop body, block opener, or plain "do") and resets the state to EXPR_BEG.
        private Tokens ReturnDoKeyword() {
            LexicalState previousState = _lexicalState;
            _lexicalState = LexicalState.EXPR_BEG;

            // if last conditional opening is a parenthesis:
            if (COND_P()) {
                return Tokens.LoopDo;
            }

            // "do" opening a block attached to a command argument, or right after ENDARG:
            bool isBlockDo =
                (CMDARG_P() && previousState != LexicalState.EXPR_CMDARG) ||
                previousState == LexicalState.EXPR_ENDARG;

            return isBlockDo ? Tokens.BlockDo : Tokens.Do;
        }
Example #15
0
        // Brackets: [
        // Operators: [] []=
        // Classifies '[' as an indexer-method name, array-literal opener, or indexing bracket.
        private Tokens ReadLeftBracket(bool whitespaceSeen) {
            if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
                // method names "[]" and "[]=":
                _lexicalState = LexicalState.EXPR_ARG;

                if (Read(']')) {
                    return Read('=') ? Tokens.Aset : Tokens.Aref;
                }
                return (Tokens)'[';
            }

            // at expression start, or after whitespace in an argument position,
            // '[' opens an array literal:
            bool opensArray =
                _lexicalState == LexicalState.EXPR_BEG ||
                _lexicalState == LexicalState.EXPR_MID ||
                (IS_ARG() && whitespaceSeen);

            Tokens result = opensArray ? Tokens.Lbrack : (Tokens)'[';

            _lexicalState = LexicalState.EXPR_BEG;
            COND_PUSH(0);
            CMDARG_PUSH(0);
            return result;
        }
Example #16
0
        // Assignment: >>=
        // Operators: > >= >>
        // The '>' has already been consumed; peeks ahead for the compound forms.
        private Tokens ReadGreaterThan() {
            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            switch (Peek()) {
                case '=':
                    // ">=":
                    Skip('=');
                    return Tokens.Geq;

                case '>':
                    Skip('>');
                    if (Read('=')) {
                        // ">>=":
                        SetAsciiStringToken(Symbols.RightShift);
                        _lexicalState = LexicalState.EXPR_BEG;
                        return Tokens.Assignment;
                    }
                    return Tokens.Rshft;

                default:
                    return (Tokens)'>';
            }
        }
Example #17
0
        // Operators: ~ ~@
        // The '~' has already been consumed; in method-name position an optional '@' suffix
        // is consumed as well (the "~@" method name).
        private Tokens ReadTilde() {
            bool methodNamePosition =
                _lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT;

            if (methodNamePosition) {
                // ~@ - swallow the '@' if present:
                Read('@');
            }

            _lexicalState = methodNamePosition ? LexicalState.EXPR_ARG : LexicalState.EXPR_BEG;
            return (Tokens)'~';
        }
Example #18
0
        // Operators: ? (conditional)
        // Literals: ?[:char:] ?{escape}
        // Errors: ?[:EOF:]
        //
        // Disambiguates the conditional operator '?' from a character literal (?a, ?\n).
        // A character literal is returned as Tokens.Integer with the character value masked to a byte.
        private Tokens TokenizeQuestionmark() {
            // after a complete expression '?' can only be the conditional operator:
            if (_lexicalState == LexicalState.EXPR_END || _lexicalState == LexicalState.EXPR_ENDARG) {
                _lexicalState = LexicalState.EXPR_BEG;
                MarkSingleLineTokenEnd();
                return (Tokens)'?';
            }

            // ?[:EOF:]
            int c = Peek();
            if (c == -1) {
                _unterminatedToken = true;
                MarkSingleLineTokenEnd();
                ReportError(Errors.IncompleteCharacter);
                return Tokens.EndOfFile;
            }

            // TODO: ?x, ?\u1234, ?\u{123456} -> string in 1.9
            // ?[:whitespace:]
            if (IsWhiteSpace(c)) {
                if (!IS_ARG()) {
                    // suggest the escaped form (?\s, ?\n, ...) for a literal whitespace character:
                    int c2 = 0;
                    switch (c) {
                        case ' ': c2 = 's'; break;
                        case '\n': c2 = 'n'; break;
                        case '\t': c2 = 't'; break;
                        case '\v': c2 = 'v'; break;
                        case '\r': c2 = (Peek(1) == '\n') ? 'n' : 'r'; break;
                        case '\f': c2 = 'f'; break;
                    }

                    if (c2 != 0) {
                        ReportWarning(Errors.InvalidCharacterSyntax, (char)c2);
                    }
                }
                // whitespace after '?' -> conditional operator:
                _lexicalState = LexicalState.EXPR_BEG;
                MarkSingleLineTokenEnd();
                return (Tokens)'?';
            } 
            
            // ?{identifier}
            // two identifier characters follow, so this is "cond ? ident" rather than a char literal:
            if ((IsLetterOrDigit(c) || c == '_') && IsIdentifier(Peek(1))) {
                _lexicalState = LexicalState.EXPR_BEG;
                MarkSingleLineTokenEnd();
                return (Tokens)'?';
            }

            Skip(c);
            
            // ?\{escape}
            if (c == '\\') {
                // TODO: ?\xx, ?\u1234, ?\u{123456} -> string in 1.9
                c = ReadEscape();

                // \M-{eoln} eats the eoln:
                MarkMultiLineTokenEnd();
            } else {
                MarkSingleLineTokenEnd();
            }

            // TODO: ?x -> string in 1.9
            c &= 0xff;
            _lexicalState = LexicalState.EXPR_END;
            _tokenValue.SetInteger(c);

            return Tokens.Integer;
        }
Example #19
0
        // Assignments: ^=
        // Operators: ^
        // The '^' has already been consumed.
        private Tokens ReadCaret() {
            if (Read('=')) {
                // "^=":
                SetAsciiStringToken(Symbols.Xor);
                _lexicalState = LexicalState.EXPR_BEG;
                return Tokens.Assignment;
            }

            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            return (Tokens)'^';
        }
Example #20
0
        // Operators: | ||
        // Assignments: |= ||=
        // The '|' has already been consumed; peeks ahead to classify the operator.
        private Tokens ReadPipe() {
            switch (Peek()) {
                case '|':
                    // "||" or "||=":
                    Skip('|');
                    _lexicalState = LexicalState.EXPR_BEG;

                    if (Read('=')) {
                        SetAsciiStringToken(Symbols.Or);
                        return Tokens.Assignment;
                    }
                    return Tokens.LogicalOr;

                case '=':
                    // "|=":
                    Skip('=');
                    SetAsciiStringToken(Symbols.BitwiseOr);
                    _lexicalState = LexicalState.EXPR_BEG;
                    return Tokens.Assignment;
            }

            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            return (Tokens)'|';
        }
Example #21
0
        // Operators: /
        // Assignments: /=
        // Literals: /... (regex start)
        // The '/' has already been consumed; decides between division, /= and a regex opener.
        private Tokens ReadSlash(bool whitespaceSeen) {
            // at the start of an expression '/' always opens a regular expression:
            if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
                _currentString = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
                _tokenValue.SetStringTokenizer(_currentString);
                return Tokens.RegexpBegin;
            }

            int next = Peek();

            if (next == '=') {
                // "/=":
                Skip(next);
                SetAsciiStringToken(Symbols.Divide);
                _lexicalState = LexicalState.EXPR_BEG;
                return Tokens.Assignment;
            }

            // "foo /bar" in a command argument is taken as a regex start; warn about the ambiguity:
            if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
                ReportWarning(Errors.AmbiguousFirstArgument);
                _currentString = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
                _tokenValue.SetStringTokenizer(_currentString);
                return Tokens.RegexpBegin;
            }

            _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
                ? LexicalState.EXPR_ARG
                : LexicalState.EXPR_BEG;

            return (Tokens)'/';
        }
Example #22
0
        // Operators: - -@
        // Assignments: -=
        // Literals: -... (negative number sign)
        // Operators: - -@
        // Assignments: -=
        // Literals: -... (negative number sign)
        private Tokens ReadMinus(bool whitespaceSeen) {
            // After "def" or ".", minus names the operator method: "-" or unary "-@".
            if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
                _lexicalState = LexicalState.EXPR_ARG;
                return Read('@') ? Tokens.Uminus : (Tokens)'-';
            }

            int next = Peek();

            // "-=" compound assignment:
            if (next == '=') {
                Skip(next);
                SetAsciiStringToken(Symbols.Minus);
                _lexicalState = LexicalState.EXPR_BEG;
                return Tokens.Assignment;
            }

            // Unary minus: at the start of an expression, or in an argument position when
            // preceded but not followed by whitespace ("foo -1" — ambiguous, warn):
            bool beginsExpression = _lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID;
            if (beginsExpression || (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next))) {

                if (IS_ARG()) {
                    ReportWarning(Errors.AmbiguousFirstArgument);
                }

                _lexicalState = LexicalState.EXPR_BEG;
                // A digit right after the sign makes it part of a numeric literal:
                return IsDecimalDigit(next) ? Tokens.UminusNum : Tokens.Uminus;
            }

            // Binary subtraction:
            _lexicalState = LexicalState.EXPR_BEG;
            return (Tokens)'-';
        }
Example #23
0
        // Operators: :: : 
        // Literals: :... (symbol start)
        // Operators: :: : 
        // Literals: :... (symbol start)
        private Tokens ReadColon(bool whitespaceSeen) {
            int c = Peek();
            if (c == ':') {
                Skip(c);
                // "::" at the start of an expression, in class-definition position, or after
                // whitespace in an argument position resolves from the top-level scope (::Foo);
                // otherwise it separates a constant/member path (Foo::Bar) and the tokenizer
                // switches to EXPR_DOT so the next token is read as a member name.
                if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID ||
                    _lexicalState == LexicalState.EXPR_CLASS || (IS_ARG() && whitespaceSeen)) {
                    
                    _lexicalState = LexicalState.EXPR_BEG;
                    return Tokens.LeadingDoubleColon;
                }

                _lexicalState = LexicalState.EXPR_DOT;
                return Tokens.SeparatingDoubleColon;
            }

            // After a complete expression, or when followed by whitespace, a single colon
            // is a plain operator token (e.g. the ":" of "? :") rather than a symbol start:
            if (_lexicalState == LexicalState.EXPR_END || _lexicalState == LexicalState.EXPR_ENDARG || IsWhiteSpace(c)) {
                _lexicalState = LexicalState.EXPR_BEG;
                return (Tokens)':';
            }

            // Symbol literal start. A quote immediately after the colon begins a quoted
            // symbol: :'foo' (verbatim) or :"foo" (allows embedded #{} expansion).
            switch (c) {
                case '\'':
                    Skip(c);
                    _currentString = new StringContentTokenizer(StringType.Symbol, '\'');
                    break;

                case '"':
                    Skip(c);
                    _currentString = new StringContentTokenizer(StringType.Symbol | StringType.ExpandsEmbedded, '"');
                    break;

                default:
                    // Unquoted symbol (:foo) — no string tokenizer is installed; the name
                    // is read by the regular identifier path. SetStringTokenizer(null) below
                    // is intentional in that case.
                    Debug.Assert(_currentString == null);
                    break;
            }

            _lexicalState = LexicalState.EXPR_FNAME;
            _tokenValue.SetStringTokenizer(_currentString);
            return Tokens.SymbolBegin;
        }
Example #24
0
        // Quotation start: 
        //   %[QqWwxrs]?[^:alpha-numeric:]
        // Quotation start: 
        //   %[QqWwxrs]?[^:alpha-numeric:]
        //
        // Recognizes the "%" family of literals and installs a StringContentTokenizer
        // configured with the literal's type and terminator; the body is then produced
        // by subsequent GetNextToken calls driven by _currentString.
        private Tokens TokenizeQuotationStart() {
            StringType type;
            Tokens token;
            int terminator;

            // c is the character following %
            // note that it could be eoln in which case it needs to be normalized:
            int c = ReadNormalizeEndOfLine();
            switch (c) {
                case 'Q':
                    // %Q(...) — double-quote-like string with #{} expansion:
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.StringBegin;
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 'q':
                    // %q(...) — single-quote-like (verbatim) string:
                    type = StringType.Default;
                    token = Tokens.StringBegin;
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 'W':
                    // %W(...) — word list with #{} expansion:
                    type = StringType.Words | StringType.ExpandsEmbedded;
                    token = Tokens.WordsBegin;
                    // if the terminator is a whitespace the end will never be matched and syntax error will be reported
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 'w':
                    // %w(...) — verbatim word list:
                    type = StringType.Words;
                    token = Tokens.VerbatimWordsBegin;
                    // if the terminator is a whitespace the end will never be matched and syntax error will be reported
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 'x':
                    // %x(...) — shell command string:
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.ShellStringBegin;
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 'r':
                    // %r(...) — regular expression literal:
                    type = StringType.RegularExpression | StringType.ExpandsEmbedded;
                    token = Tokens.RegexpBegin;
                    terminator = ReadNormalizeEndOfLine();
                    break;

                case 's':
                    // %s(...) — symbol literal:
                    type = StringType.Symbol;
                    token = Tokens.SymbolBegin;
                    terminator = ReadNormalizeEndOfLine();
                    _lexicalState = LexicalState.EXPR_FNAME;
                    break;

                default:
                    // Bare "%(...)": the character after % is itself the terminator
                    // and the literal behaves like %Q:
                    type = StringType.ExpandsEmbedded;
                    token = Tokens.StringBegin;
                    terminator = c;
                    break;
            }

            // An opening bracket terminator pairs with its closing counterpart; for
            // non-bracket terminators the same character closes the literal and no
            // opening parenthesis is tracked (parenthesis == 0 disables nesting).
            int parenthesis = terminator;
            switch (terminator) {
                case -1:
                    // End of stream right after the type character:
                    _unterminatedToken = true;
                    MarkSingleLineTokenEnd();
                    ReportError(Errors.UnterminatedQuotedString);
                    return Tokens.EndOfFile;

                case '(': terminator = ')'; break;
                case '{': terminator = '}'; break;
                case '[': terminator = ']'; break;
                case '<': terminator = '>'; break;

                default:
                    if (IsLetterOrDigit(terminator)) {
                        // An alphanumeric delimiter is invalid; push it back and emit
                        // the % as a plain token:
                        Back(terminator);
                        MarkSingleLineTokenEnd();
                        ReportError(Errors.UnknownQuotedStringType);
                        return (Tokens)'%';
                    }

                    parenthesis = 0;
                    break;
            }

            // The token spans multiple lines if the delimiter itself was a newline,
            // or (for word lists) if leading whitespace skipping crossed a newline:
            bool isMultiline = terminator == '\n';

            if ((type & StringType.Words) != 0) {
                isMultiline |= SkipWhitespace();
            }

            if (isMultiline) {
                MarkMultiLineTokenEnd();
            } else {
                MarkSingleLineTokenEnd();
            }
            
            _currentString = new StringContentTokenizer(type, (char)terminator, (char)parenthesis);
            _tokenValue.SetStringTokenizer(_currentString);
            return token;
        }
Example #25
0
        // Assignments: **= *= 
        // Operators: ** * splat
        // Assignments: **= *= 
        // Operators: ** * splat
        private Tokens ReadStar(bool whitespaceSeen) {
            Tokens token;

            int next = Peek();
            switch (next) {
                case '*':
                    Skip(next);
                    if (Read('=')) {
                        // "**=" exponentiation assignment:
                        SetAsciiStringToken(Symbols.Power);
                        _lexicalState = LexicalState.EXPR_BEG;

                        return Tokens.Assignment;
                    }

                    // "**" exponentiation:
                    token = Tokens.Pow;
                    break;

                case '=':
                    // "*=" multiplication assignment:
                    Skip(next);

                    SetAsciiStringToken(Symbols.Multiply);
                    _lexicalState = LexicalState.EXPR_BEG;
                    return Tokens.Assignment;

                default:
                    if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
                        // "foo *bar" — ambiguous star in argument position, taken as splat:
                        ReportWarning(Errors.StarInterpretedAsSplatArgument);
                        token = Tokens.Star;
                    } else if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
                        // Start of an expression — splat:
                        token = Tokens.Star;
                    } else {
                        // Binary multiplication:
                        token = (Tokens)'*';
                    }
                    break;
            }

            // After "def"/"." the star names the operator method:
            if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
                _lexicalState = LexicalState.EXPR_ARG;
            } else {
                _lexicalState = LexicalState.EXPR_BEG;
            }

            return token;
        }
Example #26
0
        // Produces the next token from the input stream.
        //
        // If a string/regex literal is in progress (_currentString != null) its
        // content tokenizer drives the scan; otherwise regular tokenization runs in
        // a loop that swallows ignorable tokens (comments, whitespace, end-of-line)
        // unless the tokenizer is in verbatim mode, in which case they are returned
        // to the caller as well.
        public Tokens GetNextToken() {
            if (_input == null) {
                throw new InvalidOperationException("Uninitialized");
            }

            // Continue an in-progress string/regex literal:
            if (_currentString != null) {
                // TODO:
                RefillBuffer();

                Tokens token = _currentString.Tokenize(this);
                // The literal's closing delimiter ends string mode:
                if (token == Tokens.StringEnd || token == Tokens.RegexpEnd) {
                    _currentString = null;
                    _lexicalState = LexicalState.EXPR_END;
                }
                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(token);
                return token;
            }

            bool whitespaceSeen = false;
            // cmdState is captured from the flag set by the previous significant newline
            // and reset for the next round:
            bool cmdState = _commaStart;
            _commaStart = false;

            while (true) {
                // TODO:
                RefillBuffer();

                Tokens token = Tokenize(whitespaceSeen, cmdState);
            
                _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                DumpToken(token);
                
                // ignored tokens:
                switch (token) {
                    case Tokens.MultiLineComment:
                    case Tokens.SingleLineComment:
                        break;

                    case Tokens.Whitespace:
                        // Remembered so the next token can detect "preceded by whitespace"
                        // ambiguities (splat, unary minus, regex, ...):
                        whitespaceSeen = true;
                        break;

                    case Tokens.EndOfLine: // not considered whitespace
                        break;

                    case Tokens.EndOfFile:
                        _eofReached = true;
                        return token;

                    default:
                        return token;
                }

                // In verbatim mode even the ignorable tokens are surfaced (e.g. for
                // colorization or round-tripping):
                if (_verbatim) {
                    return token;
                }
            }
        }
Example #27
0
        // Operators: ! != !~
        private Tokens ReadBang() {
            _lexicalState = LexicalState.EXPR_BEG;
            
            int c = Peek();
            if (c == '=') {
                Skip(c);
                return Tokens.Neq;
            } else if (c == '~') {
                Skip(c);
                return Tokens.Nmatch;
            }

            return (Tokens)'!';
        }
Example #28
0
        // Classifies a newline: either an ignorable EndOfLine (the expression cannot
        // end here) or a significant '\n' token that terminates the statement.
        private Tokens GetEndOfLineToken() {
            switch (_lexicalState) {
                case LexicalState.EXPR_BEG:
                case LexicalState.EXPR_FNAME:
                case LexicalState.EXPR_DOT:
                case LexicalState.EXPR_CLASS:
                    // Mid-construct newline — insignificant:
                    return Tokens.EndOfLine;
            }

            // Statement-terminating newline:
            _commaStart = true;
            _lexicalState = LexicalState.EXPR_BEG;
            return (Tokens)'\n';
        }
Example #29
0
        // String: <<HEREDOC_LABEL
        // Assignment: <<=
        // Operators: << <= <=> <
        private Tokens TokenizeLessThan(bool whitespaceSeen) {
            int c = Read();

            if (c == '<' &&
                _lexicalState != LexicalState.EXPR_END &&
                _lexicalState != LexicalState.EXPR_DOT &&
                _lexicalState != LexicalState.EXPR_ENDARG &&
                _lexicalState != LexicalState.EXPR_CLASS && 
                (!IS_ARG() || whitespaceSeen)) {

                Tokens token = TokenizeHeredocLabel();
                if (token != Tokens.None) {
                    return token;
                }
            }

            switch (_lexicalState) {
                case LexicalState.EXPR_FNAME:
                case LexicalState.EXPR_DOT:
                    _lexicalState = LexicalState.EXPR_ARG;
                    break;

                default:
                    _lexicalState = LexicalState.EXPR_BEG; 
                    break;
            }

            if (c == '=') {
                if (Read('>')) {
                    MarkSingleLineTokenEnd();
                    return Tokens.Cmp;
                }
                MarkSingleLineTokenEnd();
                return Tokens.Leq;
            }

            if (c == '<') {
                if (Read('=')) {
                    SetAsciiStringToken(Symbols.LeftShift);
                    _lexicalState = LexicalState.EXPR_BEG;
                    MarkSingleLineTokenEnd();
                    return Tokens.Assignment;
                }
                MarkSingleLineTokenEnd();
                return Tokens.Lshft;
            }

            Back(c);
            MarkSingleLineTokenEnd();
            return (Tokens)'<';
        }
Example #30
0
 /// <summary>
 /// Tries to parse an entity from the specified lexical machine state.
 /// In case of success returns true and advances parsing position.
 /// </summary>
 /// <param name="state">The lexical machine state to parse from.</param>
 /// <returns>True if the entity was parsed successfully; otherwise false.</returns>
 /// <remarks>
 /// Delegates to <c>ParseExcept</c> with the configured main and exception entities.
 /// </remarks>
 public override bool Parse(LexicalState state)
 {
     return ParseExcept(state, m_main, m_exception);
 }
Example #31
0
 /// <summary>
 /// Asserts that the tokenizer is currently in the <paramref name="expected"/> lexical state.
 /// </summary>
 /// <param name="expected">The lexical state the tokenizer is expected to be in.</param>
 /// <returns>This instance, so assertions can be chained fluently.</returns>
 public AssertTokenizer /*!*/ State(LexicalState expected)
 {
     _tests.Assert(Tokenizer.LexicalState == expected);
     return(this);
 }