// Produces exactly one token per call by dispatching on the first character.
// whitespaceSeen: whitespace immediately preceded this token (used by callees
//   to disambiguate unary vs. binary operators and splat/block-pass arguments).
// cmdState: a command (paren-less method call) may start at this position.
private Tokens Tokenize(bool whitespaceSeen, bool cmdState) {
    MarkTokenStart();
    int c = Read();
    switch (c) {
        case '\0':
            // null terminates the input
            // if tokenizer is asked for the next token it returns EOF again:
            Back('\0');
            MarkSingleLineTokenEnd();
            return Tokens.EndOfFile;

        case -1:
            // end of stream
            MarkSingleLineTokenEnd();
            return Tokens.EndOfFile;

        // whitespace
        case ' ':
        case '\t':
        case '\f':
            return MarkSingleLineTokenEnd(ReadNonEolnWhiteSpace());

        case '\n':
            return MarkMultiLineTokenEnd(GetEndOfLineToken());

        case '\r':
            // "\r\n" is a single end of line; a lone '\r' counts as whitespace:
            if (Read('\n')) {
                return MarkMultiLineTokenEnd(GetEndOfLineToken());
            } else {
                return MarkSingleLineTokenEnd(ReadNonEolnWhiteSpace());
            }

        case '\\':
            return TokenizeBackslash();

        case '#':
            return MarkSingleLineTokenEnd(ReadSingleLineComment());

        case '*':
            return MarkSingleLineTokenEnd(ReadStar(whitespaceSeen));

        case '!':
            return MarkSingleLineTokenEnd(ReadBang());

        case '=':
            // "=begin" .. "=end" multi-line comment:
            if (ReadMultiLineComment()) {
                MarkMultiLineTokenEnd();
                return Tokens.MultiLineComment;
            }
            return MarkSingleLineTokenEnd(ReadEquals());

        case '<':
            return TokenizeLessThan(whitespaceSeen);

        case '>':
            return MarkSingleLineTokenEnd(ReadGreaterThan());

        case '"':
            return MarkSingleLineTokenEnd(ReadDoubleQuote());

        case '\'':
            return MarkSingleLineTokenEnd(ReadSingleQuote());

        case '`':
            return MarkSingleLineTokenEnd(ReadBacktick(cmdState));

        case '?':
            return TokenizeQuestionmark();

        case '&':
            return MarkSingleLineTokenEnd(ReadAmpersand(whitespaceSeen));

        case '|':
            return MarkSingleLineTokenEnd(ReadPipe());

        case '+':
            return MarkSingleLineTokenEnd(ReadPlus(whitespaceSeen));

        case '-':
            return MarkSingleLineTokenEnd(ReadMinus(whitespaceSeen));

        case '.':
            return MarkSingleLineTokenEnd(ReadDot());

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return MarkSingleLineTokenEnd(ReadUnsignedNumber(c));

        case ':':
            return MarkSingleLineTokenEnd(ReadColon(whitespaceSeen));

        case '/':
            return MarkSingleLineTokenEnd(ReadSlash(whitespaceSeen));

        case '^':
            return MarkSingleLineTokenEnd(ReadCaret());

        case ';':
            // a statement ends here; the next token may start a command:
            _commaStart = true;
            _lexicalState = LexicalState.EXPR_BEG;
            MarkSingleLineTokenEnd();
            return (Tokens)';';

        case ',':
            _lexicalState = LexicalState.EXPR_BEG;
            MarkSingleLineTokenEnd();
            return (Tokens)',';

        case '~':
            return MarkSingleLineTokenEnd(ReadTilde());

        case '(':
            _commaStart = true;
            return MarkSingleLineTokenEnd(ReadLeftParenthesis(whitespaceSeen));

        case '[':
            return MarkSingleLineTokenEnd(ReadLeftBracket(whitespaceSeen));

        case '{':
            return MarkSingleLineTokenEnd(ReadLeftBrace());

        case ')':
        case ']':
        case '}':
            // pop the conditional/command-argument state pushed by the matching
            // opening bracket:
            COND_LEXPOP();
            CMDARG_LEXPOP();
            _lexicalState = LexicalState.EXPR_END;
            MarkSingleLineTokenEnd();
            return (Tokens)c;

        case '%':
            return TokenizePercent(whitespaceSeen);

        case '$':
            return MarkSingleLineTokenEnd(ReadGlobalVariable());

        case '@':
            return MarkSingleLineTokenEnd(ReadInstanceOrClassVariable());

        case '_':
            // "__END__" alone on a line terminates the program; the rest of the
            // file is treated as data:
            if (was_bol() && LineContentEquals("__END__", false)) {
                // if tokenizer is asked for the next token it returns EOF again:
                Back('_');
                MarkSingleLineTokenEnd();
                _dataOffset = _currentLineIndex + _lineLength;
                return Tokens.EndOfFile;
            }
            return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));

        default:
            if (!IsIdentifierInitial(c, _multiByteIdentifier)) {
                // UTF-8 BOM detection:
                if (_compatibility == RubyCompatibility.Ruby18 && _currentLineIndex == 0 && _bufferPos == 1 &&
                    (c == 0xEF && Peek() == 0xBB && Peek(1) == 0xBF)) {

                    ReportError(Errors.InvalidUseOfByteOrderMark);

                    // skip BOM and continue parsing as if it was whitespace:
                    Read();
                    Read();
                    MarkSingleLineTokenEnd();
                    return Tokens.Whitespace;
                } else {
                    ReportError(Errors.InvalidCharacterInExpression, (char)c);
                    MarkSingleLineTokenEnd();
                    return Tokens.InvalidCharacter;
                }
            }
            return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));
    }
}
// Identifiers:
//   [:alpha:_][:identifier:]+
// Method names:
//   [:alpha:_][:identifier:]+[?][^=]
//   [:alpha:_][:identifier:]+[!][^=]
//   [:alpha:_][:identifier:]+[=][^=~>]
//   [:alpha:_][:identifier:]+[=] immediately followed by =>
// Keywords
private Tokens ReadIdentifier(int firstCharacter, bool cmdState) {
    // the first character already read:
    int start = _bufferPos - 1;
    SkipVariableName();

    // reads token suffix (!, ?, =) and returns the token kind based upon the suffix:
    Tokens result = ReadIdentifierSuffix(firstCharacter);

    // TODO: possible optimization: ~15% are keywords, ~15% are existing local variables -> we can save allocations
    string identifier = new String(_lineBuffer, start, _bufferPos - start);

    // after '.' an identifier is always a method name, never a keyword:
    if (_lexicalState != LexicalState.EXPR_DOT) {
        if (_lexicalState == LexicalState.EXPR_FNAME) {
            // in a method-definition position remember the name text even if it
            // turns out to be a keyword:
            SetStringToken(identifier);
        }

        Tokens keyword = StringToKeyword(identifier);
        if (keyword != Tokens.None) {
            return keyword;
        }
    }

    if (_lexicalState == LexicalState.EXPR_BEG ||
        _lexicalState == LexicalState.EXPR_MID ||
        _lexicalState == LexicalState.EXPR_DOT ||
        _lexicalState == LexicalState.EXPR_ARG ||
        _lexicalState == LexicalState.EXPR_CMDARG) {

        if (_localVariableResolver.IsLocalVariable(identifier)) {
            // a known local variable is a complete expression; it cannot start
            // an argument list:
            _lexicalState = LexicalState.EXPR_END;
        } else if (cmdState) {
            _lexicalState = LexicalState.EXPR_CMDARG;
        } else {
            _lexicalState = LexicalState.EXPR_ARG;
        }
    } else {
        _lexicalState = LexicalState.EXPR_END;
    }

    SetStringToken(identifier);
    return result;
}
// Forces the tokenizer into the given lexical state (used by callers that know
// better than the state the tokenizer inferred on its own).
internal void SetState(LexicalState state) {
    _lexicalState = state;
}
// INTEGER:
// [1-9]([0-9_]*[1-9])?
// 0([0-7_]*[0-7])?
// 0[xX][0-9a-fA-F]([0-9a-fA-F_]*[0-9a-fA-F])?
// 0[dD][0-9]([0-9_]*[0-9])?
// 0[bB][01]([01_]*[01])?
// 0[oO][0-7]([0-7_]*[0-7])?
//
// FLOAT:
// (0|[1-9]([0-9_]*[0-9])?)[.][0-9_]*[0-9]([eE][+-]?[0-9]([0-9_]*[0-9])?)
//
// Takes the first decimal digit of the number.
private Tokens ReadUnsignedNumber(int c) {
    // a numeric literal always completes an expression:
    _lexicalState = LexicalState.EXPR_END;

    if (c == '0') {
        // a leading zero selects the radix via the next character:
        switch (Peek()) {
            case 'x':
            case 'X':
                Skip();
                return ReadInteger(16, NumericCharKind.None);

            case 'b':
            case 'B':
                Skip();
                return ReadInteger(2, NumericCharKind.None);

            case 'o':
            case 'O':
                Skip();
                return ReadInteger(8, NumericCharKind.None);

            case 'd':
            case 'D':
                Skip();
                return ReadInteger(10, NumericCharKind.None);

            case 'e':
            case 'E': {
                // 0e[+-]...
                int sign;
                int start = _bufferPos - 1;
                if (TryReadExponentSign(1, out sign)) {
                    return ReadDoubleExponent(start, sign);
                }
                // "0e" not followed by a valid exponent is just the integer 0:
                _tokenValue.SetInteger(0);
                return Tokens.Integer;
            }

            case '.':
                // 0.
                if (IsDecimalDigit(Peek(1))) {
                    Skip('.');
                    return ReadDouble(_bufferPos - 2);
                }
                // "0." not followed by a digit: the '.' belongs to the next token:
                _tokenValue.SetInteger(0);
                return Tokens.Integer;

            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            case '_':
                // the previous character is '0' digit:
                return ReadInteger(8, NumericCharKind.Digit);

            case '8':
            case '9':
                ReportError(Errors.IllegalOctalDigit);
                // treat the number as decimal
                return ReadInteger(10, NumericCharKind.Digit);

            default:
                _tokenValue.SetInteger(0);
                return Tokens.Integer;
        }
    }

    return ReadDecimalNumber(c);
}
// Operators: & &&
// Assignments: &= &&=
private Tokens ReadAmpersand(bool whitespaceSeen) {
    int next = Peek();
    switch (next) {
        case '&':
            Skip(next);
            _lexicalState = LexicalState.EXPR_BEG;
            if (Read('=')) {
                // &&=
                SetAsciiStringToken(Symbols.And);
                return Tokens.Assignment;
            }
            return Tokens.LogicalAnd;

        case '=':
            Skip(next);
            _lexicalState = LexicalState.EXPR_BEG;
            SetAsciiStringToken(Symbols.BitwiseAnd);
            return Tokens.Assignment;
    }

    Tokens token;
    if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
        // "foo &bar" — whitespace before but not after the ampersand in a
        // command argument makes it a block-pass argument:
        ReportWarning(Errors.AmpersandInterpretedAsProcArgument);
        token = Tokens.Ampersand;
    } else if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        token = Tokens.Ampersand;
    } else {
        token = (Tokens)'&';
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    return token;
}
// Operators: . .. ...
// Errors: .[:digit:]
private Tokens ReadDot() {
    _lexicalState = LexicalState.EXPR_BEG;

    int next = Peek();
    if (next == '.') {
        Skip(next);
        // two dots consumed; a third makes "...":
        return Read('.') ? Tokens.Dot3 : Tokens.Dot2;
    }

    if (IsDecimalDigit(next)) {
        // ".5" is not a valid float literal:
        ReportError(Errors.NoFloatingLiteral);
    }

    _lexicalState = LexicalState.EXPR_DOT;
    return (Tokens)'.';
}
// Brackets: {
private Tokens ReadLeftBrace() {
    Tokens token;
    if (IS_ARG() || _lexicalState == LexicalState.EXPR_END) {
        token = (Tokens)'{';          // block (primary)
    } else if (_lexicalState == LexicalState.EXPR_ENDARG) {
        token = Tokens.LbraceArg;     // block (expr)
    } else {
        token = Tokens.Lbrace;        // hash
    }

    COND_PUSH(0);
    CMDARG_PUSH(0);
    _lexicalState = LexicalState.EXPR_BEG;
    return token;
}
// String: `...
// Operator: `
private Tokens ReadBacktick(bool cmdState) {
    switch (_lexicalState) {
        case LexicalState.EXPR_FNAME:
            // backtick used as a method name in a definition:
            _lexicalState = LexicalState.EXPR_END;
            return (Tokens)'`';

        case LexicalState.EXPR_DOT:
            // backtick used as a method name in a call ("foo.`"):
            _lexicalState = cmdState ? LexicalState.EXPR_CMDARG : LexicalState.EXPR_ARG;
            return (Tokens)'`';
    }

    // otherwise a shell string literal starts here:
    _currentString = new StringContentTokenizer(StringType.ExpandsEmbedded, '`');
    _tokenValue.SetStringTokenizer(_currentString);
    return Tokens.ShellStringBegin;
}
// Global variables:
//   $[_~*$?!@/\;,.=:<>"]
//   $-[:identifier:]
//   $[:identifier:]
// Match references:
//   $[&`'+]
//   $[1-9][0-9]+
// Dollar:
//   $
private Tokens ReadGlobalVariable() {
    _lexicalState = LexicalState.EXPR_END;

    // start right after $, the resulting symbol doesn't contain $
    int start = _bufferPos;

    int c = Read();
    switch (c) {
        case '_':
            // "$_name" is an ordinary global; bare "$_" is the last input line:
            if (IsIdentifier(Peek())) {
                SkipVariableName();
                SetStringToken(start, _bufferPos - start);
                return Tokens.GlobalVariable;
            }
            return GlobalVariableToken(Symbols.LastInputLine);

        // exceptions:
        case '!': return GlobalVariableToken(Symbols.CurrentException);
        case '@': return GlobalVariableToken(Symbols.CurrentExceptionBacktrace);

        // options:
        case '-':
            // "$-X" command-line-option variable; bare "$-" otherwise:
            if (IsIdentifier(Peek())) {
                Read();
                SetStringToken(start, 2);
            } else {
                SetAsciiStringToken("-");
            }
            return Tokens.GlobalVariable;

        // others:
        case ',': return GlobalVariableToken(Symbols.ItemSeparator);
        case ';': return GlobalVariableToken(Symbols.StringSeparator);
        case '/': return GlobalVariableToken(Symbols.InputSeparator);
        case '\\': return GlobalVariableToken(Symbols.OutputSeparator);
        case '*': return GlobalVariableToken(Symbols.CommandLineArguments);
        case '$': return GlobalVariableToken(Symbols.CurrentProcessId);
        case '?': return GlobalVariableToken(Symbols.ChildProcessExitStatus);
        case '=': return GlobalVariableToken(Symbols.IgnoreCaseComparator);
        case ':': return GlobalVariableToken(Symbols.LoadPath);
        case '"': return GlobalVariableToken(Symbols.LoadedFiles);
        case '<': return GlobalVariableToken(Symbols.InputContent);
        case '>': return GlobalVariableToken(Symbols.OutputStream);
        case '.': return GlobalVariableToken(Symbols.LastInputLineNumber);

        // regex:
        case '~': return GlobalVariableToken(Symbols.MatchData);

        case '&':
            _tokenValue.SetInteger(RegexMatchReference.EntireMatch);
            return Tokens.MatchReference;

        case '`':
            _tokenValue.SetInteger(RegexMatchReference.MatchPrefix);
            return Tokens.MatchReference;

        case '\'':
            _tokenValue.SetInteger(RegexMatchReference.MatchSuffix);
            return Tokens.MatchReference;

        case '+':
            _tokenValue.SetInteger(RegexMatchReference.MatchLastGroup);
            return Tokens.MatchReference;

        case '0':
            if (IsIdentifier(Peek())) {
                // $0[A-Za-z0-9_] are invalid:
                SkipVariableName();
                // NOTE(review): the reported text starts at (start - 1) — the '$' —
                // but uses length (_bufferPos - start), which looks one character
                // short of covering the whole "$0..." name; verify intent.
                ReportError(Errors.InvalidGlobalVariableName, new String(_lineBuffer, start - 1, _bufferPos - start));
                SetAsciiStringToken(Symbols.ErrorVariable);
                return Tokens.GlobalVariable;
            }
            return GlobalVariableToken(Symbols.CommandLineProgramPath);

        default:
            if (IsDecimalDigit(c)) {
                // $1, $2, ... match-group references:
                return ReadMatchGroupReferenceVariable(c);
            }

            if (IsIdentifier(c)) {
                SkipVariableName();
                SetStringToken(start, _bufferPos - start);
                return Tokens.GlobalVariable;
            }

            // lone '$' — push the character back and return the dollar itself:
            Back(c);
            return (Tokens)'$';
    }
}
// Assignments: %=
// Operators: %
// Literals: %{... (quotation start)
private Tokens TokenizePercent(bool whitespaceSeen) {
    if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        return TokenizeQuotationStart();
    }

    int next = Peek();
    if (next == '=') {
        Skip(next);
        SetAsciiStringToken(Symbols.Mod);
        _lexicalState = LexicalState.EXPR_BEG;
        MarkSingleLineTokenEnd();
        return Tokens.Assignment;
    }

    // "foo %bar" — whitespace before but not after '%' in a command argument
    // starts a quotation:
    if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
        return TokenizeQuotationStart();
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    MarkSingleLineTokenEnd();
    return (Tokens)'%';
}
// Instance variables:
//   @[:alpha:_][:identifier:]*
// Class variables:
//   @@[:alpha:_][:identifier:]*
// At:
//   @
private Tokens ReadInstanceOrClassVariable() {
    Tokens result;

    // start right before @/@@, the resulting symbol starts with @/@@
    int start = _bufferPos - 1;

    // a second '@' makes it a class variable; peek past it for the name start:
    int c = Peek(0);
    if (c == '@') {
        c = Peek(1);
        result = Tokens.ClassVariable;
    } else {
        result = Tokens.InstanceVariable;
    }

    // c follows @ or @@
    if (IsDecimalDigit(c)) {
        // "@1" / "@@1" are invalid variable names; the '@' falls through and is
        // returned as a plain token below:
        ReportError(result == Tokens.InstanceVariable ? Errors.InvalidInstanceVariableName : Errors.InvalidClassVariableName, (char)c);
    } else if (IsIdentifierInitial(c)) {
        if (result == Tokens.ClassVariable) {
            Skip('@');
        }
        Skip(c);

        SkipVariableName();
        SetStringToken(start, _bufferPos - start);
        _lexicalState = LexicalState.EXPR_END;
        return result;
    }

    // lone '@' (or an invalid variable name):
    return (Tokens)'@';
}
// Brackets: (
private Tokens ReadLeftParenthesis(bool whitespaceSeen) {
    Tokens token = (Tokens)'(';

    if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        token = Tokens.LeftParen;
    } else if (whitespaceSeen) {
        if (_lexicalState == LexicalState.EXPR_CMDARG) {
            token = Tokens.LparenArg;
        } else if (_lexicalState == LexicalState.EXPR_ARG) {
            // "foo (bar)" — whitespace between a method name and its argument
            // list is ambiguous:
            ReportWarning(Errors.WhitespaceBeforeArgumentParentheses);
        }
    }

    COND_PUSH(0);
    CMDARG_PUSH(0);
    _lexicalState = LexicalState.EXPR_BEG;
    return token;
}
// Assignment: =
// Operators: == === =~ =>
private Tokens ReadEquals() {
    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    int next = Peek();
    if (next == '=') {
        Skip('=');
        // "===" vs "==":
        return Read('=') ? Tokens.Eqq : Tokens.Eq;
    }
    if (next == '~') {
        Skip('~');
        return Tokens.Match;
    }
    if (next == '>') {
        Skip('>');
        return Tokens.Assoc;
    }
    return (Tokens)'=';
}
// Decides which flavor of the "do" keyword this occurrence is.
private Tokens ReturnDoKeyword() {
    LexicalState previousState = _lexicalState;
    _lexicalState = LexicalState.EXPR_BEG;

    // if last conditional opening is a parenthesis:
    if (COND_P()) {
        // "do" closes a loop header (while/until/for condition):
        return Tokens.LoopDo;
    }

    // inside command arguments (unless this token itself starts them), or right
    // after the arguments ended, "do" opens a block:
    if ((CMDARG_P() && previousState != LexicalState.EXPR_CMDARG) || previousState == LexicalState.EXPR_ENDARG) {
        return Tokens.BlockDo;
    }

    return Tokens.Do;
}
// Brackets: [
// Operators: [] []=
private Tokens ReadLeftBracket(bool whitespaceSeen) {
    if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
        // method names "[]" and "[]=":
        _lexicalState = LexicalState.EXPR_ARG;
        if (Read(']')) {
            return Read('=') ? Tokens.Aset : Tokens.Aref;
        }
        return (Tokens)'[';
    }

    // decide array-literal vs. indexing BEFORE the state changes (IS_ARG reads it):
    bool literal =
        _lexicalState == LexicalState.EXPR_BEG ||
        _lexicalState == LexicalState.EXPR_MID ||
        (IS_ARG() && whitespaceSeen);

    _lexicalState = LexicalState.EXPR_BEG;
    COND_PUSH(0);
    CMDARG_PUSH(0);
    return literal ? Tokens.Lbrack : (Tokens)'[';
}
// Assignment: >>=
// Operators: > >= >>
private Tokens ReadGreaterThan() {
    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    int next = Peek();
    switch (next) {
        case '=':
            Skip(next);
            return Tokens.Geq;

        case '>':
            Skip(next);
            if (Read('=')) {
                // >>=
                SetAsciiStringToken(Symbols.RightShift);
                _lexicalState = LexicalState.EXPR_BEG;
                return Tokens.Assignment;
            }
            return Tokens.Rshft;
    }
    return (Tokens)'>';
}
// Operators: ~ ~@
private Tokens ReadTilde() {
    if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
        // allow the "~@" method name; a trailing '@' is simply consumed:
        Read('@');
        _lexicalState = LexicalState.EXPR_ARG;
    } else {
        _lexicalState = LexicalState.EXPR_BEG;
    }
    return (Tokens)'~';
}
// Operators: ? (conditional)
// Literals: ?[:char:] ?{escape}
// Errors: ?[:EOF:]
private Tokens TokenizeQuestionmark() {
    if (_lexicalState == LexicalState.EXPR_END || _lexicalState == LexicalState.EXPR_ENDARG) {
        // after a complete expression '?' can only be the conditional operator:
        _lexicalState = LexicalState.EXPR_BEG;
        MarkSingleLineTokenEnd();
        return (Tokens)'?';
    }

    // ?[:EOF:]
    int c = Peek();
    if (c == -1) {
        _unterminatedToken = true;
        MarkSingleLineTokenEnd();
        ReportError(Errors.IncompleteCharacter);
        return Tokens.EndOfFile;
    }

    // TODO: ?x, ?\u1234, ?\u{123456} -> string in 1.9

    // ?[:whitespace:]
    if (IsWhiteSpace(c)) {
        if (!IS_ARG()) {
            // pick the escape letter for the whitespace character so the
            // warning can suggest the "?\X" form:
            int c2 = 0;
            switch (c) {
                case ' ': c2 = 's'; break;
                case '\n': c2 = 'n'; break;
                case '\t': c2 = 't'; break;
                case '\v': c2 = 'v'; break;
                case '\r': c2 = (Peek(1) == '\n') ? 'n' : 'r'; break;
                case '\f': c2 = 'f'; break;
            }

            if (c2 != 0) {
                ReportWarning(Errors.InvalidCharacterSyntax, (char)c2);
            }
        }
        // the '?' is a conditional operator here:
        _lexicalState = LexicalState.EXPR_BEG;
        MarkSingleLineTokenEnd();
        return (Tokens)'?';
    }

    // ?{identifier} — two identifier characters follow, so this reads as a
    // conditional applied to an identifier, not as a character literal:
    if ((IsLetterOrDigit(c) || c == '_') && IsIdentifier(Peek(1))) {
        _lexicalState = LexicalState.EXPR_BEG;
        MarkSingleLineTokenEnd();
        return (Tokens)'?';
    }

    Skip(c);

    // ?\{escape}
    if (c == '\\') {
        // TODO: ?\xx, ?\u1234, ?\u{123456} -> string in 1.9
        c = ReadEscape();

        // \M-{eoln} eats the eoln:
        MarkMultiLineTokenEnd();
    } else {
        MarkSingleLineTokenEnd();
    }

    // TODO: ?x -> string in 1.9
    // character literal value is the low byte (Ruby 1.8 semantics):
    c &= 0xff;
    _lexicalState = LexicalState.EXPR_END;
    _tokenValue.SetInteger(c);
    return Tokens.Integer;
}
// Assignments: ^=
// Operators: ^
private Tokens ReadCaret() {
    if (Read('=')) {
        SetAsciiStringToken(Symbols.Xor);
        _lexicalState = LexicalState.EXPR_BEG;
        return Tokens.Assignment;
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    return (Tokens)'^';
}
// Operators: | ||
// Assignments: |= ||=
private Tokens ReadPipe() {
    int next = Peek();
    switch (next) {
        case '|':
            Skip(next);
            _lexicalState = LexicalState.EXPR_BEG;
            if (Read('=')) {
                // ||=
                SetAsciiStringToken(Symbols.Or);
                return Tokens.Assignment;
            }
            return Tokens.LogicalOr;

        case '=':
            Skip(next);
            SetAsciiStringToken(Symbols.BitwiseOr);
            _lexicalState = LexicalState.EXPR_BEG;
            return Tokens.Assignment;
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    return (Tokens)'|';
}
// Operators: /
// Assignments: /=
// Literals: /... (regex start)
private Tokens ReadSlash(bool whitespaceSeen) {
    // at the beginning of an expression '/' always opens a regex literal:
    if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        _currentString = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
        _tokenValue.SetStringTokenizer(_currentString);
        return Tokens.RegexpBegin;
    }

    int next = Peek();
    if (next == '=') {
        Skip(next);
        SetAsciiStringToken(Symbols.Divide);
        _lexicalState = LexicalState.EXPR_BEG;
        return Tokens.Assignment;
    }

    // "foo /bar" — in a command argument a slash preceded but not followed by
    // whitespace is taken as a regex start:
    if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
        ReportWarning(Errors.AmbiguousFirstArgument);
        _currentString = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
        _tokenValue.SetStringTokenizer(_currentString);
        return Tokens.RegexpBegin;
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    return (Tokens)'/';
}
// Operators: - -@
// Assignments: -=
// Literals: -... (negative number sign)
private Tokens ReadMinus(bool whitespaceSeen) {
    if (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT) {
        // "-@" method name:
        _lexicalState = LexicalState.EXPR_ARG;
        return Read('@') ? Tokens.Uminus : (Tokens)'-';
    }

    int next = Peek();
    if (next == '=') {
        Skip(next);
        SetAsciiStringToken(Symbols.Minus);
        _lexicalState = LexicalState.EXPR_BEG;
        return Tokens.Assignment;
    }

    if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID ||
        (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next))) {

        // the warning check must precede the state change (IS_ARG reads it):
        if (IS_ARG()) {
            ReportWarning(Errors.AmbiguousFirstArgument);
        }
        _lexicalState = LexicalState.EXPR_BEG;

        // "-1" is a signed number literal, "-x" a unary minus:
        return IsDecimalDigit(next) ? Tokens.UminusNum : Tokens.Uminus;
    }

    _lexicalState = LexicalState.EXPR_BEG;
    return (Tokens)'-';
}
// Operators: :: :
// Literals: :... (symbol start)
private Tokens ReadColon(bool whitespaceSeen) {
    int c = Peek();
    if (c == ':') {
        Skip(c);
        if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID ||
            _lexicalState == LexicalState.EXPR_CLASS || (IS_ARG() && whitespaceSeen)) {
            // "::Foo" — reference starting at the top-level scope:
            _lexicalState = LexicalState.EXPR_BEG;
            return Tokens.LeadingDoubleColon;
        }

        // "Foo::Bar" — scope resolution:
        _lexicalState = LexicalState.EXPR_DOT;
        return Tokens.SeparatingDoubleColon;
    }

    if (_lexicalState == LexicalState.EXPR_END || _lexicalState == LexicalState.EXPR_ENDARG || IsWhiteSpace(c)) {
        // the ':' of the conditional operator:
        _lexicalState = LexicalState.EXPR_BEG;
        return (Tokens)':';
    }

    // a symbol literal starts here; quoted symbols tokenize their content with
    // a string tokenizer:
    switch (c) {
        case '\'':
            Skip(c);
            _currentString = new StringContentTokenizer(StringType.Symbol, '\'');
            break;

        case '"':
            Skip(c);
            _currentString = new StringContentTokenizer(StringType.Symbol | StringType.ExpandsEmbedded, '"');
            break;

        default:
            // plain :identifier — no string tokenizer is involved:
            Debug.Assert(_currentString == null);
            break;
    }

    _lexicalState = LexicalState.EXPR_FNAME;
    _tokenValue.SetStringTokenizer(_currentString);
    return Tokens.SymbolBegin;
}
// Quotation start:
//   %[QqWwxrs]?[^:alpha-numeric:]
private Tokens TokenizeQuotationStart() {
    StringType type;
    Tokens token;
    int terminator;

    // c is the character following %
    // note that it could be eoln in which case it needs to be normalized:
    int c = ReadNormalizeEndOfLine();
    switch (c) {
        case 'Q':
            // %Q{...} — double-quoted string:
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'q':
            // %q{...} — single-quoted string:
            type = StringType.Default;
            token = Tokens.StringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'W':
            // %W{...} — word list with interpolation:
            type = StringType.Words | StringType.ExpandsEmbedded;
            token = Tokens.WordsBegin;
            // if the terminator is a whitespace the end will never be matched and syntax error will be reported
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'w':
            // %w{...} — verbatim word list:
            type = StringType.Words;
            token = Tokens.VerbatimWordsBegin;
            // if the terminator is a whitespace the end will never be matched and syntax error will be reported
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'x':
            // %x{...} — shell command string:
            type = StringType.ExpandsEmbedded;
            token = Tokens.ShellStringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'r':
            // %r{...} — regular expression:
            type = StringType.RegularExpression | StringType.ExpandsEmbedded;
            token = Tokens.RegexpBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 's':
            // %s{...} — symbol:
            type = StringType.Symbol;
            token = Tokens.SymbolBegin;
            terminator = ReadNormalizeEndOfLine();
            _lexicalState = LexicalState.EXPR_FNAME;
            break;

        default:
            // no type character — the character itself is the terminator and the
            // literal defaults to a double-quoted string:
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBegin;
            terminator = c;
            break;
    }

    int parenthesis = terminator;
    switch (terminator) {
        case -1:
            _unterminatedToken = true;
            MarkSingleLineTokenEnd();
            ReportError(Errors.UnterminatedQuotedString);
            return Tokens.EndOfFile;

        // bracket-style openers close with their counterpart:
        case '(': terminator = ')'; break;
        case '{': terminator = '}'; break;
        case '[': terminator = ']'; break;
        case '<': terminator = '>'; break;

        default:
            if (IsLetterOrDigit(terminator)) {
                Back(terminator);
                MarkSingleLineTokenEnd();
                ReportError(Errors.UnknownQuotedStringType);
                return (Tokens)'%';
            }
            // non-bracket terminator — no nesting parenthesis to track:
            parenthesis = 0;
            break;
    }

    bool isMultiline = terminator == '\n';

    if ((type & StringType.Words) != 0) {
        // %w/%W skip leading whitespace, which may span lines:
        isMultiline |= SkipWhitespace();
    }

    if (isMultiline) {
        MarkMultiLineTokenEnd();
    } else {
        MarkSingleLineTokenEnd();
    }

    _currentString = new StringContentTokenizer(type, (char)terminator, (char)parenthesis);
    _tokenValue.SetStringTokenizer(_currentString);
    return token;
}
// Assignments: **= *=
// Operators: ** * splat
private Tokens ReadStar(bool whitespaceSeen) {
    Tokens token;
    int next = Peek();

    if (next == '*') {
        Skip(next);
        if (Read('=')) {
            // **=
            SetAsciiStringToken(Symbols.Power);
            _lexicalState = LexicalState.EXPR_BEG;
            return Tokens.Assignment;
        }
        token = Tokens.Pow;
    } else if (next == '=') {
        Skip(next);
        SetAsciiStringToken(Symbols.Multiply);
        _lexicalState = LexicalState.EXPR_BEG;
        return Tokens.Assignment;
    } else if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
        // "foo *bar" — reads as a splat argument:
        ReportWarning(Errors.StarInterpretedAsSplatArgument);
        token = Tokens.Star;
    } else if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        token = Tokens.Star;
    } else {
        token = (Tokens)'*';
    }

    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;

    return token;
}
// Public entry point: returns the next significant token, skipping whitespace
// and comments unless running in verbatim mode (where each token, significant
// or not, is returned to the caller).
public Tokens GetNextToken() {
    if (_input == null) {
        throw new InvalidOperationException("Uninitialized");
    }

    if (_currentString != null) {
        // inside a string/regex literal the string content tokenizer drives:
        // TODO: RefillBuffer();
        Tokens token = _currentString.Tokenize(this);

        if (token == Tokens.StringEnd || token == Tokens.RegexpEnd) {
            // the literal is complete; resume normal tokenization:
            _currentString = null;
            _lexicalState = LexicalState.EXPR_END;
        }

        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);
        return token;
    }

    bool whitespaceSeen = false;

    // capture and reset the command-start flag; it applies to this token only:
    bool cmdState = _commaStart;
    _commaStart = false;

    while (true) {
        // TODO: RefillBuffer();
        Tokens token = Tokenize(whitespaceSeen, cmdState);

        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);

        // ignored tokens:
        switch (token) {
            case Tokens.MultiLineComment:
            case Tokens.SingleLineComment:
                break;

            case Tokens.Whitespace:
                whitespaceSeen = true;
                break;

            case Tokens.EndOfLine:
                // not considered whitespace
                break;

            case Tokens.EndOfFile:
                _eofReached = true;
                return token;

            default:
                return token;
        }

        // in verbatim mode even the ignored tokens are surfaced:
        if (_verbatim) {
            return token;
        }
    }
}
// Operators: ! != !~
private Tokens ReadBang() {
    _lexicalState = LexicalState.EXPR_BEG;
    switch (Peek()) {
        case '=':
            Skip('=');
            return Tokens.Neq;

        case '~':
            Skip('~');
            return Tokens.Nmatch;

        default:
            return (Tokens)'!';
    }
}
// Classifies a newline: either an insignificant end-of-line or a statement
// terminator, depending on the current lexical state.
private Tokens GetEndOfLineToken() {
    switch (_lexicalState) {
        case LexicalState.EXPR_BEG:
        case LexicalState.EXPR_FNAME:
        case LexicalState.EXPR_DOT:
        case LexicalState.EXPR_CLASS:
            // the expression continues past the newline:
            return Tokens.EndOfLine;
    }

    // the newline terminates a statement; a command may start next:
    _commaStart = true;
    _lexicalState = LexicalState.EXPR_BEG;
    return (Tokens)'\n';
}
// String: <<HEREDOC_LABEL
// Assignment: <<=
// Operators: << <= <=> <
private Tokens TokenizeLessThan(bool whitespaceSeen) {
    int c = Read();

    // "<<" may open a heredoc unless the state makes it the left-shift
    // operator (complete expression, method name position, superclass clause,
    // or an argument position without preceding whitespace):
    if (c == '<' &&
        _lexicalState != LexicalState.EXPR_END &&
        _lexicalState != LexicalState.EXPR_DOT &&
        _lexicalState != LexicalState.EXPR_ENDARG &&
        _lexicalState != LexicalState.EXPR_CLASS &&
        (!IS_ARG() || whitespaceSeen)) {

        Tokens token = TokenizeHeredocLabel();
        if (token != Tokens.None) {
            return token;
        }
    }

    switch (_lexicalState) {
        case LexicalState.EXPR_FNAME:
        case LexicalState.EXPR_DOT:
            _lexicalState = LexicalState.EXPR_ARG;
            break;

        default:
            _lexicalState = LexicalState.EXPR_BEG;
            break;
    }

    if (c == '=') {
        if (Read('>')) {
            // <=>
            MarkSingleLineTokenEnd();
            return Tokens.Cmp;
        }
        MarkSingleLineTokenEnd();
        return Tokens.Leq;
    }

    if (c == '<') {
        if (Read('=')) {
            // <<=
            SetAsciiStringToken(Symbols.LeftShift);
            _lexicalState = LexicalState.EXPR_BEG;
            MarkSingleLineTokenEnd();
            return Tokens.Assignment;
        }
        MarkSingleLineTokenEnd();
        return Tokens.Lshft;
    }

    // plain '<' — push the second character back:
    Back(c);
    MarkSingleLineTokenEnd();
    return (Tokens)'<';
}
/// <summary>
/// Tries to parse an entity from the specified lexical machine state.
/// In case of success returns true and advances parsing position.
/// </summary>
public override bool Parse(LexicalState state) {
    // delegates to the main/exception rule pair configured for this machine
    // (m_main/m_exception are declared outside this chunk):
    return ParseExcept(state, m_main, m_exception);
}
// Asserts that the tokenizer ended up in the expected lexical state; returns
// this instance so assertions can be chained fluently.
public AssertTokenizer /*!*/ State(LexicalState expected) {
    _tests.Assert(Tokenizer.LexicalState == expected);
    return this;
}