// called from parser at the end of the embedded code;
// restores the string tokenizer suspended by StringEmbeddedCodeBegin and pops the
// condition/command-argument lexer states that StringEmbeddedCodeBegin pushed
internal void StringEmbeddedCodeEnd(StringTokenizer terminator) {
    _currentString = terminator;
    COND_LEXPOP();
    CMDARG_LEXPOP();
}
// Returns the next token from the input.
// Parser-insignificant tokens (whitespace, comments, end-of-line) are consumed in a loop
// and not returned — unless the tokenizer runs in verbatim mode, in which case they are
// surfaced to the caller as well.
// Throws InvalidOperationException if no input has been assigned yet.
public Tokens GetNextToken() {
    if (_input == null) {
        throw new InvalidOperationException("Uninitialized");
    }

    // while inside a string/regexp/heredoc literal, delegate to its tokenizer until
    // it reports the end of the literal:
    if (_currentString != null) {
        // TODO: RefillBuffer();
        Tokens token = _currentString.Tokenize(this);
        if (token == Tokens.StringEnd || token == Tokens.RegexpEnd) {
            _currentString = null;
            _lexicalState = LexicalState.EXPR_END;
        }
        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);
        return token;
    }

    bool whitespaceSeen = false;

    // capture and reset the command-start flag; it applies only to the token produced
    // by this call:
    bool cmdState = _commaStart;
    _commaStart = false;

    while (true) {
        // TODO: RefillBuffer();
        Tokens token = Tokenize(whitespaceSeen, cmdState);
        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);

        // ignored tokens:
        switch (token) {
            case Tokens.MultiLineComment:
            case Tokens.SingleLineComment:
                break;

            case Tokens.Whitespace:
                // remembered so that the next significant token knows whitespace preceded it:
                whitespaceSeen = true;
                break;

            case Tokens.EndOfLine:
                // not considered whitespace
                break;

            case Tokens.EndOfFile:
                _eofReached = true;
                return token;

            default:
                return token;
        }

        // verbatim mode returns even the otherwise-ignored tokens above:
        if (_verbatim) {
            return token;
        }
    }
}
// Stores the current string tokenizer into the StringEmbeddedCodeBegin token.
// It is restored later via call to StringEmbeddedCodeEnd.
private Tokens StringEmbeddedCodeBegin() {
    // hand the suspended tokenizer to the parser via the token value; the parser
    // gives it back through StringEmbeddedCodeEnd once the embedded code is parsed:
    StringTokenizer suspended = _currentString;
    _tokenValue.SetStringTokenizer(suspended);
    _currentString = null;

    // the embedded code starts a fresh expression with clean cond/cmdarg stacks:
    SetState(LexicalState.EXPR_BEG);
    COND_PUSH(0);
    CMDARG_PUSH(0);
    return Tokens.StringEmbeddedCodeBegin;
}
// called from parser at the end of the embedded variable;
// restores the string tokenizer that StringEmbeddedVariableBegin stored into its token
internal void StringEmbeddedVariableEnd(StringTokenizer stringTokenizer) {
    _currentString = stringTokenizer;
}
// Quotation start:
//   %[QqWwxrs]?[^:alpha-numeric:]
// Reads the optional type character following '%' to pick the literal kind, then reads
// the delimiter and installs a StringContentTokenizer that produces the literal's
// content on subsequent calls. Returns the begin-token for the chosen literal kind,
// (Tokens)'%' on an unknown type, or EndOfFile on premature end of input.
private Tokens TokenizeQuotationStart() {
    StringType type;
    Tokens token;
    int terminator;

    // c is the character following %
    // note that it could be eoln in which case it needs to be normalized:
    int c = ReadNormalizeEndOfLine();
    switch (c) {
        case 'Q':
            // %Q — double-quoted string (interpolation enabled):
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'q':
            // %q — single-quoted string (no interpolation):
            type = StringType.Default;
            token = Tokens.StringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'W':
            // %W — word list with interpolation:
            type = StringType.Words | StringType.ExpandsEmbedded;
            token = Tokens.WordsBegin;
            // if the terminator is a whitespace the end will never be matched and syntax error will be reported
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'w':
            // %w — verbatim word list:
            type = StringType.Words;
            token = Tokens.VerbatimWordsBegin;
            // if the terminator is a whitespace the end will never be matched and syntax error will be reported
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'x':
            // %x — shell command string:
            type = StringType.ExpandsEmbedded;
            token = Tokens.ShellStringBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 'r':
            // %r — regular expression:
            type = StringType.RegularExpression | StringType.ExpandsEmbedded;
            token = Tokens.RegexpBegin;
            terminator = ReadNormalizeEndOfLine();
            break;

        case 's':
            // %s — symbol:
            type = StringType.Symbol;
            token = Tokens.SymbolBegin;
            terminator = ReadNormalizeEndOfLine();
            _lexicalState = LexicalState.EXPR_FNAME;
            break;

        default:
            // bare %<delim> — same treatment as %Q:
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBegin;
            terminator = c;
            break;
    }

    // opening bracket-like delimiters pair with their closing counterpart and keep the
    // opener for nesting; other delimiters disable nesting tracking (parenthesis = 0):
    int parenthesis = terminator;
    switch (terminator) {
        case -1:
            _unterminatedToken = true;
            MarkSingleLineTokenEnd();
            ReportError(Errors.UnterminatedQuotedString);
            return Tokens.EndOfFile;

        case '(': terminator = ')'; break;
        case '{': terminator = '}'; break;
        case '[': terminator = ']'; break;
        case '<': terminator = '>'; break;

        default:
            if (IsLetterOrDigit(terminator)) {
                // an alpha-numeric delimiter is not allowed; return the delimiter to
                // the stream and report the '%' alone:
                Back(terminator);
                MarkSingleLineTokenEnd();
                ReportError(Errors.UnknownQuotedStringType);
                return (Tokens)'%';
            }
            parenthesis = 0;
            break;
    }

    // word lists skip leading whitespace, which may cross line boundaries:
    bool isMultiline = terminator == '\n';
    if ((type & StringType.Words) != 0) {
        isMultiline |= SkipWhitespace();
    }
    if (isMultiline) {
        MarkMultiLineTokenEnd();
    } else {
        MarkSingleLineTokenEnd();
    }
    _currentString = new StringContentTokenizer(type, (char)terminator, (char)parenthesis);
    _tokenValue.SetStringTokenizer(_currentString);
    return token;
}
// Stores the current string tokenizer into the StringEmbeddedVariableBegin token.
// It is restored later via call to StringEmbeddedVariableEnd.
private Tokens StringEmbeddedVariableBegin() {
    // suspend the literal being tokenized and attach it to the token so the parser
    // can return it once the embedded variable has been consumed:
    StringTokenizer suspended = _currentString;
    _tokenValue.SetStringTokenizer(suspended);
    _currentString = null;

    SetState(LexicalState.EXPR_BEG);
    return Tokens.StringEmbeddedVariableBegin;
}
// String: '...
// Begins tokenization of a single-quoted string literal (no interpolation); the
// content itself is produced by the installed string tokenizer.
private Tokens ReadSingleQuote() {
    var tokenizer = new StringContentTokenizer(StringType.Default, '\'');
    _tokenValue.SetStringTokenizer(tokenizer);
    _currentString = tokenizer;
    return Tokens.StringBegin;
}
// Quotation start:
//   %[QqWwxrs]?[^:alpha-numeric:]
// c is the character following '%'. Picks the literal kind from the type character,
// reads the delimiter, and installs a StringContentTokenizer for the content.
// Returns the begin-token for the literal kind, (Tokens)'%' on an unknown type,
// or EndOfFile on premature end of input.
private Tokens ReadQuotationStart(int c) {
    StringType type;
    Tokens token;
    int terminator;

    // c is the character following %
    switch (c) {
        case 'Q':
            // %Q — double-quoted string (interpolation enabled):
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBeg;
            terminator = nextc();
            break;

        case 'q':
            // %q — single-quoted string:
            type = StringType.Default;
            token = Tokens.StringBeg;
            terminator = nextc();
            break;

        case 'W':
            // %W — word list with interpolation:
            type = StringType.Words | StringType.ExpandsEmbedded;
            token = Tokens.WordsBeg;
            terminator = nextc();
            SkipWhitespace();
            break;

        case 'w':
            // %w — verbatim word list:
            type = StringType.Words;
            token = Tokens.VerbatimWordsBegin;
            terminator = nextc();
            SkipWhitespace();
            break;

        case 'x':
            // %x — shell command string:
            type = StringType.ExpandsEmbedded;
            token = Tokens.ShellStringBegin;
            terminator = nextc();
            break;

        case 'r':
            // %r — regular expression:
            type = StringType.RegularExpression | StringType.ExpandsEmbedded;
            token = Tokens.RegexpBeg;
            terminator = nextc();
            break;

        case 's':
            // %s — symbol:
            type = StringType.Symbol;
            token = Tokens.Symbeg;
            terminator = nextc();
            _lexicalState = LexicalState.EXPR_FNAME;
            break;

        default:
            // bare %<delim> — same treatment as %Q:
            type = StringType.ExpandsEmbedded;
            token = Tokens.StringBeg;
            terminator = c;
            break;
    }

    int parenthesis = terminator;
    switch (terminator) {
        case -1:
            UnterminatedToken = true;
            ReportError(Errors.UnterminatedQuotedString);
            return Tokens.EndOfFile;

        case '(': terminator = ')'; break;
        case '{': terminator = '}'; break;
        case '[': terminator = ']'; break;
        case '<': terminator = '>'; break;

        default:
            if (IsLetterOrDigit(terminator)) {
                // BUGFIX: push back the delimiter that was actually read, not the
                // '%'-type character. For "%Qa" the previous code pushed back 'Q'
                // instead of 'a', losing a character of input. (The first switch's
                // default branch sets terminator = c, so this is equivalent there.)
                pushback(terminator);
                ReportError(Errors.UnknownQuotedStringType);
                return (Tokens)'%';
            }
            parenthesis = 0;
            break;
    }

    _currentString = new StringContentTokenizer(type, (char)terminator, (char)parenthesis);
    _tokenValue.SetStringTokenizer(_currentString);
    return token;
}
// String: `...
// Operator: `
// In method-name positions (EXPR_FNAME, EXPR_DOT) backtick is the operator/method
// name; everywhere else it begins a shell command string.
private Tokens ReadBacktick(bool cmdState) {
    switch (_lexicalState) {
        case LexicalState.EXPR_FNAME:
            // e.g. "def `" — backtick names a method:
            _lexicalState = LexicalState.EXPR_END;
            return (Tokens)'`';

        case LexicalState.EXPR_DOT:
            // e.g. "x.`" — backtick invoked as a method:
            _lexicalState = cmdState ? LexicalState.EXPR_CMDARG : LexicalState.EXPR_ARG;
            return (Tokens)'`';
    }

    var tokenizer = new StringContentTokenizer(StringType.ExpandsEmbedded, '`');
    _currentString = tokenizer;
    _tokenValue.SetStringTokenizer(tokenizer);
    return Tokens.ShellStringBegin;
}
// String: "...
// Begins tokenization of a double-quoted string literal (interpolation enabled);
// the content itself is produced by the installed string tokenizer.
private Tokens ReadDoubleQuote() {
    var tokenizer = new StringContentTokenizer(StringType.ExpandsEmbedded, '"');
    _tokenValue.SetStringTokenizer(tokenizer);
    _currentString = tokenizer;
    return Tokens.StringBegin;
}
// Operators: :: :
// Literals: :... (symbol start)
// Distinguishes leading '::' (constant lookup at expression start) from separating
// '::' (scope resolution), a bare ':' operator, and the start of a (possibly quoted)
// symbol literal.
private Tokens ReadColon(bool whitespaceSeen) {
    int next = Peek();

    if (next == ':') {
        Skip(next);

        // at the beginning of an expression (or an argument preceded by whitespace)
        // '::' starts a top-level constant path:
        bool leading =
            _lexicalState == LexicalState.EXPR_BEG ||
            _lexicalState == LexicalState.EXPR_MID ||
            _lexicalState == LexicalState.EXPR_CLASS ||
            (IS_ARG() && whitespaceSeen);

        if (leading) {
            _lexicalState = LexicalState.EXPR_BEG;
            return Tokens.LeadingDoubleColon;
        }
        _lexicalState = LexicalState.EXPR_DOT;
        return Tokens.SeparatingDoubleColon;
    }

    // after a complete expression, or when followed by whitespace, ':' is a plain operator:
    if (_lexicalState == LexicalState.EXPR_END || _lexicalState == LexicalState.EXPR_ENDARG || IsWhiteSpace(next)) {
        _lexicalState = LexicalState.EXPR_BEG;
        return (Tokens)':';
    }

    // symbol start — :'...' is verbatim, :"..." allows interpolation, otherwise the
    // symbol content follows unquoted (no tokenizer installed):
    if (next == '\'') {
        Skip(next);
        _currentString = new StringContentTokenizer(StringType.Symbol, '\'');
    } else if (next == '"') {
        Skip(next);
        _currentString = new StringContentTokenizer(StringType.Symbol | StringType.ExpandsEmbedded, '"');
    } else {
        Debug.Assert(_currentString == null);
    }

    _lexicalState = LexicalState.EXPR_FNAME;
    _tokenValue.SetStringTokenizer(_currentString);
    return Tokens.SymbolBegin;
}
// Operators: /
// Assignments: /=
// Literals: /... (regex start)
// Decides between division, '/=' assignment, and the start of a regular expression
// literal, based on the lexical state and surrounding whitespace.
private Tokens ReadSlash(bool whitespaceSeen) {
    // at the start of an expression '/' always begins a regexp:
    if (_lexicalState == LexicalState.EXPR_BEG || _lexicalState == LexicalState.EXPR_MID) {
        var regexp = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
        _currentString = regexp;
        _tokenValue.SetStringTokenizer(regexp);
        return Tokens.RegexpBegin;
    }

    int next = Peek();

    // "/=" — division-assignment:
    if (next == '=') {
        Skip(next);
        SetAsciiStringToken(Symbols.Divide);
        _lexicalState = LexicalState.EXPR_BEG;
        return Tokens.Assignment;
    }

    // in an argument position, "foo /bar" (whitespace before but not after the slash)
    // is resolved as a regexp literal with a warning about the ambiguity:
    if (IS_ARG() && whitespaceSeen && !IsWhiteSpace(next)) {
        ReportWarning(Errors.AmbiguousFirstArgument);
        var regexp = new StringContentTokenizer(StringType.RegularExpression | StringType.ExpandsEmbedded, '/');
        _currentString = regexp;
        _tokenValue.SetStringTokenizer(regexp);
        return Tokens.RegexpBegin;
    }

    // plain division operator:
    _lexicalState = (_lexicalState == LexicalState.EXPR_FNAME || _lexicalState == LexicalState.EXPR_DOT)
        ? LexicalState.EXPR_ARG
        : LexicalState.EXPR_BEG;
    return (Tokens)'/';
}
// Token pump: returns the next parser-significant token.
// While inside a string/regexp literal it delegates to the active string tokenizer;
// otherwise it loops over Tokenize(whitespaceSeen, cmdState), silently consuming
// whitespace, end-of-line and invalid characters. Comments are returned only in
// verbatim mode.
private Tokens Tokenize() {
    yytext = new StringBuilder();
    bool whitespaceSeen = false;

    if (_currentString != null) {
        Tokens token = _currentString.Tokenize(this);
        if (token == Tokens.StringEnd || token == Tokens.RegexpEnd) {
            // the literal is finished; resume normal tokenization:
            _currentString = null;
            _lexicalState = LexicalState.EXPR_END;
        }
        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);
        return token;
    }

    // capture and reset the command-start flag; it applies only to this token:
    bool cmdState = _commaStart;
    _commaStart = false;

    while (true) {
        Tokens token = Tokenize(whitespaceSeen, cmdState);
        _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
        DumpToken(token);

        // ignored tokens:
        switch (token) {
            case Tokens.MultiLineComment:
            case Tokens.SingleLineComment:
                if (_verbatim) {
                    return token;
                }
                continue;

            case Tokens.Whitespace:
                // remembered so the next significant token knows whitespace preceded it:
                whitespaceSeen = true;
                continue;

            case Tokens.EndOfLine: // not considered whitespace
            case Tokens.InvalidCharacter:
                continue;
        }

        return token;
    }
}
// Stores the given string tokenizer into this token value so the parser can restore
// it later (see StringEmbeddedVariableEnd/StringEmbeddedCodeEnd).
internal void SetStringTokenizer(StringTokenizer value) {
    StringTokenizer = value;
}
// Heredoc start: <<[-]['"`]label['"`]
// Reads the heredoc label following '<<'. On success installs a HeredocTokenizer
// (which tokenizes the heredoc body after the rest of the current line is consumed)
// and returns the begin-token of the string kind. Returns Tokens.None — after
// rewinding the consumed characters — when the input is not a heredoc label.
private Tokens TokenizeHeredocLabel() {
    int term;
    StringType stringType = StringType.Default;

    // prefixWidth tracks how many characters we consumed so they can be rewound
    // if this turns out not to be a heredoc:
    int prefixWidth;
    int c = ReadNormalizeEndOfLine(out prefixWidth);
    if (c == '-') {
        // "<<-" allows the terminating label to be indented:
        c = ReadNormalizeEndOfLine(out prefixWidth);
        prefixWidth++;
        stringType = StringType.IndentedHeredoc;
    }

    string label;
    if (c == '\'' || c == '"' || c == '`') {
        // quoted label: '...' disables interpolation, "..." and `...` enable it:
        if (c != '\'') {
            stringType |= StringType.ExpandsEmbedded;
        }

        // do not include quotes:
        int start = _bufferPos;
        term = c;
        while (true) {
            c = Read();
            if (c == -1) {
                _unterminatedToken = true;
                ReportError(Errors.UnterminatedHereDocIdentifier);
                c = term;
                break;
            }

            if (c == term) {
                break;
            }

            // MRI doesn't do this, it continues reading the label and includes \n into it.
            // The label cannot be matched with the end label (only single-line comparison is done), so it's better to report error here
            // Allowing \n in label requires the token to be multi-line.
            // Note we can ignore \r followed by \n here since it will fail in the next iteration.
            if (c == '\n') {
                Back('\n');
                ReportError(Errors.UnterminatedHereDocIdentifier);
                c = term;
                break;
            }
        }
        label = new String(_lineBuffer, start, _bufferPos - start - 1);
    } else if (IsIdentifier(c)) {
        // unquoted label — treated like a double-quoted one (interpolation enabled):
        term = '"';
        stringType |= StringType.ExpandsEmbedded;

        int start = _bufferPos - 1;
        SkipVariableName();
        label = new String(_lineBuffer, start, _bufferPos - start);
    } else {
        // not a heredoc after all — undo everything consumed above:
        SeekRelative(-prefixWidth);
        return Tokens.None;
    }

    // note that if we allow \n in the label we must change this to multi-line token!
    MarkSingleLineTokenEnd();

    // skip the rest of the line (the content is stored in heredoc string terminal and tokenized upon restore)
    int resume = _bufferPos;
    _bufferPos = _lineLength;
    _currentString = new HeredocTokenizer(stringType, label, resume, _lineBuffer, _lineLength, _currentLine, _currentLineIndex);
    // the heredoc tokenizer took ownership of the line buffer; start a fresh one:
    _lineBuffer = new char[InitialBufferSize];
    _tokenValue.SetStringTokenizer(_currentString);
    return term == '`' ? Tokens.ShellStringBegin : Tokens.StringBegin;
}
// Stores the given string tokenizer into this token value and marks the value's
// discriminator accordingly.
internal void SetStringTokenizer(StringTokenizer value) {
    StringTokenizer = value;
    _type = TokenValueType.StringTokenizer;
}
// Heredoc start: <<[-]['"`]label['"`]
// Older revision of TokenizeHeredocLabel: reads the heredoc label following '<<',
// installs a HeredocTokenizer for the body on success, or pushes the consumed
// characters back and returns Tokens.None when the input is not a heredoc label.
private Tokens TokenizeHeredocLabel() {
    int term;
    StringType stringType = StringType.Default;
    int c = nextc();
    if (c == '-') {
        // "<<-" allows the terminating label to be indented:
        c = nextc();
        stringType = StringType.IndentedHeredoc;
    }

    string label;
    if (c == '\'' || c == '"' || c == '`') {
        // quoted label: '...' disables interpolation, "..." and `...` enable it:
        if (c != '\'') {
            stringType |= StringType.ExpandsEmbedded;
        }

        // do not include quotes:
        int start = _bufferPos;
        term = c;
        while (true) {
            c = nextc();
            if (c == -1) {
                UnterminatedToken = true;
                ReportError(Errors.UnterminatedHereDocIdentifier);
                c = term;
                break;
            }

            if (c == term) {
                break;
            }

            // MRI doesn't do this, it continues reading the label and includes \n into it.
            // The label cannot be matched with the end label (only single-line comparison is done), so it's better to report error here
            // Allowing \n in label requires the token to be multi-line.
            if (c == '\n') {
                pushback(c);
                ReportError(Errors.UnterminatedHereDocIdentifier);
                c = term;
                break;
            }
        }
        label = new String(_lineBuffer, start, _bufferPos - start - 1);
    } else if (IsIdentifier(c)) {
        // unquoted label — treated like a double-quoted one (interpolation enabled):
        term = '"';
        stringType |= StringType.ExpandsEmbedded;

        int start = _bufferPos - 1;
        SkipVariableName();
        label = new String(_lineBuffer, start, _bufferPos - start);
    } else {
        // not a heredoc after all — undo the characters consumed above:
        pushback(c);
        if ((stringType & StringType.IndentedHeredoc) != 0) {
            pushback('-');
        }
        return Tokens.None;
    }

    // note that if we allow \n in the label we must change this to multi-line token!
    MarkSingleLineTokenEnd();

    // skip the rest of the line (the content is stored in heredoc string terminal and tokenized upon restore)
    int resume = _bufferPos;
    // NOTE(review): the newer revision sets _bufferPos to _lineLength instead of
    // _lineBuffer.Length — this assumes the buffer is exactly line-sized; confirm.
    _bufferPos = _lineBuffer.Length;
    _currentString = new HeredocTokenizer(stringType, label, resume, _lineBuffer, _currentLine, _currentLineIndex);
    _tokenValue.SetStringTokenizer(_currentString);
    return term == '`' ? Tokens.ShellStringBegin : Tokens.StringBeg;
}