//
// Produces the next token while the tokenizer is positioned inside a string-like
// literal (string, symbol, regex or word list) described by `info`.
//
// Possible results:
// - Tokens.StringEnd      ... literal closed, end of input reached (reported as an
//                             unterminated string), or the second step of closing a word list
// - Tokens.RegexpEnd      ... regex closed; its options are stored in the token value
// - Tokens.WordSeparator  ... whitespace between words, or the terminator of a word list
// - result of StringEmbeddedVariableBegin() ... "#$" or "#@" in a literal that expands embedded code
// - result of StringEmbeddedCodeBegin()     ... "#{" in a literal that expands embedded code
// - Tokens.StringContent  ... literal data; the content is stored in the token value
//
internal Tokens TokenizeString(StringContentTokenizer/*!*/ info) {
    StringType kind = info.Properties;

    // The terminator of a word list was already reported as a separator by the
    // previous call (see grammar); this call emits the actual end of the literal.
    if (kind == StringType.FinalWordSeparator) {
        MarkTokenStart();
        MarkSingleLineTokenEnd();
        return Tokens.StringEnd;
    }

    bool isWordList = (kind & StringType.Words) != 0;

    MarkTokenStart();

    int eolnWidth;
    int current = ReadNormalizeEndOfLine(out eolnWidth);

    // End of input inside the literal (error recovery is slightly different from MRI).
    if (current == -1) {
        ReportError(Errors.UnterminatedString);
        _unterminatedToken = true;
        MarkSingleLineTokenEnd();
        return Tokens.StringEnd;
    }

    bool spansLines = current == '\n';
    bool skippedWhitespace = false;

    // A run of whitespace in a word list collapses into a single separator.
    if (isWordList && IsWhiteSpace(current)) {
        spansLines |= SkipWhitespace();
        current = Read();
        skippedWhitespace = true;
    }

    // Terminator of the outermost literal.
    if (current == info.TerminatingCharacter && info.NestingLevel == 0) {
        Tokens closing;

        if (isWordList) {
            // Report a separator now and remember that the next call must close the list.
            info.Properties = StringType.FinalWordSeparator;
            closing = Tokens.WordSeparator;
        } else if ((kind & StringType.RegularExpression) != 0) {
            // Options are read before the token end is marked.
            _tokenValue.SetRegexOptions(ReadRegexOptions());
            closing = Tokens.RegexpEnd;
        } else {
            // Plain string or symbol.
            closing = Tokens.StringEnd;
        }

        MarkTokenEnd(spansLines);
        return closing;
    }

    // Whitespace was consumed and a word follows: put the first character of the
    // word back and report the separator.
    if (skippedWhitespace) {
        Debug.Assert(!IsWhiteSpace(current));
        Back(current);
        MarkTokenEnd(spansLines);
        return Tokens.WordSeparator;
    }

    MutableStringBuilder content;

    if ((kind & StringType.ExpandsEmbedded) != 0 && current == '#') {
        int next = Peek();

        // "#$variable" / "#@variable": the sigil is left unread.
        if (next == '$' || next == '@') {
            MarkSingleLineTokenEnd();
            return StringEmbeddedVariableBegin();
        }

        // "#{expression}": the brace is consumed here.
        if (next == '{') {
            Skip('{');
            MarkSingleLineTokenEnd();
            return StringEmbeddedCodeBegin();
        }

        // A lone '#' is ordinary content.
        content = new MutableStringBuilder(_encoding);
        content.Append('#');
    } else {
        content = new MutableStringBuilder(_encoding);

        // Rewind by the width reported for the initial read so that the content
        // reader sees that character again.
        SeekRelative(-eolnWidth);
    }

    int depth = info.NestingLevel;
    ReadStringContent(content, kind, info.TerminatingCharacter, info.OpeningParenthesis, ref depth);
    info.NestingLevel = depth;

    _tokenValue.SetStringContent(content);
    MarkMultiLineTokenEnd();
    return Tokens.StringContent;
}
//
// Produces the next token while the tokenizer is positioned inside a string-like
// literal (string, symbol, regex or word list) described by `info`.
//
// Possible results:
// - Tokens.StringEnd      ... literal closed, end of input reached (reported as an
//                             unterminated string), or the second step of closing a word list
// - Tokens.RegexpEnd      ... regex closed; its options are stored in the token value
// - Tokens.WordSeparator  ... whitespace between words, or the terminator of a word list
// - result of StringEmbeddedVariableBegin() ... "#$" or "#@" in a literal that expands embedded code
// - result of StringEmbeddedCodeBegin()     ... "#{" in a literal that expands embedded code
// - Tokens.StringContent  ... literal data; the text is stored in the token value
//
internal Tokens TokenizeString(StringContentTokenizer/*!*/ info) {
    StringType kind = info.Properties;

    // The terminator of a word list was already reported as a separator by the
    // previous call (see grammar); this call emits the actual end of the literal.
    if (kind == StringType.FinalWordSeparator) {
        MarkTokenStart();
        MarkSingleLineTokenEnd();
        return Tokens.StringEnd;
    }

    bool isWordList = (kind & StringType.Words) != 0;

    // Look ahead before marking the token start, then check for end of input.
    int lookahead = peekc();
    MarkTokenStart();

    // End of input inside the literal (error recovery is slightly different from MRI).
    if (lookahead == -1) {
        ReportError(Errors.UnterminatedString);
        UnterminatedToken = true;
        MarkSingleLineTokenEnd();
        return Tokens.StringEnd;
    }

    int current = nextc();
    bool skippedWhitespace = false;

    // A run of whitespace in a word list collapses into a single separator.
    if (isWordList && IsWhiteSpace(current)) {
        current = nextc();
        while (IsWhiteSpace(current)) {
            current = nextc();
        }
        skippedWhitespace = true;
    }

    // Terminator of the outermost literal.
    if (current == info.TerminatingCharacter && info.NestingLevel == 0) {
        if (isWordList) {
            // Report a separator now and remember that the next call must close the list.
            info.Properties = StringType.FinalWordSeparator;
            MarkMultiLineTokenEnd();
            return Tokens.WordSeparator;
        } else if ((kind & StringType.RegularExpression) != 0) {
            // Options are read before the token end is marked.
            _tokenValue.SetRegexOptions(ReadRegexOptions());
            MarkSingleLineTokenEnd();
            return Tokens.RegexpEnd;
        } else {
            // Plain string or symbol.
            MarkSingleLineTokenEnd();
            return Tokens.StringEnd;
        }
    }

    // Whitespace was consumed and a word follows: put the first character of the
    // word back and report the separator.
    if (skippedWhitespace) {
        pushback(current);
        MarkMultiLineTokenEnd();
        return Tokens.WordSeparator;
    }

    newtok();

    if ((kind & StringType.ExpandsEmbedded) != 0 && current == '#') {
        current = nextc();

        // "#$variable" / "#@variable": the sigil is pushed back.
        if (current == '$' || current == '@') {
            pushback(current);
            MarkSingleLineTokenEnd();
            return StringEmbeddedVariableBegin();
        }

        // "#{expression}": the brace stays consumed.
        if (current == '{') {
            MarkSingleLineTokenEnd();
            return StringEmbeddedCodeBegin();
        }

        // A lone '#' is ordinary content.
        tokadd('#');
    }

    // Return the last character read so that the content reader sees it.
    pushback(current);

    bool sawUnicodeEscape = false;
    int depth = info.NestingLevel;
    ReadStringContent(kind, info.TerminatingCharacter, info.OpeningParenthesis, ref depth, ref sawUnicodeEscape);
    info.NestingLevel = depth;

    _tokenValue.SetString(tok(), sawUnicodeEscape);
    MarkMultiLineTokenEnd();
    return Tokens.StringContent;
}