Exemplo n.º 1
0
        //
        // returns tokens: 
        // - StringEnd/RegexEnd           ... string/regex closed
        // - (Tokens)' '                  ... space in word list
        // - StringEmbeddedVariableBegin  ... #$, #@ (start of an embedded global/instance variable)
        // - StringEmbeddedCodeBegin      ... #{ (start of an embedded expression)
        // - StringContent                ... string data
        //
        internal Tokens TokenizeString(StringContentTokenizer/*!*/ info) {
            StringType stringKind = info.Properties;
            bool whitespaceSeen = false;

            // final separator in the list of words (see grammar):
            if (stringKind == StringType.FinalWordSeparator) {
                MarkTokenStart();
                MarkSingleLineTokenEnd();
                return Tokens.StringEnd;
            }

            MarkTokenStart();

            int eolnWidth;
            int c = ReadNormalizeEndOfLine(out eolnWidth);

            // unterminated string (error recovery is slightly different from MRI):
            if (c == -1) {
                ReportError(Errors.UnterminatedString);
                _unterminatedToken = true;
                MarkSingleLineTokenEnd();
                return Tokens.StringEnd;
            }

            bool isMultiline = c == '\n';

            // skip whitespace in word list:
            if ((stringKind & StringType.Words) != 0 && IsWhiteSpace(c)) {
                isMultiline |= SkipWhitespace();
                c = Read(); 
                whitespaceSeen = true;
            }

            // end of the top-level string:
            if (c == info.TerminatingCharacter && info.NestingLevel == 0) {
                
                // end of words:
                if ((stringKind & StringType.Words) != 0) {
                    // final separator in the list of words (see grammar):
                    info.Properties = StringType.FinalWordSeparator;
                    MarkTokenEnd(isMultiline);
                    return Tokens.WordSeparator;
                }

                // end of regex:
                if ((stringKind & StringType.RegularExpression) != 0) {
                    _tokenValue.SetRegexOptions(ReadRegexOptions());
                    MarkTokenEnd(isMultiline);
                    return Tokens.RegexpEnd;
                }
                
                // end of string/symbol:
                MarkTokenEnd(isMultiline);
                return Tokens.StringEnd;
            }

            // word separator:
            if (whitespaceSeen) {
                Debug.Assert(!IsWhiteSpace(c));
                Back(c);
                MarkTokenEnd(isMultiline);
                return Tokens.WordSeparator;
            }

            MutableStringBuilder content;

            // start of #$variable, #@variable, #{expression} in a string:
            if ((stringKind & StringType.ExpandsEmbedded) != 0 && c == '#') {
                switch (Peek()) {
                    case '$':
                    case '@':
                        MarkSingleLineTokenEnd();
                        return StringEmbeddedVariableBegin();

                    case '{':
                        Skip('{');
                        MarkSingleLineTokenEnd();
                        return StringEmbeddedCodeBegin();
                }
                content = new MutableStringBuilder(_encoding);
                content.Append('#');
            } else {
                content = new MutableStringBuilder(_encoding);
                SeekRelative(-eolnWidth);
            }

            int nestingLevel = info.NestingLevel;
            ReadStringContent(content, stringKind, info.TerminatingCharacter, info.OpeningParenthesis, ref nestingLevel);
            info.NestingLevel = nestingLevel;

            _tokenValue.SetStringContent(content);
            MarkMultiLineTokenEnd();
            return Tokens.StringContent;
        }
Exemplo n.º 2
0
        //
        // returns tokens: 
        // - StringEnd/RegexEnd           ... string/regex closed
        // - (Tokens)' '                  ... space in word list
        // - StringEmbeddedVariableBegin  ... #$, #@ (start of an embedded global/instance variable)
        // - StringEmbeddedCodeBegin      ... #{ (start of an embedded expression)
        // - StringContent                ... string data
        //
        internal Tokens TokenizeString(StringContentTokenizer/*!*/ info) {
            StringType stringKind = info.Properties;
            bool whitespaceSeen = false;

            // final separator in the list of words (see grammar):
            if (stringKind == StringType.FinalWordSeparator) {
                MarkTokenStart();
                MarkSingleLineTokenEnd();
                return Tokens.StringEnd;
            }

            int c = peekc();
            MarkTokenStart();

            // unterminated string (error recovery is slightly different from MRI):
            if (c == -1) {
                ReportError(Errors.UnterminatedString);
                UnterminatedToken = true;
                MarkSingleLineTokenEnd();
                return Tokens.StringEnd;
            }

            c = nextc();
            
            // skip whitespace in word list:
            if ((stringKind & StringType.Words) != 0 && IsWhiteSpace(c)) {
                do { 
                    c = nextc(); 
                } while (IsWhiteSpace(c));

                whitespaceSeen = true;
            }

            // end of the top-level string:
            if (c == info.TerminatingCharacter && info.NestingLevel == 0) {
                
                // end of words:
                if ((stringKind & StringType.Words) != 0) {
                    // final separator in the list of words (see grammar):
                    info.Properties = StringType.FinalWordSeparator;
                    MarkMultiLineTokenEnd();
                    return Tokens.WordSeparator;
                }

                // end of regex:
                if ((stringKind & StringType.RegularExpression) != 0) {
                    _tokenValue.SetRegexOptions(ReadRegexOptions());
                    MarkSingleLineTokenEnd();
                    return Tokens.RegexpEnd;
                }
                
                // end of string/symbol:
                MarkSingleLineTokenEnd();
                return Tokens.StringEnd;
            }

            // word separator:
            if (whitespaceSeen) {
                pushback(c);
                MarkMultiLineTokenEnd();
                return Tokens.WordSeparator;
            }

            newtok();

            // start of #$variable, #@variable, #{expression} in a string:
            if ((stringKind & StringType.ExpandsEmbedded) != 0 && c == '#') {
                c = nextc();
                switch (c) {
                    case '$':
                    case '@':
                        pushback(c);
                        MarkSingleLineTokenEnd();
                        return StringEmbeddedVariableBegin();

                    case '{':
                        MarkSingleLineTokenEnd();
                        return StringEmbeddedCodeBegin();
                }
                tokadd('#');
            }

            pushback(c);
            bool hasUnicodeEscape = false;

            int nestingLevel = info.NestingLevel;
            ReadStringContent(stringKind, info.TerminatingCharacter, info.OpeningParenthesis, ref nestingLevel, ref hasUnicodeEscape);
            info.NestingLevel = nestingLevel;

            _tokenValue.SetString(tok(), hasUnicodeEscape);
            MarkMultiLineTokenEnd();
            return Tokens.StringContent;
        }