示例#1
0
        /// <summary>
        /// Reads consecutive characters accepted by <c>IsHexDigit</c> from the window and returns them as a string.
        /// Scanning stops at the end of the window or at the first character that is not accepted; that character is not consumed.
        /// </summary>
        /// <param name="window">the text window to read from; it is advanced past every character that is returned</param>
        /// <returns>the scanned characters, or an empty string if the window does not start with a hex digit</returns>
        private static string ScanHexNumber(SlidingTextWindow window)
        {
            var digits = new StringBuilder();

            while (!window.IsAtEnd())
            {
                char candidate = window.Peek();
                if (!IsHexDigit(candidate))
                {
                    // leave the non-hex character in the window for the caller to inspect
                    break;
                }

                digits.Append(candidate);
                window.Advance();
            }

            return digits.ToString();
        }
示例#2
0
        /// <summary>
        /// Converts string literal text into its value. Returns null if the specified string token is malformed due to lexer error recovery.
        /// </summary>
        /// <param name="stringToken">the string token</param>
        /// <returns>
        /// the token contents with escape sequences replaced by the characters they represent,
        /// or null if the token is not one of the string token types or its text is malformed
        /// </returns>
        public static string? TryGetStringValue(Token stringToken)
        {
            // the token type determines which prefix and suffix surround the string contents
            var (start, end) = stringToken.Type switch
            {
                TokenType.StringComplete => (LanguageConstants.StringDelimiter, LanguageConstants.StringDelimiter),
                TokenType.StringLeftPiece => (LanguageConstants.StringDelimiter, LanguageConstants.StringHoleOpen),
                TokenType.StringMiddlePiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringHoleOpen),
                TokenType.StringRightPiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringDelimiter),
                _ => (null, null),
            };

            if (start == null || end == null)
            {
                // not a string token
                return null;
            }

            // the length check comes first so that the prefix and suffix cannot overlap
            // and the Substring calls below cannot go out of range
            if (stringToken.Text.Length < start.Length + end.Length ||
                stringToken.Text.Substring(0, start.Length) != start ||
                stringToken.Text.Substring(stringToken.Text.Length - end.Length) != end)
            {
                // any lexer-generated token should not hit this problem as the start & end are already verified
                return null;
            }

            var contents = stringToken.Text.Substring(start.Length, stringToken.Text.Length - start.Length - end.Length);
            var window = new SlidingTextWindow(contents);

            // escapes are longer than the characters they represent, so the contents length is used as the initial capacity
            var buffer = new StringBuilder(contents.Length);

            while (!window.IsAtEnd())
            {
                var nextChar = window.Next();

                if (nextChar == '\'')
                {
                    // a quote character that is not part of an escape sequence is rejected
                    return null;
                }

                if (nextChar == '\\')
                {
                    // escape sequence begins
                    if (window.IsAtEnd())
                    {
                        return null;
                    }

                    char escapeChar = window.Next();

                    if (escapeChar == 'u')
                    {
                        // unicode escape of the form \u{<hex digits>}
                        // End of input is checked explicitly before reading each curly brace, the same way it is
                        // checked before reading the escape character above, so that a truncated escape returns
                        // null without relying on what Next() does at the end of the window.
                        if (window.IsAtEnd() || window.Next() != '{')
                        {
                            return null;
                        }

                        var codePointText = ScanHexNumber(window);
                        if (!TryParseCodePoint(codePointText, out uint codePoint))
                        {
                            // invalid codepoint
                            return null;
                        }

                        if (window.IsAtEnd() || window.Next() != '}')
                        {
                            return null;
                        }

                        char charOrHighSurrogate = CodepointToString(codePoint, out char lowSurrogate);
                        buffer.Append(charOrHighSurrogate);
                        if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
                        {
                            // previous char was a high surrogate
                            // also append the low surrogate
                            buffer.Append(lowSurrogate);
                        }

                        continue;
                    }

                    if (!SingleCharacterEscapes.TryGetValue(escapeChar, out char escapeCharValue))
                    {
                        // invalid escape character
                        return null;
                    }

                    buffer.Append(escapeCharValue);

                    // continue to next iteration
                    continue;
                }

                // regular string char - append to buffer
                buffer.Append(nextChar);
            }

            return buffer.ToString();
        }
示例#3
0
 /// <summary>
 /// Creates a lexer that stores the given text window and diagnostic writer for later use.
 /// </summary>
 /// <param name="textWindow">the text window assigned to this lexer</param>
 /// <param name="diagnosticWriter">the diagnostic writer assigned to this lexer</param>
 public Lexer(SlidingTextWindow textWindow, IDiagnosticWriter diagnosticWriter)
 {
     (this.textWindow, this.diagnosticWriter) = (textWindow, diagnosticWriter);
 }
示例#4
0
        /// <summary>
        /// Computes the value of a string literal token by removing its surrounding delimiters and resolving escape sequences.
        /// Returns null if the token is not one of the string token types or its text is malformed (for example, due to lexer error recovery).
        /// </summary>
        /// <param name="stringToken">the string token</param>
        public static string? TryGetStringValue(Token stringToken)
        {
            // each string token type implies the prefix and suffix that surround its contents
            var (prefix, suffix) = stringToken.Type switch
            {
                TokenType.StringComplete => (LanguageConstants.StringDelimiter, LanguageConstants.StringDelimiter),
                TokenType.StringLeftPiece => (LanguageConstants.StringDelimiter, LanguageConstants.StringHoleOpen),
                TokenType.StringMiddlePiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringHoleOpen),
                TokenType.StringRightPiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringDelimiter),
                _ => (null, null),
            };

            if (prefix == null || suffix == null)
            {
                return null;
            }

            var text = stringToken.Text;

            // the length test is evaluated first so the Substring calls are only reached when they are in range
            var hasExpectedDelimiters =
                text.Length >= prefix.Length + suffix.Length &&
                text.Substring(0, prefix.Length) == prefix &&
                text.Substring(text.Length - suffix.Length) == suffix;

            if (!hasExpectedDelimiters)
            {
                // tokens produced by the lexer are not expected to get here because their delimiters were already verified
                return null;
            }

            var contents = text.Substring(prefix.Length, text.Length - prefix.Length - suffix.Length);
            var window = new SlidingTextWindow(contents);

            // escapes are longer than the characters they represent, so the contents length is used as the initial capacity
            var value = new StringBuilder(contents.Length);

            while (!window.IsAtEnd())
            {
                var current = window.Next();

                switch (current)
                {
                    case '\'':
                        // a quote character that is not part of an escape sequence is rejected
                        return null;

                    case '\\':
                        // start of an escape sequence; the escaped character must follow
                        if (window.IsAtEnd())
                        {
                            return null;
                        }

                        if (!CharacterEscapes.TryGetValue(window.Next(), out char unescaped))
                        {
                            // unknown escape character
                            return null;
                        }

                        value.Append(unescaped);
                        break;

                    default:
                        // ordinary character is copied as-is
                        value.Append(current);
                        break;
                }
            }

            return value.ToString();
        }