/// <summary>
/// Decodes the raw text of a string token (a complete string or one piece of an interpolated string)
/// into the value it represents.
/// </summary>
/// <param name="stringToken">the string token whose text should be decoded</param>
/// <returns>
/// The decoded value, or null when the token is not one of the string token types, its text does not
/// begin and end with the delimiters expected for its type, or its contents hold an unescaped single
/// quote or an invalid escape sequence (for example after lexer error recovery).
/// </returns>
public static string? TryGetStringValue(Token stringToken)
{
    // each string token type is wrapped in a different pair of opening/closing delimiters
    var (prefix, suffix) = stringToken.Type switch
    {
        TokenType.StringComplete => (LanguageConstants.StringDelimiter, LanguageConstants.StringDelimiter),
        TokenType.StringLeftPiece => (LanguageConstants.StringDelimiter, LanguageConstants.StringHoleOpen),
        TokenType.StringMiddlePiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringHoleOpen),
        TokenType.StringRightPiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringDelimiter),
        _ => (null, null),
    };

    if (prefix is null || suffix is null)
    {
        // not a string token
        return null;
    }

    var text = stringToken.Text;

    // tokens produced by the lexer already have verified delimiters, so the two checks below
    // are only expected to reject tokens that did not come from the lexer
    if (text.Length < prefix.Length + suffix.Length)
    {
        return null;
    }

    // the length check above guarantees that both slices are in range
    var startsWithPrefix = text.Substring(0, prefix.Length) == prefix;
    var endsWithSuffix = text.Substring(text.Length - suffix.Length) == suffix;
    if (!startsWithPrefix || !endsWithSuffix)
    {
        return null;
    }

    var body = text.Substring(prefix.Length, text.Length - prefix.Length - suffix.Length);
    var reader = new SlidingTextWindow(body);

    // escape sequences take more characters than the values they decode to,
    // so the raw length is used as the initial capacity
    var result = new StringBuilder(body.Length);

    while (!reader.IsAtEnd())
    {
        var current = reader.Next();
        switch (current)
        {
            case '\'':
                // a single quote may not appear unescaped inside the contents
                return null;

            case '\\':
            {
                // start of an escape sequence - a character must follow the backslash
                if (reader.IsAtEnd())
                {
                    return null;
                }

                char escapeKind = reader.Next();
                if (escapeKind != 'u')
                {
                    // single-character escape
                    if (!SingleCharacterEscapes.TryGetValue(escapeKind, out char replacement))
                    {
                        // unknown escape character
                        return null;
                    }

                    result.Append(replacement);
                    break;
                }

                // unicode escape: 'u', then '{', a hex number and '}'
                if (reader.Next() != '{')
                {
                    return null;
                }

                var hexDigits = ScanHexNumber(reader);
                if (!TryParseCodePoint(hexDigits, out uint codePoint))
                {
                    // not a valid code point
                    return null;
                }

                if (reader.Next() != '}')
                {
                    return null;
                }

                char firstUnit = CodepointToString(codePoint, out char secondUnit);
                result.Append(firstUnit);
                if (secondUnit != SlidingTextWindow.InvalidCharacter)
                {
                    // the first unit was a high surrogate, so its low surrogate must follow
                    result.Append(secondUnit);
                }

                break;
            }

            default:
                // ordinary character - copied through unchanged
                result.Append(current);
                break;
        }
    }

    return result.ToString();
}
/// <summary>
/// Decodes the raw text of a string token (a complete string or one piece of an interpolated string)
/// into the value it represents.
/// </summary>
/// <param name="stringToken">the string token whose text should be decoded</param>
/// <returns>
/// The decoded value, or null when the token is not one of the string token types, its text does not
/// begin and end with the delimiters expected for its type, or its contents hold an unescaped single
/// quote or an invalid escape sequence (for example after lexer error recovery).
/// </returns>
public static string? TryGetStringValue(Token stringToken)
{
    // each string token type is wrapped in a different pair of opening/closing delimiters
    var (opening, closing) = stringToken.Type switch
    {
        TokenType.StringComplete => (LanguageConstants.StringDelimiter, LanguageConstants.StringDelimiter),
        TokenType.StringLeftPiece => (LanguageConstants.StringDelimiter, LanguageConstants.StringHoleOpen),
        TokenType.StringMiddlePiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringHoleOpen),
        TokenType.StringRightPiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringDelimiter),
        _ => (null, null),
    };

    if (opening is null || closing is null)
    {
        // not a string token
        return null;
    }

    var raw = stringToken.Text;

    // tokens produced by the lexer already have verified delimiters, so the two checks below
    // are only expected to reject tokens that did not come from the lexer
    if (raw.Length < opening.Length + closing.Length)
    {
        return null;
    }

    // the length check above guarantees that both slices are in range
    var hasOpening = raw.Substring(0, opening.Length) == opening;
    var hasClosing = raw.Substring(raw.Length - closing.Length) == closing;
    if (!hasOpening || !hasClosing)
    {
        return null;
    }

    var inner = raw.Substring(opening.Length, raw.Length - opening.Length - closing.Length);
    var reader = new SlidingTextWindow(inner);

    // escape sequences take more characters than the values they decode to,
    // so the raw length is used as the initial capacity
    var decoded = new StringBuilder(inner.Length);

    while (!reader.IsAtEnd())
    {
        var current = reader.Next();
        switch (current)
        {
            case '\'':
                // a single quote may not appear unescaped inside the contents
                return null;

            case '\\':
            {
                // start of an escape sequence - a character must follow the backslash
                if (reader.IsAtEnd())
                {
                    return null;
                }

                char escapeKind = reader.Next();
                if (!CharacterEscapes.TryGetValue(escapeKind, out char replacement))
                {
                    // unknown escape character
                    return null;
                }

                decoded.Append(replacement);
                break;
            }

            default:
                // ordinary character - copied through unchanged
                decoded.Append(current);
                break;
        }
    }

    return decoded.ToString();
}