/// <summary>
/// Decodes the escape sequences in the <paramref name="length"/> bytes of <paramref name="bytes"/>
/// that begin at <paramref name="start"/>.
/// </summary>
/// <param name="bytes">Source buffer; asserted to be non-null in debug builds.</param>
/// <param name="start">Index of the first byte to process.</param>
/// <param name="length">Number of bytes to process; <c>start + length</c> is asserted not to exceed the buffer length.</param>
/// <param name="isRaw">Forwarded unchanged to <c>DoParseString</c>.</param>
/// <param name="errorHandler">Forwarded unchanged to <c>DoParseString</c>.</param>
/// <returns>
/// The string produced by <c>DoParseString</c> (invoked with <c>isUniEscape: true</c> and
/// <c>normalizeLineEndings: false</c>); when that call yields <c>null</c>, the result of
/// <c>MakeString()</c> on the same slice instead.
/// </returns>
internal static string ParseString(byte[] bytes, int start, int length, bool isRaw, ParseStringErrorHandler<byte> errorHandler) {
    Debug.Assert(bytes != null);
    Debug.Assert(start + length <= bytes.Length);

    // Take the view over the requested range once and reuse it for both the parse and the fallback.
    var slice = bytes.AsSpan(start, length);

    string parsed = DoParseString(slice, isRaw, isUniEscape: true, normalizeLineEndings: false, errorHandler);
    if (parsed != null) {
        return parsed;
    }

    // A null result presumably means nothing had to be rewritten (the span overload of
    // DoParseString is not visible here - confirm), so the slice is converted as-is.
    return slice.MakeString();
}
/// <summary>
/// Walks <paramref name="data"/> from <paramref name="start"/> up to (but excluding)
/// <c>start + length</c>, converting each element to a character via <c>IConvertible.ToChar</c>
/// and expanding backslash escape sequences.
/// </summary>
/// <typeparam name="T">Element type of the input; each element is read with <c>ToChar(null)</c>.</typeparam>
/// <param name="data">Input elements.</param>
/// <param name="start">Index of the first element to process.</param>
/// <param name="length">Number of elements to process.</param>
/// <param name="isRaw">
/// When true, only <c>\u</c>/<c>\U</c> escapes are expanded (and only if <paramref name="isUniEscape"/>
/// is also true); every other backslash pair is copied through verbatim.
/// </param>
/// <param name="isUniEscape">Enables the <c>\uXXXX</c> and <c>\UXXXXXXXX</c> escapes.</param>
/// <param name="normalizeLineEndings">
/// When true, <c>\r</c> and <c>\r\n</c> are emitted as <c>\n</c>, and a backslash followed by
/// <c>\r</c> (optionally <c>\r\n</c>) acts as a line continuation.
/// </param>
/// <param name="errorHandler">
/// Optional callback invoked for malformed escapes; a non-null return value is appended to the output.
/// When null, a <c>UnicodeDecodeError</c> throwable is raised instead.
/// </param>
/// <returns>
/// The rewritten text, or <c>null</c> if the output buffer was never initialised, i.e. no backslash
/// escape was processed and no carriage return was normalised.
/// </returns>
/// <exception cref="NotImplementedException">
/// A malformed escape was found and <paramref name="data"/> is not a <c>byte[]</c>.
/// </exception>
private static string DoParseString<T>(T[] data, int start, int length, bool isRaw, bool isUniEscape, bool normalizeLineEndings, ParseStringErrorHandler<T> errorHandler = default) where T : IConvertible {
    const int HexRadix = 16;

    Bytes bytesData = null;     // created on first error only
    StringBuilder sb = null;    // stays null until StringBuilderInit is first called
    int stop = start + length;
    int pos = start;

    while (pos < stop) {
        char current = data[pos++].ToChar(null);

        // Bare carriage return (optionally followed by a line feed) collapses to a single '\n'.
        if (current == '\r' && normalizeLineEndings) {
            StringBuilderInit(ref sb, data, start, pos - 1, length);
            if (pos < stop && data[pos].ToChar(null) == '\n') {
                pos++;
            }
            sb.Append('\n');
            continue;
        }

        // Ordinary character, or a backslash in raw mode with unicode escapes disabled:
        // copy through, but only if an output buffer already exists.
        if (current != '\\' || (isRaw && !isUniEscape)) {
            sb?.Append(current);
            continue;
        }

        // From here on we are looking at the start of an escape sequence.
        StringBuilderInit(ref sb, data, start, pos - 1, length);

        if (pos >= stop) {
            // Trailing backslash: literal in raw mode, an error otherwise.
            if (isRaw) {
                sb.Append('\\');
            } else {
                handleError(pos - start - 1, pos - start, "\\ at end of string");
            }
            break;
        }

        char esc = data[pos++].ToChar(null);

        // Offset of the backslash relative to 'start'; pos currently sits just past the escape letter.
        int escapeOffset = pos - start - 2;

        if (isUniEscape && (esc == 'u' || esc == 'U')) {
            int digitCount = esc == 'u' ? 4 : 8;
            if (!TryParseInt(data, pos, digitCount, HexRadix, out int codePoint, out int consumed)) {
                handleError(escapeOffset, pos - start + consumed, esc == 'u' ? @"truncated \uXXXX escape" : @"truncated \UXXXXXXXX escape");
            } else if (codePoint < 0 || codePoint > 0x10ffff) {
                handleError(escapeOffset, pos - start + consumed, isRaw ? @"\Uxxxxxxxx out of range" : "illegal Unicode character");
            } else if (codePoint < 0x010000) {
                sb.Append((char)codePoint);
            } else {
                sb.Append(char.ConvertFromUtf32(codePoint));
            }
            pos += consumed;
            continue;
        }

        if (isRaw) {
            // Raw mode keeps every non-unicode escape exactly as written.
            sb.Append('\\');
            sb.Append(esc);
            continue;
        }

        switch (esc) {
            case 'a':
                sb.Append('\a');
                break;
            case 'b':
                sb.Append('\b');
                break;
            case 'f':
                sb.Append('\f');
                break;
            case 'n':
                sb.Append('\n');
                break;
            case 'r':
                sb.Append('\r');
                break;
            case 't':
                sb.Append('\t');
                break;
            case 'v':
                sb.Append('\v');
                break;
            case '\\':
                sb.Append('\\');
                break;
            case '\'':
                sb.Append('\'');
                break;
            case '\"':
                sb.Append('\"');
                break;
            case '\n':
                // Backslash-newline: line continuation, emits nothing.
                break;
            case '\r':
                if (normalizeLineEndings) {
                    // Backslash-CR (or CR LF) is a line continuation as well.
                    if (pos < stop && data[pos].ToChar(null) == '\n') {
                        pos++;
                    }
                    break;
                }
                goto default;
            case 'N': {
                IronPython.Modules.unicodedata.PerformModuleReload(null, null);

                var name = new StringBuilder();
                bool opened = pos < stop && data[pos].ToChar(null) == '{';
                bool closed = false;
                if (opened) {
                    pos++;
                    // Collect everything up to the closing brace (or the end of input).
                    while (!closed && pos < stop) {
                        char nameChar = data[pos++].ToChar(null);
                        if (nameChar == '}') {
                            closed = true;
                        } else {
                            name.Append(nameChar);
                        }
                    }
                }

                if (!closed || name.Length == 0) {
                    // The reported range stops before a closing '}' if one was read ...
                    handleError(escapeOffset, pos - start - (closed ? 1 : 0), @"malformed \N character escape");
                    // ... and that brace is then emitted as ordinary text.
                    if (closed) {
                        sb.Append('}');
                    }
                } else {
                    try {
                        string resolved = IronPython.Modules.unicodedata.lookup(name.ToString());
                        sb.Append(resolved);
                    } catch (KeyNotFoundException) {
                        handleError(escapeOffset, pos - start, "unknown Unicode character name");
                    }
                }
                break;
            }
            case 'x':
                // Two hex digits.
                if (TryParseInt(data, pos, 2, HexRadix, out int hexValue, out int hexConsumed)) {
                    sb.Append((char)hexValue);
                } else {
                    handleError(escapeOffset, pos - start + hexConsumed, @"truncated \xXX escape");
                }
                pos += hexConsumed;
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7': {
                // Octal escape: the digit already read plus at most two more octal digits.
                int octal = esc - '0';
                int digit;
                for (int extra = 0; extra < 2 && pos < stop && HexValue(data[pos].ToChar(null), out digit) && digit < 8; extra++) {
                    octal = octal * 8 + digit;
                    pos++;
                }
                sb.Append((char)octal);
                break;
            }
            default:
                // Unrecognised escape: keep the backslash and the character.
                sb.Append("\\");
                sb.Append(esc);
                break;
        }
    }

    return sb?.ToString();

    // Reports a malformed escape covering [errStart, errEnd), both relative to 'start'.
    // NOTE(review): the handler and the Bytes object receive the whole 'data' array while the
    // offsets are relative to 'start' - confirm callers expect that when start != 0.
    void handleError(int errStart, int errEnd, string reason) {
        if (bytesData == null) {
            byte[] rawBytes = data as byte[];
            if (rawBytes == null) {
                throw new NotImplementedException("Error handler for non byte[] data not supported");
            }
            bytesData = new Bytes(rawBytes);
        }

        if (errorHandler == null) {
            throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", bytesData, errStart, errEnd, reason);
        }

        var substitute = errorHandler(data, errStart, errEnd);
        if (substitute != null) {
            sb.Append(substitute.ToArray());
        }
    }
}