/// <summary>
/// Decodes the escape sequences in a slice of string-literal characters.
/// </summary>
/// <param name="text">Buffer holding the literal's characters; asserted to be non-null.</param>
/// <param name="start">Index of the first character of the slice to decode.</param>
/// <param name="length">Number of characters in the slice, counted from <paramref name="start"/>.</param>
/// <param name="isRaw">When true, backslash sequences other than \u and \U are copied through unchanged.</param>
/// <param name="isUniEscape">When true, \uXXXX and \UXXXXXXXX are decoded; otherwise they are copied through unchanged.</param>
/// <param name="normalizeLineEndings">When true, an unescaped CR (optionally followed by LF) is emitted as a single LF.</param>
/// <returns>The decoded string; the slice itself when nothing needed rewriting.</returns>
/// <remarks>
/// Throws the exception produced by PythonOps.ValueError when a non-raw slice ends in a lone
/// backslash, and a UnicodeDecodeError throwable for an out-of-range or truncated \u / \U escape,
/// a malformed \N escape, or an unknown character name.
/// </remarks>
public static string ParseString(char[] text, int start, int length, bool isRaw, bool isUniEscape, bool normalizeLineEndings)
        {
            Debug.Assert(text != null);

            // Fast path: nothing can be rewritten, so copy the slice verbatim.
            if (isRaw && !isUniEscape && !normalizeLineEndings)
            {
                return new string(text, start, length);
            }

            // The builder is created lazily, on the first character that has to be rewritten.
            StringBuilder buf = null;
            int           i   = start;
            int           l   = start + length;     // exclusive end of the slice
            int           val;

            while (i < l)
            {
                char ch = text[i++];
                if ((!isRaw || isUniEscape) && ch == '\\')
                {
                    if (buf == null)
                    {
                        buf = new StringBuilder(length);
                        // Carry over everything that preceded this backslash.
                        buf.Append(text, start, i - start - 1);
                    }

                    if (i >= l)
                    {
                        // The backslash is the last character of the slice.
                        if (isRaw)
                        {
                            buf.Append('\\');
                            break;
                        }
                        else
                        {
                            throw PythonOps.ValueError("Trailing \\ in string");
                        }
                    }
                    ch = text[i++];

                    if (ch == 'u' || ch == 'U')
                    {
                        int len = (ch == 'u') ? 4 : 8;      // number of hex digits expected
                        int max = 16;                       // radix handed to TryParseInt
                        if (isUniEscape)
                        {
                            if (TryParseInt(text, i, len, max, out val))
                            {
                                if (val < 0 || val > 0x10ffff)
                                {
                                    throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", Bytes.Empty, i - start - 2, i - start + len - 1, "illegal Unicode character");
                                }

                                if (val < 0x010000)
                                {
                                    buf.Append((char)val);
                                }
                                else
                                {
                                    // Values of 0x10000 and above are appended via ConvertFromUtf32.
                                    buf.Append(char.ConvertFromUtf32(val));
                                }
                                i += len;
                            }
                            else
                            {
                                throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", Bytes.Empty, i - start - 2, i - start - 1, @"truncated \uXXXX escape");
                            }
                        }
                        else
                        {
                            // Unicode escapes are disabled: keep the sequence as written.
                            buf.Append('\\');
                            buf.Append(ch);
                        }
                    }
                    else
                    {
                        if (isRaw)
                        {
                            // Raw mode leaves every other backslash pair untouched.
                            buf.Append('\\');
                            buf.Append(ch);
                            continue;
                        }
                        switch (ch)
                        {
                        case 'a': buf.Append('\a'); continue;

                        case 'b': buf.Append('\b'); continue;

                        case 'f': buf.Append('\f'); continue;

                        case 'n': buf.Append('\n'); continue;

                        case 'r': buf.Append('\r'); continue;

                        case 't': buf.Append('\t'); continue;

                        case 'v': buf.Append('\v'); continue;

                        case '\\': buf.Append('\\'); continue;

                        case '\'': buf.Append('\''); continue;

                        case '\"': buf.Append('\"'); continue;

                        case '\r':
                            // Backslash followed by a line break emits nothing; swallow CR LF as a pair.
                            if (i < l && text[i] == '\n')
                            {
                                i++;
                            }
                            continue;

                        case '\n': continue;

                        case 'N': {
                            IronPython.Modules.unicodedata.PerformModuleReload(null, null);
                            if (i < l && text[i] == '{')
                            {
                                i++;
                                StringBuilder namebuf      = new StringBuilder();
                                bool          namecomplete = false;
                                // Collect the character name up to the closing brace.
                                while (i < l)
                                {
                                    char namech = text[i++];
                                    if (namech != '}')
                                    {
                                        namebuf.Append(namech);
                                    }
                                    else
                                    {
                                        namecomplete = true;
                                        break;
                                    }
                                }

                                if (!namecomplete || namebuf.Length == 0)
                                {
                                    throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", Bytes.Empty, i, i, @"malformed \N character escape");
                                }

                                try {
                                    string uval = IronPython.Modules.unicodedata.lookup(namebuf.ToString());
                                    buf.Append(uval);
                                } catch (KeyNotFoundException) {
                                    throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", Bytes.Empty, i, i, "unknown Unicode character name");
                                }
                            }
                            else
                            {
                                throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", Bytes.Empty, i, i, @"malformed \N character escape");
                            }
                        }
                            continue;

                        case 'x':     //hex
                            if (!TryParseInt(text, i, 2, 16, out val))
                            {
                                // Not followed by two hex digits: keep the sequence as written.
                                goto default;
                            }
                            buf.Append((char)val);
                            i += 2;
                            continue;

                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7': {
                            // Octal escape: the digit just read plus up to two more octal digits.
                            int onechar;
                            val = ch - '0';
                            if (i < l && HexValue(text[i], out onechar) && onechar < 8)
                            {
                                val = val * 8 + onechar;
                                i++;
                                if (i < l && HexValue(text[i], out onechar) && onechar < 8)
                                {
                                    val = val * 8 + onechar;
                                    i++;
                                }
                            }
                        }

                            buf.Append((char)val);
                            continue;

                        default:
                            // Unrecognized escape: keep the backslash and the character.
                            buf.Append("\\");
                            buf.Append(ch);
                            continue;
                        }
                    }
                }
                else if (ch == '\r' && normalizeLineEndings)
                {
                    if (buf == null)
                    {
                        buf = new StringBuilder(length);
                        buf.Append(text, start, i - start - 1);
                    }

                    // Normalize line endings. The look-ahead is bounded by the slice end, not the
                    // end of the buffer, so a '\n' just past the slice is never inspected.
                    if (i < l && text[i] == '\n')
                    {
                        i++;
                    }
                    buf.Append('\n');
                }
                else
                {
                    // Ordinary character: only copied once a builder exists.
                    buf?.Append(ch);
                }
            }

            if (buf != null)
            {
                return buf.ToString();
            }
            return new string(text, start, length);
        }
Esempio n. 2
0
 /// <summary>
 /// Builds an exception by passing the result of <c>ZipImportError(context)</c> together with
 /// <paramref name="args"/> to <c>PythonExceptions.CreateThrowable</c>.
 /// </summary>
 /// <param name="context">Code context forwarded to <c>ZipImportError</c>.</param>
 /// <param name="args">Arguments forwarded unchanged to <c>CreateThrowable</c>.</param>
 /// <returns>The exception produced by <c>CreateThrowable</c>.</returns>
 internal static Exception MakeError(CodeContext context, params object[] args)
 {
     var errorType = ZipImportError(context);
     return PythonExceptions.CreateThrowable(errorType, args);
 }
Esempio n. 3
0
        /// <summary>
        /// Decodes escape sequences in a slice of <paramref name="data"/>, routing malformed escapes
        /// through <paramref name="errorHandler"/> when one is supplied.
        /// </summary>
        /// <typeparam name="T">Element type; every element is read as a character via <c>ToChar(null)</c>.</typeparam>
        /// <param name="data">Buffer holding the encoded characters.</param>
        /// <param name="start">Index of the first element of the slice.</param>
        /// <param name="length">Number of elements in the slice.</param>
        /// <param name="isRaw">When true, backslash sequences other than decoded \u / \U escapes are copied through unchanged.</param>
        /// <param name="isUniEscape">When true, \uXXXX and \UXXXXXXXX are decoded.</param>
        /// <param name="normalizeLineEndings">When true, an unescaped CR (optionally followed by LF) is emitted as LF and backslash-CR acts as a line continuation.</param>
        /// <param name="errorHandler">Optional callback invoked with the data and the slice-relative error range; a non-null result is appended to the output. When null, a UnicodeDecodeError throwable is raised instead.</param>
        /// <returns>The decoded text, or null when no escape and no CR normalization was encountered (no builder is created in that case).</returns>
        private static string DoParseString<T>(T[] data, int start, int length, bool isRaw, bool isUniEscape, bool normalizeLineEndings, ParseStringErrorHandler<T> errorHandler = default) where T : IConvertible
        {
            Bytes wrappedInput = null;          // lazily created view of the input used for error reporting
            StringBuilder output = null;        // stays null until something has to be rewritten
            int pos = start;
            int limit = start + length;         // exclusive end of the slice
            bool escapesActive = !isRaw || isUniEscape;

            while (pos < limit)
            {
                char current = data[pos++].ToChar(null);

                // Anything that is not the start of an escape is handled first.
                if (!escapesActive || current != '\\')
                {
                    if (current == '\r' && normalizeLineEndings)
                    {
                        StringBuilderInit(ref output, data, start, pos - 1, length);

                        // CR and CR LF both become a single LF.
                        if (pos < limit && data[pos].ToChar(null) == '\n')
                        {
                            pos++;
                        }
                        output.Append('\n');
                    }
                    else
                    {
                        output?.Append(current);
                    }
                    continue;
                }

                StringBuilderInit(ref output, data, start, pos - 1, length);

                if (pos >= limit)
                {
                    // The backslash is the final element of the slice.
                    if (isRaw)
                    {
                        output.Append('\\');
                    }
                    else
                    {
                        ReportError(pos - start - 1, pos - start, "\\ at end of string");
                    }
                    break;
                }

                char escape = data[pos++].ToChar(null);

                if (isUniEscape && (escape == 'u' || escape == 'U'))
                {
                    bool isShortForm = escape == 'u';
                    int digitCount = isShortForm ? 4 : 8;
                    if (!TryParseInt(data, pos, digitCount, 16, out int codePoint, out int digitsUsed))
                    {
                        ReportError(pos - start - 2, pos - start + digitsUsed, isShortForm ? @"truncated \uXXXX escape" : @"truncated \UXXXXXXXX escape");
                    }
                    else if (codePoint < 0 || codePoint > 0x10ffff)
                    {
                        ReportError(pos - start - 2, pos - start + digitsUsed, isRaw ? @"\Uxxxxxxxx out of range" : "illegal Unicode character");
                    }
                    else if (codePoint < 0x010000)
                    {
                        output.Append((char)codePoint);
                    }
                    else
                    {
                        output.Append(char.ConvertFromUtf32(codePoint));
                    }

                    // Skip whatever digits were consumed, whether or not decoding succeeded.
                    pos += digitsUsed;
                    continue;
                }

                if (isRaw)
                {
                    // Raw mode keeps every remaining backslash pair exactly as written.
                    output.Append('\\');
                    output.Append(escape);
                    continue;
                }

                // The switch is the last statement of the loop body, so leaving it moves on to
                // the next input character.
                switch (escape)
                {
                    case 'a': output.Append('\a'); break;
                    case 'b': output.Append('\b'); break;
                    case 'f': output.Append('\f'); break;
                    case 'n': output.Append('\n'); break;
                    case 'r': output.Append('\r'); break;
                    case 't': output.Append('\t'); break;
                    case 'v': output.Append('\v'); break;
                    case '\\': output.Append('\\'); break;
                    case '\'': output.Append('\''); break;
                    case '\"': output.Append('\"'); break;

                    case '\n':
                        // Backslash-LF emits nothing.
                        break;

                    case '\r':
                        if (!normalizeLineEndings)
                        {
                            goto default;
                        }
                        // Backslash-CR emits nothing; a following LF belongs to the same line break.
                        if (pos < limit && data[pos].ToChar(null) == '\n')
                        {
                            pos++;
                        }
                        break;

                    case 'N':
                    {
                        IronPython.Modules.unicodedata.PerformModuleReload(null, null);
                        var name = new StringBuilder();
                        bool sawOpenBrace = false;
                        bool sawCloseBrace = false;

                        if (pos < limit && data[pos].ToChar(null) == '{')
                        {
                            sawOpenBrace = true;
                            pos++;
                            // Gather the name up to, and consuming, the closing brace.
                            while (pos < limit && !sawCloseBrace)
                            {
                                char nameChar = data[pos++].ToChar(null);
                                if (nameChar == '}')
                                {
                                    sawCloseBrace = true;
                                }
                                else
                                {
                                    name.Append(nameChar);
                                }
                            }
                        }

                        if (sawCloseBrace && name.Length > 0)
                        {
                            try
                            {
                                string resolved = IronPython.Modules.unicodedata.lookup(name.ToString());
                                output.Append(resolved);
                            }
                            catch (KeyNotFoundException)
                            {
                                // 4 accounts for the characters \ N { }
                                ReportError(pos - start - 4 - name.Length, pos - start, "unknown Unicode character name");
                            }
                        }
                        else
                        {
                            int openLen = sawOpenBrace ? 1 : 0;     // 1 for {
                            int closeLen = sawCloseBrace ? 1 : 0;   // 1 for }
                            // 2 accounts for the characters \ N; the reported range stops before a closing brace.
                            ReportError(pos - start - 2 - openLen - name.Length - closeLen, pos - start - closeLen, @"malformed \N character escape");
                            if (sawCloseBrace)
                            {
                                output.Append('}');
                            }
                        }
                        break;
                    }

                    case 'x':
                    {
                        if (TryParseInt(data, pos, 2, 16, out int hexValue, out int hexDigitsUsed))
                        {
                            output.Append((char)hexValue);
                        }
                        else
                        {
                            ReportError(pos - start - 2, pos - start + hexDigitsUsed, @"truncated \xXX escape");
                        }
                        pos += hexDigitsUsed;
                        break;
                    }

                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    {
                        // Octal escape: the digit just read plus at most two further octal digits.
                        int octal = escape - '0';
                        for (int extra = 0; extra < 2 && pos < limit && HexValue(data[pos].ToChar(null), out int digit) && digit < 8; extra++)
                        {
                            octal = octal * 8 + digit;
                            pos++;
                        }
                        output.Append((char)octal);
                        break;
                    }

                    default:
                        // Unrecognized escape: keep the backslash and the character.
                        output.Append("\\");
                        output.Append(escape);
                        break;
                }
            }

            return output?.ToString();

            // Raises a UnicodeDecodeError throwable, or defers to errorHandler and appends its substitute.
            void ReportError(int errorStart, int errorEnd, string reason)
            {
                if (wrappedInput == null)
                {
                    byte[] rawBytes = data as byte[];
                    if (rawBytes == null)
                    {
                        throw new NotImplementedException("Error handler for non byte[] data not supported");
                    }
                    wrappedInput = new Bytes(rawBytes);
                }

                if (errorHandler == null)
                {
                    throw PythonExceptions.CreateThrowable(PythonExceptions.UnicodeDecodeError, isRaw ? "rawunicodeescape" : "unicodeescape", wrappedInput, errorStart, errorEnd, reason);
                }

                var replacement = errorHandler(data, errorStart, errorEnd);
                if (replacement != null)
                {
                    output.Append(replacement.ToArray());
                }
            }
        }