Esempio n. 1
0
        /// <summary>
        /// Decodes HTML character references in <paramref name="html"/> and returns
        /// the resulting text.
        /// <para>
        /// Hexadecimal references (<c>&amp;#xHH;</c>) are decoded only when they are
        /// terminated by a semicolon; decimal references (<c>&amp;#DD</c>) are decoded
        /// with or without one. A numeric reference whose value is zero or greater
        /// than U+10FFFF is copied to the output unchanged. Values above U+FFFF are
        /// emitted as a UTF-16 surrogate pair.
        /// </para>
        /// <para>
        /// Named references are resolved through <c>EntityEscaper.Code</c>. With
        /// <c>UnEscapeMode.Attribute</c> the whole name must match. With any other
        /// mode a more aggressive match of "basic" entities is attempted first, like
        /// IE does on non-markup HTML text: a leading part of the name is accepted.
        /// That kind of matching cannot be used for attributes, since it breaks
        /// URLs. When in doubt, use <c>UnEscapeMode.Attribute</c>.
        /// </para>
        /// Example:
        ///
        /// UnEscapeEntities("&amp;pounda", any mode other than Attribute) => "£a"
        /// UnEscapeEntities("&amp;pounda", UnEscapeMode.Attribute) => "&amp;pounda"
        /// </summary>
        /// <param name="html">Text to decode; may be null.</param>
        /// <param name="unEscapeMode">Selects strict (Attribute) or aggressive named-entity matching.</param>
        /// <returns>The decoded text, or null when <paramref name="html"/> is null.</returns>
        public static string UnEscapeEntities(string html, UnEscapeMode unEscapeMode)
        {
            if (html == null)
            {
                return null;
            }

            // Largest Unicode code point. Accumulation of numeric references stops
            // once the running value passes this, so arbitrarily long digit runs
            // cannot overflow the int and wrap around to a bogus character.
            const int MaxCodePoint = 0x10FFFF;

            StringBuilder output = new StringBuilder(html.Length);
            int len = html.Length;

            for (int i = 0; i < len; i++)
            {
                char c0 = html[i];
                if (c0 == '&')
                {
                    if (i + 1 < len)
                    {
                        char c1 = html[i + 1];
                        switch (c1)
                        {
                            case '#':
                            {
                                if (i + 2 < len)
                                {
                                    char c2 = html[i + 2];
                                    switch (c2)
                                    {
                                        case 'x':
                                        case 'X':
                                        {
                                            // Hexadecimal reference: digits start after "&#x".
                                            bool semicolonTerminated = false;
                                            int charVal = 0;
                                            int j;
                                            for (j = i + 3; j < len; j++)
                                            {
                                                int hexVal = ToHexValue(html[j]);
                                                if (hexVal == -1)
                                                {
                                                    // Remember whether the digit run ended on ';'.
                                                    if (html[j] == ';')
                                                    {
                                                        semicolonTerminated = true;
                                                    }
                                                    break;
                                                }

                                                // Keep consuming digits but stop growing the
                                                // value once it is already out of range.
                                                if (charVal <= MaxCodePoint)
                                                {
                                                    charVal = (charVal * 16) + hexVal;
                                                }
                                            }

                                            if (semicolonTerminated && charVal != 0 && charVal <= MaxCodePoint)
                                            {
                                                // j sits on the ';'; the loop increment steps past it.
                                                i = j;
                                                if (charVal <= char.MaxValue)
                                                {
                                                    output.Append((char)charVal);
                                                }
                                                else
                                                {
                                                    // Supplementary plane: needs a surrogate pair.
                                                    output.Append(char.ConvertFromUtf32(charVal));
                                                }
                                                continue;
                                            }

                                            // Zero, out of range or unterminated: emit '&' literally.
                                            break;
                                        }

                                        case '0':
                                        case '1':
                                        case '2':
                                        case '3':
                                        case '4':
                                        case '5':
                                        case '6':
                                        case '7':
                                        case '8':
                                        case '9':
                                        {
                                            // Decimal reference: digits start right after "&#".
                                            int charVal = 0;
                                            int j;
                                            for (j = i + 2; j < len; j++)
                                            {
                                                char c = html[j];
                                                if (c < '0' || c > '9')
                                                {
                                                    // An optional terminating ';' is consumed too.
                                                    if (c == ';')
                                                    {
                                                        ++j;
                                                    }
                                                    break;
                                                }

                                                if (charVal <= MaxCodePoint)
                                                {
                                                    charVal = (charVal * 10) + (c - '0');
                                                }
                                            }

                                            if (charVal != 0 && charVal <= MaxCodePoint)
                                            {
                                                // j is one past the reference; compensate for the loop increment.
                                                i = j - 1;
                                                if (charVal <= char.MaxValue)
                                                {
                                                    output.Append((char)charVal);
                                                }
                                                else
                                                {
                                                    // Supplementary plane: needs a surrogate pair.
                                                    output.Append(char.ConvertFromUtf32(charVal));
                                                }
                                                continue;
                                            }

                                            // Zero or out of range: emit '&' literally.
                                            break;
                                        }
                                    }
                                }
                                break;
                            }

                            default:
                            {
                                // Named reference: collect up to 11 ASCII letters/digits after '&'.
                                int j;
                                int end = Math.Min(len, i + 12);
                                for (j = i + 1; j < end; j++)
                                {
                                    char c = html[j];
                                    if (c == ';' || (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')))
                                    {
                                        break;
                                    }
                                }

                                string entityRef = html.Substring(i + 1, j - (i + 1));

                                if (unEscapeMode != UnEscapeMode.Attribute)
                                {
                                    // k = number of characters in entityRef that we are using.
                                    // Proper prefixes are tried shortest first; the 'true' flag
                                    // presumably restricts the lookup to the "basic" entity
                                    // set (verify against EntityEscaper.Code).
                                    int k, code = -1;
                                    for (k = 1; k < entityRef.Length; k++)
                                    {
                                        if (-1 != (code = EntityEscaper.Code(entityRef.Substring(0, k), true)))
                                        {
                                            break;
                                        }
                                    }

                                    // No prefix matched: fall back to the whole name.
                                    if (code == -1)
                                    {
                                        code = EntityEscaper.Code(entityRef, false);
                                    }

                                    if (code != -1)
                                    {
                                        output.Append((char)code);
                                        i += 1 + k;

                                        // Swallow a ';' directly after the name. Bounded by len,
                                        // not by the scan limit, so it is also seen after a name
                                        // that fills the whole scan window.
                                        if (i < len && html[i] == ';')
                                        {
                                            ++i;
                                        }
                                        --i; // compensate for the loop increment
                                        continue;
                                    }
                                }
                                else
                                {
                                    // Attribute mode: only an exact match of the whole name counts.
                                    int code = EntityEscaper.Code(entityRef, false);
                                    if (code != -1)
                                    {
                                        output.Append((char)code);
                                        i += 1 + entityRef.Length;
                                        if (i < len && html[i] == ';')
                                        {
                                            ++i;
                                        }
                                        --i; // compensate for the loop increment
                                        continue;
                                    }
                                }

                                break;
                            }
                        }
                    }
                }

                // Not a recognised reference (or not '&' at all): copy through.
                output.Append(c0);
            }

            return output.ToString();
        }
Esempio n. 2
0
        /// <summary>
        /// Converts HTML character references found in <paramref name="html"/> back
        /// into the characters they stand for.
        /// <para>
        /// A hexadecimal reference (<c>&amp;#xHH;</c>) must end with a semicolon to be
        /// decoded; a decimal reference (<c>&amp;#DD</c>) is decoded whether or not a
        /// semicolon follows. Numeric values of zero, or above U+10FFFF, are not
        /// decoded and the original text is kept. Values above U+FFFF produce a
        /// UTF-16 surrogate pair.
        /// </para>
        /// <para>
        /// Named references are looked up with <c>EntityEscaper.Code</c>. Any mode
        /// other than <c>UnEscapeMode.Attribute</c> performs more aggressive matching
        /// of "basic" entities, like IE does on non-markup HTML text, accepting a
        /// leading part of the name. This kind of matching can't be done for
        /// attributes, since it breaks URLs. When in doubt, use
        /// <c>UnEscapeMode.Attribute</c>.
        /// </para>
        /// Example:
        ///
        /// UnEscapeEntities("&amp;pounda", any mode other than Attribute) => "£a"
        /// UnEscapeEntities("&amp;pounda", UnEscapeMode.Attribute) => "&amp;pounda"
        /// </summary>
        /// <param name="html">The text to decode; null is allowed.</param>
        /// <param name="unEscapeMode">Chooses strict (Attribute) or aggressive named-entity matching.</param>
        /// <returns>The decoded string, or null if <paramref name="html"/> is null.</returns>
        public static string UnEscapeEntities(string html, UnEscapeMode unEscapeMode)
        {
            if (html == null)
                return null;

            // Highest Unicode code point. Once a numeric reference exceeds it the
            // value is frozen, which keeps long digit runs from overflowing the int.
            const int MaxCodePoint = 0x10FFFF;

            StringBuilder output = new StringBuilder(html.Length);
            int len = html.Length;
            for (int i = 0; i < len; i++)
            {
                char c0 = html[i];
                if (c0 == '&')
                {
                    if (i + 1 < len)
                    {
                        char c1 = html[i + 1];
                        switch (c1)
                        {
                            case '#':
                                {
                                    if (i + 2 < len)
                                    {
                                        char c2 = html[i + 2];
                                        switch (c2)
                                        {
                                            case 'x':
                                            case 'X':
                                                {
                                                    // do hexadecimal match (digits begin after "&#x")

                                                    bool semicolonTerminated = false;
                                                    int charVal = 0;
                                                    int j;
                                                    for (j = i + 3; j < len; j++)
                                                    {
                                                        int hexVal = ToHexValue(html[j]);
                                                        if (hexVal == -1)
                                                        {
                                                            // note whether the digit run stopped on a semicolon
                                                            if (html[j] == ';')
                                                                semicolonTerminated = true;
                                                            break;
                                                        }

                                                        // still scan the digit, but do not grow an
                                                        // already out-of-range value any further
                                                        if (charVal <= MaxCodePoint)
                                                            charVal = (charVal * 16) + hexVal;
                                                    }
                                                    if (semicolonTerminated && charVal != 0 && charVal <= MaxCodePoint)
                                                    {
                                                        // j is on the ';' and the loop increment moves past it
                                                        i = j;
                                                        if (charVal <= char.MaxValue)
                                                            output.Append((char)charVal);
                                                        else
                                                            output.Append(char.ConvertFromUtf32(charVal)); // surrogate pair
                                                        continue;
                                                    }
                                                    // zero, out of range or unterminated: '&' is copied literally
                                                    break;
                                                }
                                            case '0':
                                            case '1':
                                            case '2':
                                            case '3':
                                            case '4':
                                            case '5':
                                            case '6':
                                            case '7':
                                            case '8':
                                            case '9':
                                                {
                                                    // do decimal match (digits begin right after "&#")

                                                    int charVal = 0;
                                                    int j;
                                                    for (j = i + 2; j < len; j++)
                                                    {
                                                        char c = html[j];
                                                        if (c < '0' || c > '9')
                                                        {
                                                            // an optional trailing semicolon is consumed as well
                                                            if (c == ';')
                                                                ++j;
                                                            break;
                                                        }

                                                        if (charVal <= MaxCodePoint)
                                                            charVal = (charVal * 10) + (c - '0');
                                                    }
                                                    if (charVal != 0 && charVal <= MaxCodePoint)
                                                    {
                                                        // j is one past the reference; offset the loop increment
                                                        i = j - 1;
                                                        if (charVal <= char.MaxValue)
                                                            output.Append((char)charVal);
                                                        else
                                                            output.Append(char.ConvertFromUtf32(charVal)); // surrogate pair
                                                        continue;
                                                    }
                                                    // zero or out of range: '&' is copied literally
                                                    break;
                                                }
                                        }
                                    }
                                    break;
                                }
                            default:
                                {
                                    // named reference: gather at most 11 ASCII letters/digits after '&'
                                    int j;
                                    int end = Math.Min(len, i + 12);
                                    for (j = i + 1; j < end; j++)
                                    {
                                        char c = html[j];
                                        if (c == ';' || (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')))
                                        {
                                            break;
                                        }
                                    }

                                    string entityRef = html.Substring(i + 1, j - (i + 1));

                                    if (unEscapeMode != UnEscapeMode.Attribute)
                                    {
                                        // k = number of characters in entityRef that we are using;
                                        // proper prefixes are tried shortest first. The 'true' flag
                                        // presumably limits the lookup to the "basic" entities
                                        // (verify against EntityEscaper.Code).
                                        int k, code = -1;
                                        for (k = 1; k < entityRef.Length; k++)
                                        {
                                            if (-1 != (code = EntityEscaper.Code(entityRef.Substring(0, k), true)))
                                                break;
                                        }

                                        // no prefix matched, so try the complete name
                                        if (code == -1)
                                        {
                                            code = EntityEscaper.Code(entityRef, false);
                                        }

                                        if (code != -1)
                                        {
                                            output.Append((char)code);
                                            i += 1 + k;
                                            // consume a ';' right after the name; bounded by len rather
                                            // than the scan limit so it is not missed after a name that
                                            // fills the whole scan window
                                            if (i < len && html[i] == ';')
                                                ++i;
                                            --i; // offset the loop increment
                                            continue;
                                        }
                                    }
                                    else
                                    {
                                        // attribute mode: the whole name has to match
                                        int code = EntityEscaper.Code(entityRef, false);
                                        if (code != -1)
                                        {
                                            output.Append((char)code);
                                            i += 1 + entityRef.Length;
                                            if (i < len && html[i] == ';')
                                                ++i;
                                            --i; // offset the loop increment
                                            continue;
                                        }
                                    }

                                    break;
                                }
                        }
                    }
                }
                // not a recognised reference (or not '&' at all): copy the character through
                output.Append(c0);
            }
            return output.ToString();
        }