예제 #1
0
        /// <summary>
        /// Replaces HTML character references in <paramref name="value"/> with the
        /// characters they denote. Named references (e.g. <c>&amp;amp;</c>) are looked
        /// up in <c>_htmlEntitiesByEntity</c>; numeric references may be decimal
        /// (<c>&amp;#65;</c>) or hexadecimal (<c>&amp;#x41;</c>).
        /// </summary>
        /// <param name="value">The text to decode. Must not be <c>null</c>.</param>
        /// <returns>
        /// The decoded text. Anything that is not a well-formed, recognized reference
        /// (missing <c>;</c>, unknown name, no digits, invalid digits, or a value
        /// outside the Unicode range) is copied to the output unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="value"/> is <c>null</c>.
        /// </exception>
        public static string HtmlDecode(string value)
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            // Every reference is at least three characters long and decodes to at
            // most two, so the output never exceeds the input length.
            var sb = new StringBuilder(value.Length);

            for (int i = 0; i < value.Length; i++)
            {
                // The shortest possible reference is "&x;", so an ampersand with
                // fewer than two characters after it is always literal text.

                if (value[i] != '&' || value.Length <= i + 2)
                {
                    sb.Append(value[i]);
                    continue;
                }

                // Scan for the terminating ';' inside a bounded window so that a
                // stray '&' does not trigger a scan of the whole remaining input.

                int maxSearch   = Math.Min(value.Length, i + _longestHtmlEntity + 2);
                int endPosition = -1;

                for (int j = i + 1; j < maxSearch; j++)
                {
                    if (value[j] == ';')
                    {
                        endPosition = j;
                        break;
                    }
                }

                // No terminator found: emit the '&' as text and carry on with
                // the next character.

                if (endPosition == -1)
                {
                    sb.Append(value[i]);
                    continue;
                }

                if (value[i + 1] == '#')
                {
                    // Numeric reference. endPosition >= i + 2 here, so reading
                    // value[i + 2] is always in range.

                    int offset = 2;

                    bool isHexNumeric = false;

                    if (value[i + 2] == 'x' || value[i + 2] == 'X')
                    {
                        isHexNumeric = true;
                        offset++;
                    }

                    int digitsStart = i + offset;

                    // At least one digit is required ("&#;" and "&#x;" are not
                    // references), and every character must be a valid digit.
                    // Only ASCII digits are accepted: Char.IsDigit would also
                    // admit other Unicode digits that the parser below rejects.

                    bool isNumeric = endPosition > digitsStart;

                    for (int j = digitsStart; isNumeric && j < endPosition; j++)
                    {
                        char c = value[j];
                        bool isAsciiDigit = c >= '0' && c <= '9';

                        if (!isAsciiDigit && !(isHexNumeric && HttpUtil.IsHex(c)))
                        {
                            isNumeric = false;
                        }
                    }

                    int numericCodePoint = 0;

                    if (isNumeric)
                    {
                        // TryParse (rather than Parse) so that overflow or any
                        // character the parser dislikes degrades to "treat as
                        // text" instead of throwing. Invariant culture keeps the
                        // result independent of the current thread culture.

                        string numericEntity = value.Substring(digitsStart, endPosition - digitsStart);

                        isNumeric =
                            int.TryParse(
                                numericEntity,
                                isHexNumeric ? NumberStyles.HexNumber : NumberStyles.Integer,
                                CultureInfo.InvariantCulture,
                                out numericCodePoint) &&
                            numericCodePoint >= 0 &&          // 8-digit hex can parse as negative
                            numericCodePoint <= 0x10FFFF;     // highest Unicode code point
                    }

                    // Not a usable numeric reference: emit the '&' as text.

                    if (!isNumeric)
                    {
                        sb.Append(value[i]);
                        continue;
                    }

                    if (numericCodePoint <= 0xFFFF)
                    {
                        // Fits in a single UTF-16 code unit.
                        sb.Append((char)numericCodePoint);
                    }
                    else
                    {
                        // Supplementary-plane code point: a plain (char) cast would
                        // truncate it, so emit the surrogate pair instead.
                        sb.Append(char.ConvertFromUtf32(numericCodePoint));
                    }

                    // Skip past the reference; the loop increment steps over ';'.
                    i = endPosition;
                }
                else
                {
                    // Named reference: the text between '&' and ';' is the key.

                    string entity = value.Substring(i + 1, endPosition - (i + 1));

                    int namedCodePoint;

                    if (_htmlEntitiesByEntity.TryGetValue(entity, out namedCodePoint))
                    {
                        sb.Append((char)namedCodePoint);

                        i = endPosition;
                    }
                    else
                    {
                        // Unknown name: emit the '&' as text; the characters that
                        // follow are processed normally on later iterations.

                        sb.Append(value[i]);
                    }
                }
            }

            return sb.ToString();
        }