Example #1
0
        /// <summary>
        /// Writes <paramref name="value"/> to <paramref name="output"/>, replacing each
        /// recognised <c>&amp;...;</c> character reference with the character(s) it denotes.
        /// </summary>
        /// <param name="value">
        /// Text to decode. When null, nothing is written and the method returns before
        /// <paramref name="output"/> is validated.
        /// </param>
        /// <param name="output">Writer that receives the decoded text.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="value"/> is non-null and <paramref name="output"/> is null.
        /// </exception>
        public static void HtmlDecode(string value, TextWriter output)
        {
            if (value == null)
            {
                return;
            }
            if (output == null)
            {
                throw new ArgumentNullException("output");
            }

            // Fast path: the helper reports that nothing needs decoding, so copy verbatim.
            if (!StringRequiresHtmlDecoding(value))
            {
                output.Write(value);
                return;
            }

            // Snapshot the conformance setting once so the whole string is decoded under one mode.
            UnicodeDecodingConformance decodeConformance = HtmlDecodeConformance;
            int length = value.Length;

            for (int pos = 0; pos < length; pos++)
            {
                char current = value[pos];

                // Anything other than '&' is copied straight through.
                if (current != '&')
                {
                    output.Write(current);
                    continue;
                }

                // Search for the next entity-ending character after the '&'. Only a ';' closes
                // an entity; if a different ending character (e.g. another '&') comes first,
                // this '&' is literal and the later one may begin a real entity (VSWhidbey 275184).
                int terminator = value.IndexOfAny(_htmlEntityEndingChars, pos + 1);
                if (terminator <= 0 || value[terminator] != ';')
                {
                    output.Write(current);
                    continue;
                }

                string entityBody = value.Substring(pos + 1, terminator - pos - 1);
                bool looksNumeric = entityBody.Length > 1 && entityBody[0] == '#';

                if (!looksNumeric)
                {
                    // Named entity: everything up to the semicolon is consumed either way.
                    pos = terminator;

                    char named = HtmlEntities.Lookup(entityBody);
                    if (named == (char)0)
                    {
                        // Lookup returned (char)0: re-emit the original text unchanged.
                        output.Write('&');
                        output.Write(entityBody);
                        output.Write(';');
                    }
                    else
                    {
                        output.Write(named);
                    }

                    continue;
                }

                // Numeric reference, decimal or hexadecimal, e.g.
                //      &#229;  --> decimal
                //      &#xE5;  --> same char in hex
                // See http://www.w3.org/TR/REC-html40/charset.html#entities
                uint codePoint;
                bool isHex = entityBody[1] == 'x' || entityBody[1] == 'X';
                bool accepted = isHex
                    ? UInt32.TryParse(entityBody.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint)
                    : UInt32.TryParse(entityBody.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);

                if (accepted)
                {
                    // Range-check the parsed value according to the configured conformance mode.
                    switch (decodeConformance)
                    {
                        case UnicodeDecodingConformance.Strict:
                            // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                            accepted = ((codePoint < HIGH_SURROGATE_START) || (LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END));
                            break;

                        case UnicodeDecodingConformance.Compat:
                            // decoded character must be U+0001 .. U+FFFF
                            // null chars disallowed for compat with 4.0
                            accepted = (0 < codePoint && codePoint <= UNICODE_PLANE00_END);
                            break;

                        case UnicodeDecodingConformance.Loose:
                            // decoded character must be U+0000 .. U+10FFFF
                            accepted = (codePoint <= UNICODE_PLANE16_END);
                            break;

                        default:
                            Debug.Assert(false, "Should never get here!");
                            accepted = false;
                            break;
                    }
                }

                if (!accepted)
                {
                    // Unparseable or out-of-range reference: emit only the '&' and resume
                    // scanning at the very next character, so the rest is copied as-is.
                    output.Write(current);
                    continue;
                }

                if (codePoint <= UNICODE_PLANE00_END)
                {
                    // Fits in a single UTF-16 code unit.
                    output.Write((char)codePoint);
                }
                else
                {
                    // Above plane 0: emit as a surrogate pair.
                    char leadingSurrogate, trailingSurrogate;
                    ConvertSmpToUtf16(codePoint, out leadingSurrogate, out trailingSurrogate);
                    output.Write(leadingSurrogate);
                    output.Write(trailingSurrogate);
                }

                // Skip past the semicolon; the loop increment moves to the character after it.
                pos = terminator;
            }
        }
Example #2
0
 // Type initializer: both the HTML encode and decode conformance settings start out as Strict.
 static WebUtility()
 {
     s_htmlEncodeConformance = UnicodeEncodingConformance.Strict;
     s_htmlDecodeConformance = UnicodeDecodingConformance.Strict;
 }
Example #3
0
 // Type initializer: both the HTML encode and decode conformance settings start out as Strict.
 static WebUtility()
 {
     s_htmlEncodeConformance = UnicodeEncodingConformance.Strict;
     s_htmlDecodeConformance = UnicodeDecodingConformance.Strict;
 }