Exemplo n.º 1
0
        /// <summary>
        /// Appends an HTML-escaped copy of <paramref name="string"/> to <paramref name="accum"/>.
        /// </summary>
        /// <remarks>
        /// This method is long and does a lot on purpose: splitting it up would require rescanning the
        /// input and creating additional StringBuilder instances.
        /// Surrogate handling: a valid high/low surrogate pair is processed as one code point. An
        /// unpaired surrogate is processed as a single UTF-16 code unit, so it never swallows the
        /// character that follows it and never causes an out-of-range read at the end of the input.
        /// </remarks>
        /// <param name="accum">Builder that receives the escaped output.</param>
        /// <param name="string">Text to escape.</param>
        /// <param name="escapeMode">
        /// Passed to <c>GetMap</c> to obtain the code point to entity-name map. When it equals
        /// <c>EscapeMode.Xhtml</c>, U+00A0 is appended unchanged instead of as <c>&amp;nbsp;</c>.
        /// </param>
        /// <param name="charset">
        /// Encoding handed to <c>CharsetEncoder</c>; characters it reports as encodable are appended as-is.
        /// </param>
        /// <param name="inAttribute">
        /// When true, <c>"</c> is escaped and <c>&lt;</c>/<c>&gt;</c> are appended unchanged; when false,
        /// <c>&lt;</c>/<c>&gt;</c> are escaped and <c>"</c> is appended unchanged.
        /// </param>
        /// <param name="normaliseWhite">
        /// When true, every run of characters accepted by <c>StringUtil.IsWhitespace</c> is collapsed
        /// to a single space.
        /// </param>
        /// <param name="stripLeadingWhite">
        /// When true (and <paramref name="normaliseWhite"/> is true), whitespace before the first
        /// non-whitespace character is dropped.
        /// </param>
        internal static void Escape(StringBuilder accum, string @string, EscapeMode escapeMode, Encoding charset, bool inAttribute, bool normaliseWhite, bool stripLeadingWhite)
        {
            bool lastWasWhite = false;
            bool reachedNonWhite = false;
            CharsetEncoder encoder = new CharsetEncoder(charset);
            IDictionary<Utf32, string> map = GetMap(escapeMode);
            int length = @string.Length;
            int step = 1;
            for (int offset = 0; offset < length; offset += step)
            {
                char c = @string[offset];

                // Only a well-formed high+low pair spans two UTF-16 code units. IsSurrogatePair
                // returns false for a lone surrogate, including one at the very end of the string.
                bool isPair = char.IsSurrogatePair(@string, offset);
                step = isPair ? 2 : 1;

                if (normaliseWhite)
                {
                    if (StringUtil.IsWhitespace(c))
                    {
                        if ((stripLeadingWhite && !reachedNonWhite) || lastWasWhite)
                        {
                            continue;
                        }
                        accum.Append(' ');
                        lastWasWhite = true;
                        continue;
                    }
                    else
                    {
                        lastWasWhite = false;
                        reachedNonWhite = true;
                    }
                }

                if (!isPair)
                {
                    // Single-char fast path for the html specific and required escapes.
                    switch (c)
                    {
                        case '&':
                            accum.Append("&amp;");
                            continue;

                        case '\u00A0':
                            if (escapeMode != EscapeMode.Xhtml)
                            {
                                accum.Append("&nbsp;");
                            }
                            else
                            {
                                accum.Append(c);
                            }
                            continue;

                        case '<':
                            if (!inAttribute)
                            {
                                accum.Append("&lt;");
                            }
                            else
                            {
                                accum.Append(c);
                            }
                            continue;

                        case '>':
                            if (!inAttribute)
                            {
                                accum.Append("&gt;");
                            }
                            else
                            {
                                accum.Append(c);
                            }
                            continue;

                        case '"':
                            if (inAttribute)
                            {
                                accum.Append("&quot;");
                            }
                            else
                            {
                                accum.Append(c);
                            }
                            continue;

                        default:
                            break;
                    }
                }

                char[] chars = isPair
                    ? new[] { c, @string[offset + 1] }
                    : new[] { c };
                if (encoder.CanEncode(chars))
                {
                    accum.Append(chars);
                    continue;
                }

                // char.ConvertToUtf32 throws on an unpaired surrogate, so for anything that is not a
                // real pair the code unit value itself is used.
                Utf32 codePoint = isPair ? char.ConvertToUtf32(@string, offset) : c;
                string entityName;
                if (map.TryGetValue(codePoint, out entityName))
                {
                    accum.Append('&').Append(entityName).Append(';');
                }
                else
                {
                    accum.Append("&#x").AppendFormat("{0:x}", codePoint).Append(';');
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes <paramref name="string"/> into <paramref name="accum"/> with HTML escaping applied.
        /// </summary>
        /// <remarks>
        /// Deliberately kept as one large method: breaking it up would cause rescanning of the input
        /// and extra StringBuilder generations.
        /// A well-formed surrogate pair is handled as a single code point. A surrogate without its
        /// partner is handled as one UTF-16 code unit, so the following character is still escaped
        /// normally and a trailing lone surrogate does not read past the end of the input.
        /// </remarks>
        /// <param name="accum">Destination builder for the escaped text.</param>
        /// <param name="string">Input text to escape.</param>
        /// <param name="escapeMode">
        /// Selects the entity map via <c>GetMap</c>. For <c>EscapeMode.Xhtml</c>, U+00A0 is emitted
        /// unchanged rather than as <c>&amp;nbsp;</c>.
        /// </param>
        /// <param name="charset">
        /// Encoding used to construct the <c>CharsetEncoder</c> that decides whether a character can be
        /// emitted unescaped.
        /// </param>
        /// <param name="inAttribute">
        /// True escapes <c>"</c> and leaves <c>&lt;</c>/<c>&gt;</c> alone; false escapes
        /// <c>&lt;</c>/<c>&gt;</c> and leaves <c>"</c> alone.
        /// </param>
        /// <param name="normaliseWhite">
        /// True collapses each run of <c>StringUtil.IsWhitespace</c> characters into one space.
        /// </param>
        /// <param name="stripLeadingWhite">
        /// True (together with <paramref name="normaliseWhite"/>) discards whitespace that precedes the
        /// first non-whitespace character.
        /// </param>
        internal static void Escape(StringBuilder accum, string @string, EscapeMode escapeMode, Encoding charset, bool inAttribute, bool normaliseWhite, bool stripLeadingWhite)
        {
            bool           lastWasWhite     = false;
            bool           reachedNonWhite  = false;
            CharsetEncoder encoder          = new CharsetEncoder(charset);
            IDictionary <Utf32, string> map = GetMap(escapeMode);
            int length = @string.Length;
            int step   = 1;

            for (int offset = 0; offset < length; offset += step)
            {
                char c      = @string[offset];
                // True only for a high surrogate immediately followed by a low surrogate; a lone
                // surrogate (even as the last char) yields false and advances by one code unit.
                bool isPair = char.IsSurrogatePair(@string, offset);
                step = isPair ? 2 : 1;

                if (normaliseWhite)
                {
                    if (StringUtil.IsWhitespace(c))
                    {
                        if ((stripLeadingWhite && !reachedNonWhite) || lastWasWhite)
                        {
                            continue;
                        }
                        accum.Append(' ');
                        lastWasWhite = true;
                        continue;
                    }
                    else
                    {
                        lastWasWhite    = false;
                        reachedNonWhite = true;
                    }
                }

                if (!isPair)
                {
                    // split implementation for efficiency on the single char common case
                    switch (c)
                    {
                    case '&':
                        // html specific and required escapes:
                        accum.Append("&amp;");
                        continue;

                    case '\u00A0':
                        if (escapeMode != EscapeMode.Xhtml)
                        {
                            accum.Append("&nbsp;");
                        }
                        else
                        {
                            accum.Append(c);
                        }
                        continue;

                    case '<':
                        if (!inAttribute)
                        {
                            accum.Append("&lt;");
                        }
                        else
                        {
                            accum.Append(c);
                        }
                        continue;

                    case '>':
                        if (!inAttribute)
                        {
                            accum.Append("&gt;");
                        }
                        else
                        {
                            accum.Append(c);
                        }
                        continue;

                    case '"':
                        if (inAttribute)
                        {
                            accum.Append("&quot;");
                        }
                        else
                        {
                            accum.Append(c);
                        }
                        continue;

                    default:
                        break;
                    }
                }

                char[] chars = isPair
                    ? new[] { c, @string[offset + 1] }
                    : new[] { c };
                if (encoder.CanEncode(chars))
                {
                    accum.Append(chars);
                    continue;
                }

                // ConvertToUtf32 rejects unpaired surrogates with an exception, so fall back to the
                // raw code unit value when this position is not a real pair.
                Utf32  codePoint = isPair ? char.ConvertToUtf32(@string, offset) : c;
                string entityName;
                if (map.TryGetValue(codePoint, out entityName))
                {
                    accum.Append('&').Append(entityName).Append(';');
                }
                else
                {
                    accum.Append("&#x").AppendFormat("{0:x}", codePoint).Append(';');
                }
            }
        }