Example #1
0
        /// <summary>
        /// Instantiates an HTML encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultHtmlEncoder(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            // Work on a local and publish it to the field once every restriction is applied.
            var allowed = filter.GetAllowedCharacters();

            // Remove code points that have no character mapping or are never permitted
            // (categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
            allowed.ForbidUndefinedCharacters();

            // Remove the characters this encoder treats as HTML-sensitive.
            ForbidHtmlCharacters(allowed);

            _allowedCharacters = allowed;
        }
Example #2
0
        /// <summary>
        /// Instantiates an HTML encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultHtmlEncoder(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            // Work on a local and publish it to the field once every restriction is applied.
            var allowed = filter.GetAllowedCharacters();

            // Remove code points that have no character mapping or are never permitted
            // (categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
            allowed.ForbidUndefinedCharacters();

            // Remove the characters this encoder treats as HTML-sensitive.
            ForbidHtmlCharacters(allowed);

            _allowedCharacters = allowed;
        }
Example #3
0
        /// <summary>
        /// Adds every BMP character allowed by <paramref name="filter"/> to this filter's allow list.
        /// </summary>
        /// <param name="filter">The filter whose allowed code points are merged in.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public virtual void AllowFilter(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            foreach (var codePoint in filter.GetAllowedCodePoints())
            {
                // A code point that does not survive the round trip through char
                // lies outside the BMP and is skipped.
                char bmpChar = (char)codePoint;
                if (codePoint != bmpChar)
                {
                    continue;
                }

                _allowedCharactersBitmap.AllowCharacter(bmpChar);
            }
        }
Example #4
0
        /// <summary>
        /// Adds every BMP character allowed by <paramref name="filter"/> to this filter's allow list.
        /// </summary>
        /// <param name="filter">The filter whose allowed code points are merged in.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public virtual void AllowFilter(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            foreach (var codePoint in filter.GetAllowedCodePoints())
            {
                // A code point that does not survive the round trip through char
                // lies outside the BMP and is skipped.
                char bmpChar = (char)codePoint;
                if (codePoint != bmpChar)
                {
                    continue;
                }

                _allowedCharactersBitmap.AllowCharacter(bmpChar);
            }
        }
Example #5
0
        /// <summary>
        /// Instantiates a JavaScript encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultJavaScriptEncoder(CodePointFilter filter)
        {
            // Validate up front so a null argument surfaces as ArgumentNullException
            // naming the parameter, rather than a NullReferenceException on the call below.
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            _allowedCharsBitmap = filter.GetAllowedCharsBitmap();

            // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
            // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
            _allowedCharsBitmap.ForbidUndefinedCharacters();

            // Forbid characters that are special in HTML.
            // Even though this is not an HTML encoder,
            // it's unfortunately common for developers to
            // forget to HTML-encode a string once it has been JS-encoded,
            // so this offers extra protection.
            DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharsBitmap);

            // Backslash and forward slash are additionally forbidden for this encoder.
            _allowedCharsBitmap.ForbidCharacter('\\');
            _allowedCharsBitmap.ForbidCharacter('/');
        }
Example #6
0
        /// <summary>
        /// Instantiates an encoder using a custom allow list of characters.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <param name="maxOutputCharsPerInputChar">
        /// Value stored in the encoder's max-output-chars-per-input-char field; it is not validated here.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        protected UnicodeEncoderBase(CodePointFilter filter, int maxOutputCharsPerInputChar)
        {
            // Validate up front so a null argument surfaces as ArgumentNullException
            // naming the parameter, rather than a NullReferenceException on the call below.
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            _maxOutputCharsPerInputChar = maxOutputCharsPerInputChar;
            _allowedCharsBitmap         = filter.GetAllowedCharsBitmap();

            // Forbid characters that are special in HTML.
            // Even though this is a common encoder used by everybody (including URL
            // and JavaScript strings), it's unfortunately common for developers to
            // forget to HTML-encode a string once it has been URL-encoded or
            // JavaScript string-escaped, so this offers extra protection.
            ForbidCharacter('<');
            ForbidCharacter('>');
            ForbidCharacter('&');
            ForbidCharacter('\''); // can be used to escape attributes
            ForbidCharacter('\"'); // can be used to escape attributes
            ForbidCharacter('+');  // technically not HTML-specific, but can be used to perform UTF7-based attacks

            // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
            // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
            _allowedCharsBitmap.ForbidUndefinedCharacters();
        }
        /// <summary>
        /// Instantiates a JavaScript encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultJavaScriptEncoder(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            // Work on a local and publish it to the field once every restriction is applied.
            var allowed = filter.GetAllowedCharacters();

            // Remove code points that have no character mapping or are never permitted
            // (categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
            allowed.ForbidUndefinedCharacters();

            // HTML-sensitive characters are removed too. This is not an HTML encoder,
            // but developers often forget to HTML-encode a string after JS-encoding it,
            // so this adds a layer of protection.
            DefaultHtmlEncoder.ForbidHtmlCharacters(allowed);

            // Backslash and forward slash are additionally forbidden for this encoder.
            allowed.ForbidCharacter('\\');
            allowed.ForbidCharacter('/');

            _allowedCharacters = allowed;
        }
Example #8
0
 /// <summary>
 /// Returns <paramref name="filter"/> itself when it already is a <see cref="CodePointFilter"/>;
 /// otherwise returns a new <see cref="CodePointFilter"/> constructed from it.
 /// </summary>
 /// <param name="filter">The filter to wrap.</param>
 internal static CodePointFilter Wrap(CodePointFilter filter)
 {
     CodePointFilter existing = filter as CodePointFilter;
     if (existing != null)
     {
         // Already the right type: hand it back without cloning.
         return existing;
     }

     return new CodePointFilter(filter);
 }
Example #9
0
 /// <summary>
 /// Creates a <see cref="DefaultJavaScriptEncoder"/> configured with <paramref name="filter"/>.
 /// </summary>
 /// <param name="filter">The code point filter passed to the encoder's constructor.</param>
 /// <returns>The newly created encoder.</returns>
 public static JavaScriptEncoder Create(CodePointFilter filter)
 {
     JavaScriptEncoder encoder = new DefaultJavaScriptEncoder(filter);
     return encoder;
 }
Example #10
0
        /// <summary>
        /// Instantiates a URL encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultUrlEncoder(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            // Work on a local and publish it to the field once every restriction is applied.
            var allowed = filter.GetAllowedCharacters();

            // Remove code points that have no character mapping or are never permitted
            // (categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
            allowed.ForbidUndefinedCharacters();

            // HTML-sensitive characters are removed too. This is not an HTML encoder,
            // but developers often forget to HTML-encode a string after URL-encoding it,
            // so this adds a layer of protection.
            DefaultHtmlEncoder.ForbidHtmlCharacters(allowed);

            // RFC 3987, Sec. 2.2: the output must be safe inside each of the four
            // components 'isegment', 'ipath-noscheme', 'iquery' and 'ifragment'.
            // Relevant grammar:
            //
            //    ipath-noscheme = isegment-nz-nc *( "/" isegment )
            //    isegment       = *ipchar
            //    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
            //                   ; non-zero-length segment without any colon ":"
            //    ipchar         = iunreserved / pct-encoded / sub-delims / ":" / "@"
            //    iquery         = *( ipchar / iprivate / "/" / "?" )
            //    ifragment      = *( ipchar / "/" / "?" )
            //    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
            //    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
            //                   / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
            //                   / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
            //                   / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
            //                   / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
            //                   / %xD0000-DFFFD / %xE1000-EFFFD
            //    pct-encoded    = "%" HEXDIG HEXDIG
            //    sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
            //                   / "*" / "+" / "," / ";" / "="
            //
            // The characters common to all four components are those of
            // 'isegment-nz-nc', i.e. 'ipchar' with the colon excluded.
            //
            // Of those, "&", "'" and "+" are expected to be forbidden already by the
            // HTML-character step above; "=" is forbidden here as well because of its
            // special meaning in x-www-form-urlencoded representations.
            //
            // Resulting Basic Latin allow list:
            // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"

            // Basic Latin characters that still need to be forbidden at this point.
            const string forbiddenChars = @" #%/:=?[\]^`{|}";

            for (int index = 0; index < forbiddenChars.Length; index++)
            {
                allowed.ForbidCharacter(forbiddenChars[index]);
            }

            // The Specials block (U+FFF0 .. U+FFFF) is excluded by the 'ucschar' definition above.
            for (int special = 0xFFF0; special <= 0xFFFF; special++)
            {
                allowed.ForbidCharacter((char)special);
            }

            _allowedCharacters = allowed;
        }
Example #11
0
 /// <summary>
 /// Creates a <see cref="DefaultUrlEncoder"/> configured with <paramref name="filter"/>.
 /// </summary>
 /// <param name="filter">The code point filter passed to the encoder's constructor.</param>
 /// <returns>The newly created encoder.</returns>
 public static UrlEncoder Create(CodePointFilter filter)
 {
     UrlEncoder encoder = new DefaultUrlEncoder(filter);
     return encoder;
 }
Example #12
0
 /// <summary>
 /// Creates a <see cref="DefaultHtmlEncoder"/> configured with <paramref name="filter"/>.
 /// </summary>
 /// <param name="filter">The code point filter passed to the encoder's constructor.</param>
 /// <returns>The newly created encoder.</returns>
 public static HtmlEncoder Create(CodePointFilter filter)
 {
     HtmlEncoder encoder = new DefaultHtmlEncoder(filter);
     return encoder;
 }
 /// <summary>
 /// Creates a <see cref="DefaultJavaScriptEncoder"/> configured with <paramref name="filter"/>.
 /// </summary>
 /// <param name="filter">The code point filter passed to the encoder's constructor.</param>
 /// <returns>The newly created encoder.</returns>
 public static JavaScriptEncoder Create(CodePointFilter filter)
 {
     JavaScriptEncoder encoder = new DefaultJavaScriptEncoder(filter);
     return encoder;
 }
Example #14
0
 /// <summary>
 /// Instantiates an encoder using a custom code point filter. Any character not in the
 /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
 /// method will be escaped.
 /// </summary>
 /// <param name="filter">
 /// The filter describing the allowed code points. It is passed through
 /// <c>CodePointFilter.Wrap</c> and the result is used to construct the
 /// <c>UrlUnicodeEncoder</c> this instance delegates to.
 /// </param>
 public UrlEncoder(ICodePointFilter filter)
     : this(new UrlUnicodeEncoder(CodePointFilter.Wrap(filter)))
 {
     // All work happens in the constructor initializer above.
 }
Example #15
0
            /// <summary>
            /// Instantiates the URL encoder on top of the base encoder and then forbids
            /// the additional characters that are not safe in URL components.
            /// </summary>
            /// <param name="filter">The code point filter forwarded to the base constructor.</param>
            internal UrlUnicodeEncoder(CodePointFilter filter)
                : base(filter, MaxOutputCharsPerInputChar)
            {
                // RFC 3987, Sec. 2.2: the output must be safe inside each of the four
                // components 'isegment', 'ipath-noscheme', 'iquery' and 'ifragment'.
                // Relevant grammar:
                //
                //    ipath-noscheme = isegment-nz-nc *( "/" isegment )
                //    isegment       = *ipchar
                //    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
                //                   ; non-zero-length segment without any colon ":"
                //    ipchar         = iunreserved / pct-encoded / sub-delims / ":" / "@"
                //    iquery         = *( ipchar / iprivate / "/" / "?" )
                //    ifragment      = *( ipchar / "/" / "?" )
                //    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
                //    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
                //                   / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
                //                   / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
                //                   / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
                //                   / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
                //                   / %xD0000-DFFFD / %xE1000-EFFFD
                //    pct-encoded    = "%" HEXDIG HEXDIG
                //    sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
                //                   / "*" / "+" / "," / ";" / "="
                //
                // The characters common to all four components are those of
                // 'isegment-nz-nc', i.e. 'ipchar' with the colon excluded.
                //
                // Of those, the base encoder already forbids "&", "'" and "+"; "=" is
                // forbidden here as well because of its special meaning in
                // x-www-form-urlencoded representations.
                //
                // Resulting Basic Latin allow list:
                // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"

                // Basic Latin characters not already disallowed by the base encoder.
                const string forbiddenChars = @" #%/:=?[\]^`{|}";

                for (int index = 0; index < forbiddenChars.Length; index++)
                {
                    ForbidCharacter(forbiddenChars[index]);
                }

                // The Specials block (U+FFF0 .. U+FFFF) is excluded by the 'ucschar' definition above.
                for (int special = 0xFFF0; special <= 0xFFFF; special++)
                {
                    ForbidCharacter((char)special);
                }

                // Supplementary characters are forbidden anyway by the base encoder
            }
Example #16
0
        /// <summary>
        /// Instantiates a URL encoder whose allow list starts from the characters
        /// permitted by <paramref name="filter"/> and is then narrowed.
        /// </summary>
        /// <param name="filter">The code point filter supplying the initial allow list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
        public DefaultUrlEncoder(CodePointFilter filter)
        {
            if (filter == null)
            {
                throw new ArgumentNullException("filter");
            }

            // Work on a local and publish it to the field once every restriction is applied.
            var allowed = filter.GetAllowedCharacters();

            // Remove code points that have no character mapping or are never permitted
            // (categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
            allowed.ForbidUndefinedCharacters();

            // HTML-sensitive characters are removed too. This is not an HTML encoder,
            // but developers often forget to HTML-encode a string after URL-encoding it,
            // so this adds a layer of protection.
            DefaultHtmlEncoder.ForbidHtmlCharacters(allowed);

            // RFC 3987, Sec. 2.2: the output must be safe inside each of the four
            // components 'isegment', 'ipath-noscheme', 'iquery' and 'ifragment'.
            // Relevant grammar:
            //
            //    ipath-noscheme = isegment-nz-nc *( "/" isegment )
            //    isegment       = *ipchar
            //    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
            //                   ; non-zero-length segment without any colon ":"
            //    ipchar         = iunreserved / pct-encoded / sub-delims / ":" / "@"
            //    iquery         = *( ipchar / iprivate / "/" / "?" )
            //    ifragment      = *( ipchar / "/" / "?" )
            //    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
            //    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
            //                   / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
            //                   / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
            //                   / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
            //                   / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
            //                   / %xD0000-DFFFD / %xE1000-EFFFD
            //    pct-encoded    = "%" HEXDIG HEXDIG
            //    sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
            //                   / "*" / "+" / "," / ";" / "="
            //
            // The characters common to all four components are those of
            // 'isegment-nz-nc', i.e. 'ipchar' with the colon excluded.
            //
            // Of those, "&", "'" and "+" are expected to be forbidden already by the
            // HTML-character step above; "=" is forbidden here as well because of its
            // special meaning in x-www-form-urlencoded representations.
            //
            // Resulting Basic Latin allow list:
            // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"

            // Basic Latin characters that still need to be forbidden at this point.
            const string forbiddenChars = @" #%/:=?[\]^`{|}";

            for (int index = 0; index < forbiddenChars.Length; index++)
            {
                allowed.ForbidCharacter(forbiddenChars[index]);
            }

            // The Specials block (U+FFF0 .. U+FFFF) is excluded by the 'ucschar' definition above.
            for (int special = 0xFFF0; special <= 0xFFFF; special++)
            {
                allowed.ForbidCharacter((char)special);
            }

            _allowedCharacters = allowed;
        }
Example #17
0
 /// <summary>
 /// Copy-style constructor: creates a new bitmap for this filter and then allows
 /// everything that <paramref name="other"/> allows.
 /// </summary>
 /// <param name="other">The filter whose allow list is copied into this instance.</param>
 public CodePointFilter(CodePointFilter other)
 {
     // Give this instance its own bitmap before merging in the other filter.
     this._allowedCharactersBitmap = AllowedCharactersBitmap.CreateNew();

     // Argument checking of 'other' is left to AllowFilter.
     this.AllowFilter(other);
 }
Example #18
0
 /// <summary>
 /// Instantiates the encoder by forwarding <paramref name="filter"/> and
 /// <c>MaxOutputCharsPerInputChar</c> to the base constructor.
 /// </summary>
 /// <param name="filter">The code point filter forwarded to the base constructor.</param>
 internal HtmlUnicodeEncoder(CodePointFilter filter)
     : base(filter, MaxOutputCharsPerInputChar)
 {
     // No additional setup beyond what the base constructor does.
 }
Example #19
0
 /// <summary>
 /// Returns <paramref name="filter"/> itself when it already is a <see cref="CodePointFilter"/>;
 /// otherwise returns a new <see cref="CodePointFilter"/> constructed from it.
 /// </summary>
 /// <param name="filter">The filter to wrap.</param>
 internal static CodePointFilter Wrap(CodePointFilter filter)
 {
     CodePointFilter existing = filter as CodePointFilter;
     if (existing != null)
     {
         // Already the right type: hand it back without cloning.
         return existing;
     }

     return new CodePointFilter(filter);
 }
Example #20
0
 /// <summary>
 /// Instantiates an encoder using a custom code point filter. Any character not in the
 /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
 /// method will be escaped.
 /// </summary>
 /// <param name="filter">
 /// The filter describing the allowed code points. It is passed through
 /// <c>CodePointFilter.Wrap</c> and the result is used to construct the
 /// <c>HtmlUnicodeEncoder</c> this instance delegates to.
 /// </param>
 public HtmlEncoder(ICodePointFilter filter)
     : this(new HtmlUnicodeEncoder(CodePointFilter.Wrap(filter)))
 {
     // All work happens in the constructor initializer above.
 }
Example #21
0
 /// <summary>
 /// Instantiates an encoder using a custom code point filter. Any character not in the
 /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
 /// method will be escaped.
 /// </summary>
 /// <param name="filter">
 /// The filter describing the allowed code points. It is passed through
 /// <c>CodePointFilter.Wrap</c> and the result is used to construct the
 /// <c>JavaScriptStringUnicodeEncoder</c> this instance delegates to.
 /// </param>
 public JavaScriptStringEncoder(ICodePointFilter filter)
     : this(new JavaScriptStringUnicodeEncoder(CodePointFilter.Wrap(filter)))
 {
     // All work happens in the constructor initializer above.
 }
Example #22
0
 /// <summary>
 /// Copy-style constructor: creates a new bitmap for this filter and then allows
 /// everything that <paramref name="other"/> allows.
 /// </summary>
 /// <param name="other">The filter whose allow list is copied into this instance.</param>
 public CodePointFilter(CodePointFilter other)
 {
     // Give this instance its own bitmap before merging in the other filter.
     this._allowedCharactersBitmap = AllowedCharactersBitmap.CreateNew();

     // Argument checking of 'other' is left to AllowFilter.
     this.AllowFilter(other);
 }