Beispiel #1
0
        /// <summary>
        /// Builds the HTML encoder on top of an <c>OptimizedInboxTextEncoder</c> that is
        /// configured from the supplied settings.
        /// </summary>
        /// <param name="settings">
        /// Settings whose allowed-code-points bitmap is handed to the inner encoder.
        /// A null value is reported through <c>ThrowHelper.ThrowArgumentNullException</c>.
        /// </param>
        internal DefaultHtmlEncoder(TextEncoderSettings settings)
        {
            // Validate the argument first; the throw helper keeps the throw site
            // out of this constructor's body.
            if (settings is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.settings);
            }

            // No extra characters are listed here: the inner encoder receives only the
            // shared escaper singleton and the caller's allowed-code-points bitmap.
            _innerEncoder = new OptimizedInboxTextEncoder(
                EscaperImplementation.Singleton,
                settings.GetAllowedCodePointsBitmap());
        }
Beispiel #2
0
        private DefaultJavaScriptEncoder(TextEncoderSettings settings, bool allowMinimalJsonEscaping)
        {
            if (settings is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.settings);
            }

            // '\' (U+005C REVERSE SOLIDUS) must always be escaped in JavaScript / ECMAScript / JSON.
            // '/' (U+002F SOLIDUS) is not JavaScript / ECMAScript / JSON-sensitive, so it doesn't need to be escaped.
            // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).

            _innerEncoder = allowMinimalJsonEscaping
                ? new OptimizedInboxTextEncoder(EscaperImplementation.SingletonMinimallyEscaped, settings.GetAllowedCodePointsBitmap(), forbidHtmlSensitiveCharacters: false,
                                                extraCharactersToEscape: stackalloc char[] { '\"', '\\' })
Beispiel #3
0
        internal DefaultUrlEncoder(TextEncoderSettings settings)
        {
            if (settings is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.settings);
            }

            // Per RFC 3987, Sec. 2.2, we want encodings that are safe for
            // four particular components: 'isegment', 'ipath-noscheme',
            // 'iquery', and 'ifragment'. The relevant definitions are below.
            //
            //    ipath-noscheme = isegment-nz-nc *( "/" isegment )
            //
            //    isegment       = *ipchar
            //
            //    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
            //                         / "@" )
            //                   ; non-zero-length segment without any colon ":"
            //
            //    ipchar         = iunreserved / pct-encoded / sub-delims / ":"
            //                   / "@"
            //
            //    iquery         = *( ipchar / iprivate / "/" / "?" )
            //
            //    ifragment      = *( ipchar / "/" / "?" )
            //
            //    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
            //
            //    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
            //                   / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
            //                   / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
            //                   / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
            //                   / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
            //                   / %xD0000-DFFFD / %xE1000-EFFFD
            //
            //    pct-encoded    = "%" HEXDIG HEXDIG
            //
            //    sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
            //                   / "*" / "+" / "," / ";" / "="
            //
            // The only common characters between these four components are the
            // intersection of 'isegment-nz-nc' and 'ipchar', which is really
            // just 'isegment-nz-nc' (colons forbidden).
            //
            // From this list, the base encoder already forbids "&", "'", "+",
            // and we'll additionally forbid "=" since it has special meaning
            // in x-www-form-urlencoded representations.
            //
            // This means that the full list of allowed characters from the
            // Basic Latin set is:
            // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"

            _innerEncoder = new OptimizedInboxTextEncoder(EscaperImplementation.Singleton, settings.GetAllowedCodePointsBitmap(), extraCharactersToEscape: stackalloc char[] {
                ' ', // chars from Basic Latin which aren't already disallowed by the base encoder
                '#',
                '%',
                '/',
                ':',
                '=',
                '?',
                '[',
                '\\',
                ']',
                '^',
                '`',
                '{',
                '|',
                '}',
                '\uFFF0', // specials (U+FFF0 .. U+FFFF) are forbidden by the definition of 'ucschar' above
                '\uFFF1',
                '\uFFF2',
                '\uFFF3',
                '\uFFF4',
                '\uFFF5',
                '\uFFF6',
                '\uFFF7',
                '\uFFF8',
                '\uFFF9',
                '\uFFFA',
                '\uFFFB',
                '\uFFFC',
                '\uFFFD',
                '\uFFFE',
                '\uFFFF',
            });