/// <summary>
/// Builds an encoder from a filter holding the Latin-1 Supplement and Miscellaneous
/// Symbols ranges and checks which sample characters come back encoded.
/// </summary>
public void Ctor_WithUnicodeRanges()
        {
            // Arrange: the filter is given exactly two Unicode ranges.
            var allowedRanges = new CodePointFilter(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(allowedRanges);

            const string eWithAcute = "\u00E9"; // LATIN SMALL LETTER E WITH ACUTE
            const string cloud = "\u2601";      // CLOUD

            // Act & assert: 'a' is expected in escaped form, the other two unchanged.
            Assert.Equal("[U+0061]", encoder.Encode("a"));
            Assert.Equal(eWithAcute, encoder.Encode(eWithAcute));
            Assert.Equal(cloud, encoder.Encode(cloud));
        }
        /// <summary>
        /// With every Unicode range allowed, the six punctuation characters in the sample
        /// sentence are still expected to be emitted in the [U+XXXX] form.
        /// </summary>
        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = encoder.Encode("Hello <>&\'\"+ there!");

            // Assert: letters, spaces and '!' survive; the rest is escaped.
            Assert.Equal("Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!", actual);
        }
Exemple #3
0
        /// <summary>
        /// Encodes a three-character window of a char array that mixes plain letters
        /// with an ampersand and checks the text written to the output.
        /// </summary>
        public void Encode_CharArray_SomeCharsValid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();
            char[] source = "abc&xyz".ToCharArray();

            // Act: arguments 2 and 3 select the "c&x" slice of the array.
            encoder.Encode(source, 2, 3, writer);

            // Assert: only the ampersand is expected in escaped form.
            string written = writer.ToString();
            Assert.Equal("c[U+0026]x", written);
        }
Exemple #4
0
        /// <summary>
        /// Uses an encoder created with no arguments and checks that both characters in
        /// the selected char-array window are written in escaped form.
        /// </summary>
        public void Encode_CharArray_AllCharsInvalid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase();
            var writer = new StringWriter();
            char[] source = "abc&xyz".ToCharArray();

            // Act: arguments 4 and 2 select the "xy" slice of the array.
            encoder.Encode(source, 4, 2, writer);

            // Assert
            string written = writer.ToString();
            Assert.Equal("[U+0078][U+0079]", written);
        }
        /// <summary>
        /// Encodes a two-character substring made up only of letters and expects it to
        /// reach the output unchanged.
        /// </summary>
        public void Encode_StringSubstring_AllCharsValid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();

            // Act: arguments 4 and 2 select "xy" out of "abc&xyz".
            encoder.Encode("abc&xyz", 4, 2, writer);

            // Assert
            string written = writer.ToString();
            Assert.Equal("xy", written);
        }
        /// <summary>
        /// Passes a count of zero together with an already-disposed writer; the test
        /// passes simply by completing without an exception.
        /// </summary>
        public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
        {
            // Arrange
            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
            StringWriter closedWriter = new StringWriter();

            // A disposed writer throws ObjectDisposedException on any write attempt.
            closedWriter.Dispose();

            // Act
            encoder.Encode("abc", 2, 0, closedWriter);

            // Assert
            // Nothing to check explicitly: reaching this line means the writer was never touched.
        }
        /// <summary>
        /// Exercises the argument validation of the string/int/int/writer Encode overload:
        /// null arguments and out-of-bounds index/count combinations must throw.
        /// </summary>
        public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase();

            // Act & assert - null arguments
            Assert.Throws<ArgumentNullException>(
                () => encoder.Encode((string)null, 0, 0, new StringWriter())); // null input string
            Assert.Throws<ArgumentNullException>(
                () => encoder.Encode("abc", 0, 3, null));                      // null writer

            // Act & assert - index/count outside the three-character input
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", -1, 2, new StringWriter()));       // negative second argument
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 2, 2, new StringWriter()));        // 2 + 2 exceeds length 3
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 4, 0, new StringWriter()));        // 4 is past the end
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 2, -1, new StringWriter()));       // negative third argument
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 1, 3, new StringWriter()));        // 1 + 3 exceeds length 3
        }
        /// <summary>
        /// Encodes a whole string through the substring overload and expects only the
        /// ampersand to be escaped in the output.
        /// </summary>
        public void Encode_StringSubstring_EntireString_SomeCharsValid()
        {
            // Arrange
            const string text = "abc&xyz";
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();

            // Act: the window spans the full input.
            encoder.Encode(text, 0, text.Length, writer);

            // Assert
            string written = writer.ToString();
            Assert.Equal("abc[U+0026]xyz", written);
        }
        /// <summary>
        /// Feeds the encoder a string containing unpaired and mis-ordered surrogates and
        /// expects each stray surrogate to be replaced by U+FFFD, while the one valid
        /// pair is written as [U+103FF].
        /// </summary>
        public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
        {
            // Arrange - every code point is allowed, so only surrogate handling is in play
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Layout of the input:
            //   a <lone high> b <lone low> c <low, then high> d <lone low> <valid pair> e <high at end>
            const string malformed = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
            const string wanted = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";

            // Act
            string actual = encoder.Encode(malformed);

            // Assert
            Assert.Equal(wanted, actual);
        }
Exemple #10
0
        /// <summary>
        /// Builds an encoder from a hand-assembled filter and checks that allowed letters
        /// pass through, while a letter not in the filter and certain always-forbidden
        /// characters are still escaped even though the filter lists them.
        /// </summary>
        public void Ctor_WithCustomFilters()
        {
            // Arrange: allow "a", "b", "d" plus three characters that must stay forbidden.
            var allowList = new CodePointFilter()
                .AllowChars("ab")
                .AllowChars('\0', '&', '\uFFFF', 'd');
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(allowList);

            // Act & assert - characters in the filter that pass through untouched
            Assert.Equal("a", encoder.Encode("a"));
            Assert.Equal("b", encoder.Encode("b"));

            // 'c' was never added to the filter
            Assert.Equal("[U+0063]", encoder.Encode("c"));

            Assert.Equal("d", encoder.Encode("d"));

            // Listed in the filter, but escaped regardless:
            Assert.Equal("[U+0000]", encoder.Encode("\0"));     // control character
            Assert.Equal("[U+0026]", encoder.Encode("&"));      // HTML-special character
            Assert.Equal("[U+FFFF]", encoder.Encode("\uFFFF")); // non-character / otherwise forbidden
        }
        /// <summary>
        /// Creates an encoder from a filter populated through AllowCharacters and verifies
        /// which single-character inputs are returned verbatim and which are escaped.
        /// </summary>
        public void Ctor_WithCustomFilters()
        {
            // Arrange
            var customFilter = new CodePointFilter()
                .AllowCharacters("ab")
                .AllowCharacters('\0', '&', '\uFFFF', 'd');
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(customFilter);

            // Act & assert - explicitly allowed letters are returned as-is
            Assert.Equal("a", encoder.Encode("a"));
            Assert.Equal("b", encoder.Encode("b"));

            // 'c' is absent from the filter, so the escaped form is expected
            Assert.Equal("[U+0063]", encoder.Encode("c"));

            Assert.Equal("d", encoder.Encode("d"));

            // These three were passed to AllowCharacters yet must still be escaped.
            Assert.Equal("[U+0000]", encoder.Encode("\0"));     // control chars are always encoded
            Assert.Equal("[U+0026]", encoder.Encode("&"));      // HTML-special chars are always encoded
            Assert.Equal("[U+FFFF]", encoder.Encode("\uFFFF")); // non-chars and other forbidden chars are always encoded
        }
        /// <summary>
        /// Encodes a two-letter window of a char array with all ranges allowed and expects
        /// the letters to be written unchanged.
        /// </summary>
        public void Encode_CharArray_AllCharsValid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();
            char[] source = "abc&xyz".ToCharArray();

            // Act: arguments 4 and 2 select the "xy" slice of the array.
            encoder.Encode(source, 4, 2, writer);

            // Assert
            string written = writer.ToString();
            Assert.Equal("xy", written);
        }
        /// <summary>
        /// Walks every value from U+0000 to U+10FFFF and compares the encoder's output
        /// for that single code point with the expectation computed inline: U+FFFD for
        /// lone surrogates, the [U+XXXX] form for forbidden values, and the unchanged
        /// input otherwise. Every astral code point is expected in the [U+X] form.
        /// </summary>
        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act & assert - BMP chars
            for (int codeUnit = 0; codeUnit <= 0xFFFF; codeUnit++)
            {
                string input = ((char)codeUnit).ToString();
                string expected;

                if (IsSurrogateCodePoint(codeUnit))
                {
                    // A surrogate on its own is expected to become the replacement char.
                    expected = "\uFFFD";
                }
                else
                {
                    bool isHtmlSensitive =
                        codeUnit == '<' || codeUnit == '>' || codeUnit == '&' ||
                        codeUnit == '\"' || codeUnit == '\'' || codeUnit == '+';

                    bool isControl = codeUnit <= 0x001F || (0x007F <= codeUnit && codeUnit <= 0x9F);

                    // Short-circuiting means IsCharacterDefined is only reached for
                    // non-control values; undefined (or otherwise disallowed) chars
                    // must be encoded as well.
                    bool mustEncode = isControl
                        || !UnicodeHelpers.IsCharacterDefined((char)codeUnit)
                        || isHtmlSensitive;

                    expected = mustEncode
                        ? "[U+" + codeUnit.ToString("X4", CultureInfo.InvariantCulture) + "]"
                        : input; // no encoding
                }

                string actual = encoder.Encode(input);
                Assert.Equal(expected, actual);
            }

            // Act & assert - astral chars
            for (int scalar = 0x10000; scalar <= 0x10FFFF; scalar++)
            {
                string expected = "[U+" + scalar.ToString("X", CultureInfo.InvariantCulture) + "]";
                string actual = encoder.Encode(Char.ConvertFromUtf32(scalar));
                Assert.Equal(expected, actual);
            }
        }
        /// <summary>
        /// Calls the substring overload with a zero count and a disposed writer; the
        /// absence of an exception is the success condition.
        /// </summary>
        public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
        {
            // Arrange
            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
            StringWriter disposedOutput = new StringWriter();

            // Once disposed, the writer raises ObjectDisposedException if written to.
            disposedOutput.Dispose();

            // Act
            encoder.Encode("abc", 2, 0, disposedOutput);

            // Assert
            // No explicit assertion: getting here without a throw is the pass condition.
        }
        /// <summary>
        /// Runs the substring overload across the full length of a string and checks
        /// that the ampersand alone is written in escaped form.
        /// </summary>
        public void Encode_StringSubstring_EntireString_SomeCharsValid()
        {
            // Arrange
            const string wholeInput = "abc&xyz";
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var sink = new StringWriter();

            // Act: start at 0 and cover every character.
            encoder.Encode(wholeInput, 0, wholeInput.Length, sink);

            // Assert
            string produced = sink.ToString();
            Assert.Equal("abc[U+0026]xyz", produced);
        }
        /// <summary>
        /// Encodes a three-character substring containing an ampersand and expects the
        /// surrounding letters to be kept and the ampersand escaped.
        /// </summary>
        public void Encode_StringSubstring_SomeCharsValid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var sink = new StringWriter();

            // Act: arguments 2 and 3 select "c&x" out of "abc&xyz".
            encoder.Encode("abc&xyz", 2, 3, sink);

            // Assert
            string produced = sink.ToString();
            Assert.Equal("c[U+0026]x", produced);
        }
        /// <summary>
        /// Encodes an entire three-letter string through the substring overload and
        /// expects the output to equal the input.
        /// </summary>
        public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var sink = new StringWriter();

            // Act: the window (0, 3) covers all of "abc".
            encoder.Encode("abc", 0, 3, sink);

            // Assert
            string produced = sink.ToString();
            Assert.Equal("abc", produced);
        }
        /// <summary>
        /// Uses an encoder created with no arguments and checks that both characters of
        /// the selected substring are written in escaped form.
        /// </summary>
        public void Encode_StringSubstring_AllCharsInvalid()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase();
            var sink = new StringWriter();

            // Act: arguments 4 and 2 select "xy" out of "abc&xyz".
            encoder.Encode("abc&xyz", 4, 2, sink);

            // Assert
            string produced = sink.ToString();
            Assert.Equal("[U+0078][U+0079]", produced);
        }
        /// <summary>
        /// Verifies surrogate handling: each stray or mis-ordered surrogate in the input
        /// is expected as U+FFFD in the result, and the single well-formed pair is
        /// expected as [U+103FF].
        /// </summary>
        public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
        {
            // Arrange - allow all code points
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Input shape:
            //   "a<unpaired leading>b<unpaired trailing>c<trailing before leading>
            //    d<unpaired trailing><valid>e<high at end of string>"
            const string brokenText = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
            const string repairedText = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";

            // Act
            string encoded = encoder.Encode(brokenText);

            // Assert
            Assert.Equal(repairedText, encoded);
        }
        /// <summary>
        /// Passing null to Encode is expected to yield null.
        /// </summary>
        public void Encode_NullInput_ReturnsNull()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            var encoded = encoder.Encode(null);

            // Assert
            Assert.Null(encoded);
        }
        /// <summary>
        /// When the sample text is encoded, the very same string instance is expected
        /// back (reference equality, not just equal contents).
        /// </summary>
        public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            string plainText = "Hello, there!";

            // Act
            var encoded = encoder.Encode(plainText);

            // Assert: Assert.Same checks object identity.
            Assert.Same(plainText, encoded);
        }
        /// <summary>
        /// Encoding the empty string is expected to produce the empty string.
        /// </summary>
        public void Encode_EmptyStringInput_ReturnsEmptyString()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            var encoded = encoder.Encode("");

            // Assert
            Assert.Equal("", encoded);
        }
        /// <summary>
        /// Exhaustive check over U+0000..U+10FFFF with all ranges allowed. For each BMP
        /// value the expectation is U+FFFD (lone surrogate), the [U+XXXX] form (forbidden
        /// value) or the input itself; every astral code point is expected as [U+X].
        /// </summary>
        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
        {
            // Arrange
            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act & assert - BMP chars
            for (int value = 0; value <= 0xFFFF; value++)
            {
                string input = new String((char)value, 1);
                string expected;

                if (IsSurrogateCodePoint(value))
                {
                    expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
                }
                else
                {
                    // Characters that are escaped regardless of the allowed ranges.
                    bool mustEncode =
                        value == '<' || value == '>' || value == '&' ||
                        value == '\"' || value == '\'' || value == '+';

                    bool isControlChar = value <= 0x001F || (0x007F <= value && value <= 0x9F);
                    if (isControlChar)
                    {
                        mustEncode = true;
                    }
                    else if (!UnicodeHelpers.IsCharacterDefined((char)value))
                    {
                        mustEncode = true; // undefined (or otherwise disallowed) char
                    }

                    expected = mustEncode
                        ? String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", value)
                        : input; // no encoding
                }

                Assert.Equal(expected, encoder.Encode(input));
            }

            // Act & assert - astral chars
            for (int value = 0x10000; value <= 0x10FFFF; value++)
            {
                string input = Char.ConvertFromUtf32(value);
                string expected = String.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", value);

                Assert.Equal(expected, encoder.Encode(input));
            }
        }
        /// <summary>
        /// Checks that the substring Encode overload rejects null arguments with
        /// ArgumentNullException and invalid index/count pairs with
        /// ArgumentOutOfRangeException.
        /// </summary>
        public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
        {
            // Arrange
            var encoder = new CustomUnicodeEncoderBase();

            // Act & assert - null input string, then null writer
            Assert.Throws<ArgumentNullException>(
                () => encoder.Encode((string)null, 0, 0, new StringWriter()));
            Assert.Throws<ArgumentNullException>(
                () => encoder.Encode("abc", 0, 3, null));

            // Act & assert - windows that do not fit inside the three-character input
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", -1, 2, new StringWriter())); // negative second argument
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 2, 2, new StringWriter()));  // 2 + 2 runs past the end
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 4, 0, new StringWriter()));  // 4 is beyond the length
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 2, -1, new StringWriter())); // negative third argument
            Assert.Throws<ArgumentOutOfRangeException>(
                () => encoder.Encode("abc", 1, 3, new StringWriter()));  // 1 + 3 runs past the end
        }