예제 #1
0
        public void DoNotDoubleDecode()
        {
            // "=3D" is the encoded form of '='. Decoding "=3D3D" must therefore yield the
            // literal text "=3D"; the decoder must not run a second pass over its own output.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string encoded = "=3D3D";

            string decoded = QuotedPrintable.DecodeEncodedWord(encoded, latin1);

            Assert.AreEqual("=3D", decoded);
        }
예제 #2
0
        public void RFCExample()
        {
            // Each "=\r\n" is a soft line break: it only wraps the encoded text and
            // is expected to vanish from the decoded sentence.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string wrapped = "Now is the time =\r\nfor all folk to come=\r\n to the aid of their country.";
            const string joined  = "Now is the time for all folk to come to the aid of their country.";

            string decoded = QuotedPrintable.DecodeEncodedWord(wrapped, latin1);

            Assert.AreEqual(joined, decoded);
        }
예제 #3
0
        public void LiteralSpaceAndTabSupported()
        {
            // Literal spaces and tabs are expected to pass through the decoder unchanged,
            // so the text doubles as its own expected result.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string text = "Test for space\tand\ttabs";

            string decoded = QuotedPrintable.DecodeEncodedWord(text, latin1);

            Assert.AreEqual(text, decoded);
        }
예제 #4
0
        public void DoNotTouchLiterals()
        {
            // Printable ASCII characters that must survive decoding untouched.
            // '=' and '?' are left out of the sample: '=' introduces an escape sequence
            // and '?' delimits the parts of an encoded-word.
            // The upper-case run is the complete alphabet A-Z (an earlier revision had a
            // typo that duplicated 'I' and dropped 'P', so 'P' was never exercised).
            const string input          = "!\"#$%&'()*+,-./0123456789:;<>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";

            // The only expected difference: '_' is the encoded form of SPACE.
            const string expectedOutput = "!\"#$%&'()*+,-./0123456789:;<>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ `abcdefghijklmnopqrstuvwxyz{|}~";

            string output = QuotedPrintable.DecodeEncodedWord(input, Encoding.GetEncoding("iso-8859-1"));

            Assert.AreEqual(expectedOutput, output);
        }
예제 #5
0
        public void CanDecodeEqualSignTwoTimes()
        {
            // Two consecutive "=3D" escapes must each decode to one '='.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string encoded = "=3D=3D";

            string decoded = QuotedPrintable.DecodeEncodedWord(encoded, latin1);

            Assert.AreEqual("==", decoded);
        }
예제 #6
0
        public void CanDecodeUnderscoreToSpace()
        {
            // In an encoded-word, a single underscore stands for a single SPACE.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string encoded = "_";

            string decoded = QuotedPrintable.DecodeEncodedWord(encoded, latin1);

            Assert.AreEqual(" ", decoded);
        }
예제 #7
0
        public void CanHandleWindows1252Encoding()
        {
            // The escaped bytes 0xC5, 0xF7 and 0x96 are interpreted with code page 1252,
            // where they are expected to map to 'Å', '÷' and '–' (en dash) respectively.
            Encoding windows1252 = Encoding.GetEncoding(1252);
            const string encoded = "=C5=F7=96";

            string decoded = QuotedPrintable.DecodeEncodedWord(encoded, windows1252);

            Assert.AreEqual("Å÷–", decoded);
        }
예제 #8
0
        public void CanDecodeSpanishSentence()
        {
            // Mixes hex escapes for non-ASCII characters (=A1, =F1) with '_' standing
            // for SPACE inside an otherwise literal sentence.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string encoded = "=A1Hola,_se=F1or!";

            string decoded = QuotedPrintable.DecodeEncodedWord(encoded, latin1);

            Assert.AreEqual("¡Hola, señor!", decoded);
        }
예제 #9
0
        public void CanDecodeSoftLineBreak()
        {
            // An '=' immediately followed by CRLF is a soft line break. A string that
            // consists of nothing but a soft line break must decode to the empty string.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string softBreak = "=\r\n";

            string decoded = QuotedPrintable.DecodeEncodedWord(softBreak, latin1);

            Assert.AreEqual("", decoded);
        }
예제 #10
0
        public void ImplementationShouldBeRobustSmallHexDigitsAfterEqual()
        {
            // Lower-case hex digits after '=' are to be treated exactly like upper-case ones.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string lowerCaseHex = "=3d=a1";
            const string decodedText  = "=¡";

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(lowerCaseHex, latin1); });

            // Second requirement: the decoded value must be correct.
            string actual = QuotedPrintable.DecodeEncodedWord(lowerCaseHex, latin1);

            Assert.AreEqual(decodedText, actual);
        }
예제 #11
0
        public void ImplementationShouldBeRobustControlCharactersCarriageReturnNewlineNotPair()
        {
            // None of the CR / LF characters below forms a valid "\r\n" pair: the leading
            // pair is in reversed order and the remaining ones stand alone. The test
            // expects every one of them to be dropped from the output.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string withStrayBreaks = "\n\runit\ned\r";
            const string cleaned         = "united";

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(withStrayBreaks, latin1); });

            // Second requirement: the stray control characters are gone.
            string actual = QuotedPrintable.DecodeEncodedWord(withStrayBreaks, latin1);

            Assert.AreEqual(cleaned, actual);
        }
예제 #12
0
        public void CanDecodeLongSentence()
        {
            // The sentence combines an escaped '=' (=3D), an escaped SPACE (=20) and a
            // soft line break ("=\r\n") that must disappear when decoded.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string encoded = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
            const string plain   = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(encoded, latin1); });

            // Second requirement: the decoded sentence must be correct.
            string actual = QuotedPrintable.DecodeEncodedWord(encoded, latin1);

            Assert.AreEqual(plain, actual);
        }
예제 #13
0
        public void ImplementationShouldBeRobustNoHexAfterEqual()
        {
            // "=PK" is illegal input: the '=' is not followed by two hex digits.
            // The test expects a robust decoder that neither throws nor alters such
            // input, so the malformed text is also the expected output.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string malformed = "=PK";

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(malformed, latin1); });

            // Second requirement: the input comes back unchanged.
            string actual = QuotedPrintable.DecodeEncodedWord(malformed, latin1);

            Assert.AreEqual(malformed, actual);
        }
예제 #14
0
        public void ImplementationShouldBeRobustNothingAfterEqual()
        {
            // A lone '=' with nothing after it is illegal input. The test expects the
            // decoder to stay robust: no exception, and the text is treated as if it
            // had not been encoded at all.
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
            const string danglingEquals = "=";

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(danglingEquals, latin1); });

            // Second requirement: the '=' is passed through as-is.
            string actual = QuotedPrintable.DecodeEncodedWord(danglingEquals, latin1);

            Assert.AreEqual("=", actual);
        }
예제 #15
0
        public void ImplementationShouldBeRobustControlCharacters()
        {
            // Input made up solely of control characters. Only the CRLF pair and the
            // horizontal tab are expected to survive; everything else must be removed.
            char[] controlChars = new char[]
            {
                '\a',       // bell / alert
                '\b',       // backspace
                '\f',       // form feed
                '\0',       // null
                '\r',       // carriage return - allowed as part of "\r\n"
                '\n',       // newline         - allowed as part of "\r\n"
                '\t',       // horizontal tab  - allowed
                '\u007F'    // delete
            };
            string rawInput = new string(controlChars);
            const string survivors = "\r\n\t";
            Encoding latin1 = Encoding.GetEncoding("iso-8859-1");

            // First requirement: decoding must not throw.
            Assert.DoesNotThrow(delegate { QuotedPrintable.DecodeEncodedWord(rawInput, latin1); });

            // Second requirement: only the allowed characters remain.
            string actual = QuotedPrintable.DecodeEncodedWord(rawInput, latin1);

            Assert.AreEqual(survivors, actual);
        }
예제 #16
0
        /// <summary>
        /// Decode text that is encoded with the <see cref="EncodedWord"/> encoding.<br/>
        ///<br/>
        /// This method will decode any encoded-word found in the string.<br/>
        /// All parts which are not encoded will not be touched.<br/>
        /// <br/>
        /// From <a href="http://tools.ietf.org/html/rfc2047">RFC 2047</a>:<br/>
        /// <code>
        /// Generally, an "encoded-word" is a sequence of printable ASCII
        /// characters that begins with "=?", ends with "?=", and has two "?"s in
        /// between.  It specifies a character set and an encoding method, and
        /// also includes the original text encoded as graphic ASCII characters,
        /// according to the rules for that encoding method.
        /// </code>
        /// Example:<br/>
        /// <c>=?ISO-8859-1?q?this=20is=20some=20text?= other text here</c>
        /// </summary>
        /// <remarks>See <a href="http://tools.ietf.org/html/rfc2047#section-2">RFC 2047 section 2</a> "Syntax of encoded-words" for more details</remarks>
        /// <param name="encodedWords">Source text. May be content which is not encoded.</param>
        /// <returns>Decoded text</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="encodedWords"/> is <see langword="null"/></exception>
        /// <exception cref="ArgumentException">If an encoded-word specifies an encoding method other than "B" or "Q"</exception>
        public static string Decode(string encodedWords)
        {
            if (encodedWords == null)
            {
                throw new ArgumentNullException("encodedWords");
            }

            // Notice that RFC2231 redefines the BNF to
            // encoded-word := "=?" charset ["*" language] "?" encoded-text "?="
            // but no usage of this BNF has been spotted yet. It is mentioned here to
            // ease debugging if such a case is discovered.

            // This is the regex that should fit the BNF.
            // The RFC says that NO WHITESPACE is allowed in this encoding, but there are examples
            // where whitespace is present, and therefore this regex allows for it in the content.
            const string encodedWordRegex = @"\=\?(?<Charset>\S+?)\?(?<Encoding>\w)\?(?<Content>.+?)\?\=";
            // \w	Matches any word character including underscore. Equivalent to "[A-Za-z0-9_]".
            // \S	Matches any non-whitespace character. Equivalent to "[^ \f\n\r\t\v]".
            // +?   non-greedy equivalent to +
            // (?<NAME>REGEX) is a named group with name NAME and regular expression REGEX

            // Any amount of linear-space-white between 'encoded-word's,
            // even if it includes a CRLF followed by one or more SPACEs,
            // is ignored for the purposes of display.
            // http://tools.ietf.org/html/rfc2047#page-12
            // Define a regular expression that captures two encoded words with some whitespace between them
            const string replaceRegex = @"(?<first>" + encodedWordRegex + @")\s+(?<second>" + encodedWordRegex + ")";

            // Then, find occurrences of such an expression and remove the whitespace in between.
            // This needs to be done twice for inputs such as "=?UTF-8?Q?a?= =?UTF-8?Q?b?= =?UTF-8?Q?c?="
            // because a single pass consumes the words pairwise and leaves the gap before "c" in place.
            encodedWords = Regex.Replace(encodedWords, replaceRegex, "${first}${second}");
            encodedWords = Regex.Replace(encodedWords, replaceRegex, "${first}${second}");

            // Substitute every encoded-word at the exact position where it was matched.
            // A global text replace (string.Replace) per match would also rewrite already
            // decoded output that happens to contain the text of a later encoded-word,
            // and would rescan the whole string once per match.
            return Regex.Replace(encodedWords, encodedWordRegex, delegate(Match match)
            {
                string encodedText = match.Groups["Content"].Value;
                string encoding    = match.Groups["Encoding"].Value;
                string charset     = match.Groups["Charset"].Value;

                // Get the encoding which corresponds to the character set
                System.Text.Encoding charsetEncoding = EncodingFinder.FindEncoding(charset);

                // Encoding may also be written in lowercase
                switch (encoding.ToUpperInvariant())
                {
                // RFC:
                // The "B" encoding is identical to the "BASE64"
                // encoding defined by RFC 2045.
                // http://tools.ietf.org/html/rfc2045#section-6.8
                case "B":
                    return Base64.Decode(encodedText, charsetEncoding);

                // RFC:
                // The "Q" encoding is similar to the "Quoted-Printable" content-
                // transfer-encoding defined in RFC 2045.
                // There are more details to this. Please check
                // http://tools.ietf.org/html/rfc2047#section-4.2
                //
                case "Q":
                    return QuotedPrintable.DecodeEncodedWord(encodedText, charsetEncoding);

                default:
                    throw new ArgumentException("The encoding " + encoding + " was not recognized");
                }
            });
        }
예제 #17
0
        public static String Decode(String encodedWords)
        {
            if (encodedWords == null)
            {
                throw new ArgumentNullException("encodedWords");
            }

            // Notice that RFC2231 redefines the BNF to
            // encoded-word := "=?" charset ["*" language] "?" encoded-text "?="
            // but no usage of this BNF has been spotted yet. It is mentioned here to
            // ease debugging if such a case is discovered.
            //
            // An earlier revision located the encoded-words with the regular expression
            //   \=\?(?<Charset>\S+?)\?(?<Encoding>\w)\?(?<Content>.+?)\?\=
            // The pieces of each encoded-word are now supplied by ParseEncodedText instead.

            // Start from the untouched input and substitute the encoded parts one by one.
            String result = encodedWords;

            var segments = ParseEncodedText(encodedWords);

            foreach (EncodedTextInfo segment in segments)
            {
                String wholeWord   = segment.FullText;
                String payload     = segment.Content;
                String method      = segment.Encoding;
                String charsetName = segment.Charset;

                // Resolve the character set name to the encoding used for the decoded bytes
                Encoding targetEncoding = Utility.ParseCharsetToEncoding(charsetName);

                // The encoding method may be written in lowercase, so normalize before comparing
                String normalizedMethod = method.ToUpperInvariant();

                String plainText;

                if (normalizedMethod == "B")
                {
                    // RFC: the "B" encoding is identical to the "BASE64" encoding defined by RFC 2045.
                    // http://tools.ietf.org/html/rfc2045#section-6.8
                    plainText = Base64.Decode(payload, targetEncoding);
                }
                else if (normalizedMethod == "Q")
                {
                    // RFC: the "Q" encoding is similar to the "Quoted-Printable" content-
                    // transfer-encoding defined in RFC 2045. For the details see
                    // http://tools.ietf.org/html/rfc2047#section-4.2
                    plainText = QuotedPrintable.DecodeEncodedWord(payload, targetEncoding);
                }
                else
                {
                    throw new ArgumentException("The encoding " + method + " was not recognized");
                }

                // Swap the encoded-word for its decoded text
                result = result.Replace(wholeWord, plainText);
            }

            return result;
        }