Example #1
0
        /// <summary>
        /// Detects the best <see cref="Encoding"/> to use to convert the data in the supplied stream to Unicode, and returns it.
        /// </summary>
        /// <param name="stream">The stream to detect the character encoding for. It must be readable and seekable.
        /// Its position is restored to the value it had on entry, even if detection throws.</param>
        /// <returns>The best <see cref="Encoding"/> object to be used to decode text from <paramref name="stream"/>
        /// into Unicode, or <c>null</c> if the best encoding can't be detected (including when the detected
        /// code page has no <see cref="Encoding"/> available in this runtime).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        /// <exception cref="NotSupportedException"><paramref name="stream"/> is not readable or not seekable.</exception>
        /// <remarks>See <a href="http://code.logos.com/blog/2010/05/detecting_the_character_encoding_of_a_file.html">Detecting the Character Encoding of a File</a>.</remarks>
        public static Encoding DetectBestEncoding(Stream stream)
        {
            // check parameter validity
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }
            if (!stream.CanRead)
            {
                throw new NotSupportedException("'stream' must be readable.");
            }
            if (!stream.CanSeek)
            {
                throw new NotSupportedException("'stream' must be seekable.");
            }

            // the encoding that was detected, or null on failure
            Encoding encoding = null;

            // MLang will move the stream pointer; remember its original position
            long position = stream.Position;

            // allocate a number of DetectEncodingInfo structures for MLang to fill in
            DetectEncodingInfo[] infos = new DetectEncodingInfo[8];
            int infoCount = infos.Length;

            try
            {
                // allow MLang to seek to the "beginning" (i.e., current position) of the stream by rebasing it
                using (RebasedStream rebased = new RebasedStream(stream))
                {
                    try
                    {
                        // try to create MLang object
                        IMultiLanguage2 multiLanguage = (IMultiLanguage2) new MultiLanguage();

                        // wrap input stream with an IStream
                        ManagedIStream istream = new ManagedIStream(rebased);

                        // detect the code page; infoCount is passed by reference and is used below as the
                        // number of entries of 'infos' to consider
                        int hresult = multiLanguage.DetectCodepageInIStream(MultiLanguageDetectCodePage.None, 0, istream, ref infos[0], ref infoCount);

                        // keep the wrapper reachable until the native call has returned
                        GC.KeepAlive(istream);

                        if (infoCount > 0 && (hresult == Win32.S_OK || hresult == Win32.S_FALSE))
                        {
                            // take the best code page that was found (highest confidence first)
                            int nCodePage = (int) infos.Take(infoCount).OrderByDescending(i => i.nConfidence).Select(i => i.nCodePage).FirstOrDefault();

                            try
                            {
                                encoding = Encoding.GetEncoding(nCodePage);
                            }
                            catch (ArgumentException)
                            {
                                // the code page is out of range or unknown to this runtime; report failure as null
                            }
                            catch (NotSupportedException)
                            {
                                // the code page is not supported on this platform; report failure as null
                            }
                        }
                    }
                    catch (COMException)
                    {
                        // failure: MLang could not be created or detection failed; fall through and return null
                    }
                }
            }
            finally
            {
                // reset the stream back to its input position for the caller, even when an
                // unexpected exception (e.g. an I/O error) is propagating
                stream.Position = position;
            }

            // return detected encoding (or null for failure)
            return encoding;
        }
		/// <summary>
		/// Detects the best <see cref="Encoding"/> to use to convert the data in the supplied stream to Unicode, and returns it.
		/// </summary>
		/// <param name="stream">The stream to detect the character encoding for. It must be readable and seekable.
		/// Its position is restored to the value it had on entry, even if detection throws.</param>
		/// <returns>The best <see cref="Encoding"/> object to be used to decode text from <paramref name="stream"/>
		/// into Unicode, or <c>null</c> if the best encoding can't be detected (including when the detected
		/// code page has no <see cref="Encoding"/> available in this runtime).</returns>
		/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
		/// <exception cref="NotSupportedException"><paramref name="stream"/> is not readable or not seekable.</exception>
		/// <remarks>See <a href="http://code.logos.com/blog/2010/05/detecting_the_character_encoding_of_a_file.html">Detecting the Character Encoding of a File</a>.</remarks>
		public static Encoding DetectBestEncoding(Stream stream)
		{
			// check parameter validity
			if (stream == null)
				throw new ArgumentNullException("stream");
			if (!stream.CanRead)
				throw new NotSupportedException("'stream' must be readable.");
			if (!stream.CanSeek)
				throw new NotSupportedException("'stream' must be seekable.");

			// the encoding that was detected, or null on failure
			Encoding encoding = null;

			// MLang will move the stream pointer; remember its original position
			long position = stream.Position;

			// allocate a number of DetectEncodingInfo structures for MLang to fill in
			DetectEncodingInfo[] infos = new DetectEncodingInfo[8];
			int infoCount = infos.Length;

			try
			{
				// allow MLang to seek to the "beginning" (i.e., current position) of the stream by rebasing it
				using (RebasedStream rebased = new RebasedStream(stream))
				{
					try
					{
						// try to create MLang object
						IMultiLanguage2 multiLanguage = (IMultiLanguage2) new MultiLanguage();

						// wrap input stream with an IStream
						ManagedIStream istream = new ManagedIStream(rebased);

						// detect the code page; infoCount is passed by reference and is used below as the
						// number of entries of 'infos' to consider
						int hresult = multiLanguage.DetectCodepageInIStream(MultiLanguageDetectCodePage.None, 0, istream, ref infos[0], ref infoCount);

						// keep the wrapper reachable until the native call has returned
						GC.KeepAlive(istream);

						if (infoCount > 0 && (hresult == Win32.S_OK || hresult == Win32.S_FALSE))
						{
							// take the best code page that was found (highest confidence first)
							int nCodePage = (int) infos.Take(infoCount).OrderByDescending(i => i.nConfidence).Select(i => i.nCodePage).FirstOrDefault();

							try
							{
								encoding = Encoding.GetEncoding(nCodePage);
							}
							catch (ArgumentException)
							{
								// the code page is out of range or unknown to this runtime; report failure as null
							}
							catch (NotSupportedException)
							{
								// the code page is not supported on this platform; report failure as null
							}
						}
					}
					catch (COMException)
					{
						// failure: MLang could not be created or detection failed; fall through and return null
					}
				}
			}
			finally
			{
				// reset the stream back to its input position for the caller, even when an
				// unexpected exception (e.g. an I/O error) is propagating
				stream.Position = position;
			}

			// return detected encoding (or null for failure)
			return encoding;
		}