Ejemplo n.º 1
0
        /// <summary>
        /// Detects the text encoding of the data in <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// Detection is delegated to the MLang COM object (<c>IMultiLanguage2.DetectCodepageInIStream</c>).
        /// When that object cannot be obtained, up to four bytes are read from the stream and handed
        /// to <c>SimpleDetectEncoding</c> instead.
        /// </remarks>
        /// <param name="stream">Stream whose content is inspected.</param>
        /// <param name="flags">Detection options; only the bits covered by <c>MLDETECTCP_MASK</c> are passed on.</param>
        /// <returns>The detected encoding, or <c>null</c> when the call succeeds without reporting a code page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public static Encoding DetectEncoding(Stream stream, DetectOption flags)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            IMultiLanguage2 o = new CMultiLanguageClass() as IMultiLanguage2;
            if (o == null)
            {
                // MLang is not available: fall back to the simple detector on the first bytes.
                byte[] buffer = new byte[4];
                int dataLength = 0;

                // Stream.Read may return fewer bytes than requested without being at the end of
                // the stream, so keep reading until the buffer is full or the stream is exhausted.
                while (dataLength < buffer.Length)
                {
                    int read = stream.Read(buffer, dataLength, buffer.Length - dataLength);
                    if (read <= 0)
                    {
                        break;
                    }
                    dataLength += read;
                }

                return SimpleDetectEncoding(buffer, dataLength);
            }

            try
            {
                DetectEncodingInfo[] lpEncoding = new DetectEncodingInfo[1];
                int length = lpEncoding.Length;
                ComStream pstmIn = new ComStream(stream);
                int errorCode = o.DetectCodepageInIStream(((MLDETECTCP) flags) & MLDETECTCP.MLDETECTCP_MASK, 0, pstmIn, lpEncoding, ref length);
                switch (errorCode)
                {
                    case 0: // S_OK
                    case 1: // S_FALSE
                        if (length > 0)
                        {
                            return Encoding.GetEncoding((int) lpEncoding[0].nCodePage);
                        }
                        return null;

                    default:
                        // GetExceptionForHR returns null for HRESULTs that are not failure codes;
                        // throwing that null would surface as a NullReferenceException, so supply
                        // an exception that still carries the HRESULT.
                        throw Marshal.GetExceptionForHR(errorCode)
                              ?? new COMException("DetectCodepageInIStream returned an unexpected HRESULT.", errorCode);
                }
            }
            finally
            {
                // Release the COM object on every exit path.
                Marshal.FinalReleaseComObject(o);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns up to maxEncodings code pages that are assumed to be appropriate for the raw data
        /// </summary>
        /// <param name="input">array containing the raw data</param>
        /// <param name="maxEncodings">maximum number of encodings to detect; must be at least 1</param>
        /// <returns>an array with the assumed encodings (possibly empty); ASCII alone for empty input</returns>
        /// <exception cref="ArgumentOutOfRangeException">maxEncodings is less than 1</exception>
        /// <exception cref="ArgumentNullException">input is null</exception>
        public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
        {
            if (maxEncodings < 1)
            {
                throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
            }

            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (input.Length == 0)
            {
                // nothing to analyse; an empty payload can always be treated as ASCII
                return new Encoding[] { Encoding.ASCII };
            }

            const int minimumSampleSize = 256;
            byte[] sample = input;
            if (sample.Length < minimumSampleSize)
            {
                // short inputs are repeated end-to-end until the sample is exactly 256 bytes long
                byte[] padded = new byte[minimumSampleSize];
                for (int offset = 0; offset < minimumSampleSize; offset += input.Length)
                {
                    int remaining = minimumSampleSize - offset;
                    int chunk = remaining < input.Length ? remaining : input.Length;
                    Array.Copy(input, 0, padded, offset, chunk);
                }
                sample = padded;
            }

            // get the IMultiLanguage2 interface
            IMultiLanguage2 detector = new CMultiLanguageClass();
            if (detector == null)
            {
                throw new COMException("Failed to get IMultilang2");
            }

            try
            {
                DetectEncodingInfo[] candidates = new DetectEncodingInfo[maxEncodings];

                // both counters are passed by reference to the detector
                int candidateCount = candidates.Length;
                int sampleLength = sample.Length;

                detector.DetectInputCodepage(MLDETECTCP.MLDETECTCP_NONE, 0,
                    ref sample[0], ref sampleLength, ref candidates[0], ref candidateCount);

                // translate every reported code page into an Encoding, keeping the reported order
                List<Encoding> found = new List<Encoding>();
                for (int index = 0; index < candidateCount; index++)
                {
                    int codePage = (int)candidates[index].nCodePage;
                    found.Add(Encoding.GetEncoding(codePage));
                }

                return found.ToArray();
            }
            finally
            {
                Marshal.FinalReleaseComObject(detector);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Asks MLang (<c>IMultiLanguage3.DetectOutboundCodePage</c>) which code pages can be used
        /// to encode <paramref name="input"/>.
        /// </summary>
        /// <param name="input">text to be encoded</param>
        /// <param name="preferredEncodings">
        /// code pages the detection is restricted to; when <c>null</c> no restriction is requested and
        /// the result buffer is sized by the number of encodings known to the runtime
        /// </param>
        /// <param name="preserveOrder">adds <c>MLDETECTF_PRESERVE_ORDER</c> to the request flags</param>
        /// <returns>the encodings reported by MLang (possibly none); ASCII alone for an empty string</returns>
        /// <exception cref="ArgumentNullException">input is null</exception>
        public static Encoding[] DetectOutgoingEncodings(string input, int[] preferredEncodings, bool preserveOrder)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // empty strings can always be encoded as ASCII
            if (input.Length == 0)
            {
                return new Encoding[] { Encoding.ASCII };
            }

            List<Encoding> result = new List<Encoding>();

            // get the IMultiLanguage3 interface
            IMultiLanguage3 multilang3 = new CMultiLanguageClass();
            if (multilang3 == null)
            {
                throw new COMException("Failed to get IMultilang3");
            }

            try
            {
                // a null preferred list used to fail with a NullReferenceException; it is now handled
                // the same way the single-result DetectOutgoingEncoding handles it
                bool hasPreferred = preferredEncodings != null;
                int preferredCount = hasPreferred ? preferredEncodings.Length : 0;
                int capacity = hasPreferred ? preferredCount : Encoding.GetEncodings().Length;

                uint detectedCodepages = (uint)capacity;
                ushort specialChar = (ushort)'?';

                // Both pointers start as Zero and are allocated inside the try block so that a
                // failure of the second allocation cannot leak the first one.
                IntPtr pPrefEncs = IntPtr.Zero;
                IntPtr pDetectedEncs = IntPtr.Zero;

                try
                {
                    if (hasPreferred)
                    {
                        pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferredCount);
                        Marshal.Copy(preferredEncodings, 0, pPrefEncs, preferredCount);
                    }

                    // the output buffer is zero-filled before it is handed to MLang
                    pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * capacity);
                    Marshal.Copy(new int[capacity], 0, pDetectedEncs, capacity);

                    MLCPF options = MLCPF.MLDETECTF_VALID_NLS;
                    if (preserveOrder)
                    {
                        options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
                    }

                    if (hasPreferred)
                    {
                        options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
                    }

                    // finally... call to DetectOutboundCodePage
                    multilang3.DetectOutboundCodePage(options,
                        input, (uint)input.Length,
                        pPrefEncs, (uint)preferredCount,
                        pDetectedEncs, ref detectedCodepages,
                        ref specialChar);

                    // get result
                    if (detectedCodepages > 0)
                    {
                        int[] theResult = new int[detectedCodepages];
                        Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                        // get the encodings for the codepages
                        for (int i = 0; i < theResult.Length; i++)
                        {
                            result.Add(Encoding.GetEncoding(theResult[i]));
                        }
                    }
                }
                finally
                {
                    if (pPrefEncs != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(pPrefEncs);
                    }

                    if (pDetectedEncs != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(pDetectedEncs);
                    }
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(multilang3);
            }

            // empty when MLang reported no code page
            return result.ToArray();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Picks one encoding for <paramref name="input"/> by calling
        /// <c>IMultiLanguage3.DetectOutboundCodePage</c> and taking the first code page it reports.
        /// </summary>
        /// <param name="input">text to be encoded</param>
        /// <param name="preferredEncodings">code pages the detection is restricted to, or <c>null</c> for no restriction</param>
        /// <param name="preserveOrder">adds <c>MLDETECTF_PRESERVE_ORDER</c> to the request flags</param>
        /// <returns>the first reported encoding; ASCII for an empty string or when nothing is reported</returns>
        /// <exception cref="ArgumentNullException">input is null</exception>
        private static Encoding DetectOutgoingEncoding(string input, int[] preferredEncodings, bool preserveOrder)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // an empty string needs no detection
            if (input.Length == 0)
            {
                return Encoding.ASCII;
            }

            IMultiLanguage3 mlang = new CMultiLanguageClass();
            if (mlang == null)
            {
                throw new COMException("Failed to get IMultilang3");
            }

            try
            {
                bool restrictToPreferred = preferredEncodings != null;

                // without a preferred list the output buffer has one slot per encoding known to the runtime
                int capacity = restrictToPreferred ? preferredEncodings.Length : Encoding.GetEncodings().Length;
                int[] blank = new int[capacity];
                uint reported = (uint)blank.Length;
                ushort specialChar = (ushort)'?';

                // unmanaged copies of the preferred list (optional) and of the output buffer
                IntPtr preferredPtr = restrictToPreferred
                    ? Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length)
                    : IntPtr.Zero;
                IntPtr detectedPtr = Marshal.AllocCoTaskMem(sizeof(uint) * blank.Length);

                try
                {
                    uint preferredCount = 0;
                    MLCPF options = MLCPF.MLDETECTF_VALID_NLS;

                    if (restrictToPreferred)
                    {
                        Marshal.Copy(preferredEncodings, 0, preferredPtr, preferredEncodings.Length);
                        preferredCount = (uint)preferredEncodings.Length;
                        options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
                    }

                    if (preserveOrder)
                    {
                        options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
                    }

                    // zero-fill the output buffer
                    Marshal.Copy(blank, 0, detectedPtr, blank.Length);

                    mlang.DetectOutboundCodePage(options,
                        input, (uint)input.Length,
                        preferredPtr, preferredCount,
                        detectedPtr, ref reported,
                        ref specialChar);

                    if (reported == 0)
                    {
                        return Encoding.ASCII;
                    }

                    // only the first reported code page is used
                    int[] codePages = new int[reported];
                    Marshal.Copy(detectedPtr, codePages, 0, codePages.Length);
                    return Encoding.GetEncoding(codePages[0]);
                }
                finally
                {
                    if (preferredPtr != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(preferredPtr);
                    }

                    Marshal.FreeCoTaskMem(detectedPtr);
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(mlang);
            }
        }
        /// <summary>
        /// Queries <c>IMultiLanguage3.DetectOutboundCodePage</c> for code pages able to represent
        /// <paramref name="input"/> and returns the first one as an <see cref="Encoding"/>.
        /// </summary>
        /// <param name="input">text that has to be encoded</param>
        /// <param name="preferredEncodings">code pages the detection is limited to, or <c>null</c> for no limit</param>
        /// <param name="preserveOrder">whether <c>MldetectfPreserveOrder</c> is added to the request flags</param>
        /// <returns>the first reported encoding; ASCII for an empty string or when no code page is reported</returns>
        /// <exception cref="ArgumentNullException">input is null</exception>
        private static Encoding DetectOutgoingEncoding(string input, int[] preferredEncodings, bool preserveOrder)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (input.Length == 0)
            {
                // nothing to detect for an empty string
                return Encoding.ASCII;
            }

            Encoding chosen = Encoding.ASCII;

            IMultiLanguage3 detector = new CMultiLanguageClass();
            if (detector == null)
            {
                throw new COMException("Failed to get IMultilang3");
            }

            try
            {
                // one output slot per preferred code page, or per runtime-known encoding without a list
                int slotCount = preferredEncodings == null
                    ? Encoding.GetEncodings().Length
                    : preferredEncodings.Length;
                int[] emptySlots = new int[slotCount];
                uint hitCount = (uint)slotCount;
                ushort specialChar = '?';

                IntPtr preferredBuffer = IntPtr.Zero;
                if (preferredEncodings != null)
                {
                    preferredBuffer = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
                }

                IntPtr hitBuffer = Marshal.AllocCoTaskMem(sizeof(uint) * slotCount);

                try
                {
                    Mlcpf flags = Mlcpf.MldetectfValidNls;
                    uint preferredLength = 0;

                    if (preferredEncodings != null)
                    {
                        Marshal.Copy(preferredEncodings, 0, preferredBuffer, preferredEncodings.Length);
                        preferredLength = (uint)preferredEncodings.Length;
                        flags |= Mlcpf.MldetectfPreferredOnly;
                    }

                    // the output buffer starts out zero-filled
                    Marshal.Copy(emptySlots, 0, hitBuffer, slotCount);

                    if (preserveOrder)
                    {
                        flags |= Mlcpf.MldetectfPreserveOrder;
                    }

                    detector.DetectOutboundCodePage(flags,
                        input, (uint)input.Length,
                        preferredBuffer, preferredLength,
                        hitBuffer, ref hitCount,
                        ref specialChar);

                    if (hitCount > 0)
                    {
                        // copy the reported code pages back and keep the first one
                        int[] hits = new int[hitCount];
                        Marshal.Copy(hitBuffer, hits, 0, hits.Length);
                        chosen = Encoding.GetEncoding(hits[0]);
                    }
                }
                finally
                {
                    if (preferredBuffer != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(preferredBuffer);
                    }

                    Marshal.FreeCoTaskMem(hitBuffer);
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(detector);
            }

            return chosen;
        }
        /// <summary>
        /// Asks MLang (<c>IMultiLanguage3.DetectOutboundCodePage</c>) which code pages can be used
        /// to encode <paramref name="input"/>.
        /// </summary>
        /// <param name="input">text to be encoded</param>
        /// <param name="preferredEncodings">
        /// code pages the detection is restricted to; when <c>null</c> no restriction is requested and
        /// the result buffer is sized by the number of encodings known to the runtime
        /// </param>
        /// <param name="preserveOrder">adds <c>MldetectfPreserveOrder</c> to the request flags</param>
        /// <returns>the encodings reported by MLang (possibly none); ASCII alone for an empty string</returns>
        /// <exception cref="ArgumentNullException">input is null</exception>
        public static Encoding[] DetectOutgoingEncodings(string input, int[] preferredEncodings, bool preserveOrder)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // empty strings can always be encoded as ASCII
            if (input.Length == 0)
            {
                return new Encoding[] { Encoding.ASCII };
            }

            List<Encoding> result = new List<Encoding>();

            // get the IMultiLanguage3 interface
            IMultiLanguage3 multilang3 = new CMultiLanguageClass();
            if (multilang3 == null)
            {
                throw new COMException("Failed to get IMultilang3");
            }

            try
            {
                // a null preferred list used to fail with a NullReferenceException; it is now handled
                // the same way the single-result DetectOutgoingEncoding handles it
                bool hasPreferred = preferredEncodings != null;
                int preferredCount = hasPreferred ? preferredEncodings.Length : 0;
                int capacity = hasPreferred ? preferredCount : Encoding.GetEncodings().Length;

                uint detectedCodepages = (uint)capacity;
                ushort specialChar = '?';

                // Both pointers start as Zero and are allocated inside the try block so that a
                // failure of the second allocation cannot leak the first one.
                IntPtr pPrefEncs = IntPtr.Zero;
                IntPtr pDetectedEncs = IntPtr.Zero;

                try
                {
                    if (hasPreferred)
                    {
                        pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferredCount);
                        Marshal.Copy(preferredEncodings, 0, pPrefEncs, preferredCount);
                    }

                    // the output buffer is zero-filled before it is handed to MLang
                    pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * capacity);
                    Marshal.Copy(new int[capacity], 0, pDetectedEncs, capacity);

                    Mlcpf options = Mlcpf.MldetectfValidNls;
                    if (preserveOrder)
                    {
                        options |= Mlcpf.MldetectfPreserveOrder;
                    }

                    if (hasPreferred)
                    {
                        options |= Mlcpf.MldetectfPreferredOnly;
                    }

                    // finally... call to DetectOutboundCodePage
                    multilang3.DetectOutboundCodePage(options,
                        input, (uint)input.Length,
                        pPrefEncs, (uint)preferredCount,
                        pDetectedEncs, ref detectedCodepages,
                        ref specialChar);

                    // get result
                    if (detectedCodepages > 0)
                    {
                        int[] theResult = new int[detectedCodepages];
                        Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                        // get the encodings for the codepages
                        for (int i = 0; i < theResult.Length; i++)
                        {
                            result.Add(Encoding.GetEncoding(theResult[i]));
                        }
                    }
                }
                finally
                {
                    if (pPrefEncs != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(pPrefEncs);
                    }

                    if (pDetectedEncs != IntPtr.Zero)
                    {
                        Marshal.FreeCoTaskMem(pDetectedEncs);
                    }
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(multilang3);
            }

            // empty when MLang reported no code page
            return result.ToArray();
        }
        /// <summary>
        /// Returns up to maxEncodings code pages that are assumed to be appropriate for the raw data
        /// </summary>
        /// <param name="input">array containing the raw data</param>
        /// <param name="maxEncodings">maximum number of encodings to detect; must be at least 1</param>
        /// <returns>an array with the assumed encodings (possibly empty); ASCII alone for empty input</returns>
        /// <exception cref="ArgumentOutOfRangeException">maxEncodings is less than 1</exception>
        /// <exception cref="ArgumentNullException">input is null</exception>
        public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
        {
            if (maxEncodings < 1)
            {
                throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
            }

            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            int originalLength = input.Length;
            if (originalLength == 0)
            {
                // an empty payload can always be treated as ASCII
                return new Encoding[] { Encoding.ASCII };
            }

            byte[] probe = input;
            if (originalLength < 256)
            {
                // build a 256-byte probe by writing the input back-to-back; the last copy is cut short
                probe = new byte[256];
                int written = 0;
                while (written < probe.Length)
                {
                    int room = probe.Length - written;
                    int count = originalLength <= room ? originalLength : room;
                    Array.Copy(input, 0, probe, written, count);
                    written += count;
                }
            }

            // get the IMultiLanguage2 interface
            IMultiLanguage2 mlang = new CMultiLanguageClass();
            if (mlang == null)
            {
                throw new COMException("Failed to get IMultilang2");
            }

            List<Encoding> matches = new List<Encoding>();
            try
            {
                DetectEncodingInfo[] guesses = new DetectEncodingInfo[maxEncodings];

                // both values are passed by reference to the detector
                int guessCount = maxEncodings;
                int probeLength = probe.Length;

                mlang.DetectInputCodepage(Mldetectcp.MldetectcpNone, 0,
                    ref probe[0], ref probeLength, ref guesses[0], ref guessCount);

                // convert each reported code page, keeping the reported order
                int position = 0;
                while (position < guessCount)
                {
                    matches.Add(Encoding.GetEncoding((int)guesses[position].nCodePage));
                    position++;
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(mlang);
            }

            return matches.ToArray();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Asks <c>IMultiLanguage3.DetectOutboundCodePage</c> which code pages can encode
        /// <paramref name="input"/> and converts the answer into <see cref="Encoding"/> instances.
        /// </summary>
        /// <param name="input">text to be encoded; null or empty short-circuits to <c>Default</c></param>
        /// <param name="preferredEncodings">code pages the detection is restricted to; a null or empty list means no restriction</param>
        /// <param name="preserveOrder">adds <c>MLDETECTF_PRESERVE_ORDER</c> to the request flags</param>
        /// <returns>the reported encodings in reported order (possibly none)</returns>
        private static Encoding[] FindEncodings(string input, int[] preferredEncodings, bool preserveOrder)
        {
            // nothing to analyse: answer with the default encoding only
            if (string.IsNullOrEmpty(input))
            {
                return new[] { Default };
            }

            bool restrict = !preferredEncodings.IsNullOrEmpty();

            IMultiLanguage3 mlang = new CMultiLanguageClass();
            if (mlang == null)
            {
                throw new COMException($"Failed to get {nameof(IMultiLanguage3)}");
            }

            try
            {
                // one output slot per preferred code page, otherwise SystemEncodingCount slots
                int capacity = restrict ? preferredEncodings.Length : SystemEncodingCount;
                int[] blank = new int[capacity];
                uint hitCount = (uint)capacity;
                ushort specialChar = '?';

                // unmanaged buffers: the preferred list (only when restricting) and the output area
                IntPtr preferredPtr = IntPtr.Zero;
                if (restrict)
                {
                    preferredPtr = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
                }
                IntPtr hitsPtr = Marshal.AllocCoTaskMem(sizeof(uint) * capacity);

                try
                {
                    MLCPF options = MLCPF.MLDETECTF_VALID_NLS;
                    uint preferredCount = 0;

                    if (restrict)
                    {
                        Marshal.Copy(preferredEncodings, 0, preferredPtr, preferredEncodings.Length);
                        preferredCount = (uint)preferredEncodings.Length;
                    }

                    // zero-fill the output area
                    Marshal.Copy(blank, 0, hitsPtr, capacity);

                    if (preserveOrder)
                    {
                        options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
                    }
                    if (restrict)
                    {
                        options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
                    }

                    mlang.DetectOutboundCodePage(options,
                                                 input,
                                                 (uint)input.Length,
                                                 preferredPtr,
                                                 preferredCount,
                                                 hitsPtr,
                                                 ref hitCount,
                                                 ref specialChar);

                    if (hitCount == 0)
                    {
                        return new Encoding[0];
                    }

                    // copy the reported code pages back and map each one to its Encoding
                    int[] codePages = new int[hitCount];
                    Marshal.Copy(hitsPtr, codePages, 0, codePages.Length);
                    return Array.ConvertAll<int, Encoding>(codePages, codePage => Encoding.GetEncoding(codePage));
                }
                finally
                {
                    if (!preferredPtr.IsZero())
                    {
                        Marshal.FreeCoTaskMem(preferredPtr);
                    }
                    Marshal.FreeCoTaskMem(hitsPtr);
                }
            }
            finally
            {
                Marshal.FinalReleaseComObject(mlang);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Asks <c>IMultiLanguage2.DetectInputCodepage</c> which code pages the raw bytes are likely
        /// encoded in.
        /// </summary>
        /// <param name="input">raw data; null or empty short-circuits to <c>Default</c></param>
        /// <param name="maxEncodings">maximum number of encodings to detect; values below 1 are treated as 1</param>
        /// <returns>the reported encodings in reported order (possibly none)</returns>
        public static Encoding[] GetEncodings(byte[] input, int maxEncodings)
        {
            // nothing to analyse: answer with the default encoding only
            if (input.IsNullOrEmpty())
            {
                return new[] { Default };
            }

            int limit = maxEncodings < 1 ? 1 : maxEncodings;

            byte[] sample = input;
            if (input.Length < 256)
            {
                // short inputs are repeated cyclically until the sample holds exactly 256 bytes
                sample = new byte[256];
                for (int i = 0; i < sample.Length; i++)
                {
                    sample[i] = input[i % input.Length];
                }
            }

            // get the IMultiLanguage2 interface
            IMultiLanguage2 mlang = new CMultiLanguageClass();
            if (mlang == null)
            {
                throw new COMException($"Failed to get {nameof(IMultiLanguage2)}");
            }

            try
            {
                DetectEncodingInfo[] guesses = new DetectEncodingInfo[limit];

                // both values are passed by reference to the detector
                int guessCount = limit;
                int sampleLength = sample.Length;

                mlang.DetectInputCodepage(MLDETECTCP.MLDETECTCP_NONE, 0, ref sample[0], ref sampleLength, ref guesses[0], ref guessCount);

                // convert each reported code page, keeping the reported order
                List <Encoding> matches = new List <Encoding>();
                for (int i = 0; i < guessCount; i++)
                {
                    int codePage = (int)guesses[i].nCodePage;
                    matches.Add(Encoding.GetEncoding(codePage));
                }

                return matches.ToArray();
            }
            finally
            {
                Marshal.FinalReleaseComObject(mlang);
            }
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Detects the text encoding of the first <paramref name="dataLength"/> bytes of <paramref name="data"/>.
 /// </summary>
 /// <remarks>
 /// When <c>DetectOption.TrySimpleDetectFirst</c> is set, <c>SimpleDetectEncoding</c> is consulted first.
 /// Otherwise, or when it finds nothing, the MLang COM object (<c>IMultiLanguage2.DetectInputCodepage</c>)
 /// is used. If that object cannot be obtained, <c>SimpleDetectEncoding</c> serves as the fallback.
 /// </remarks>
 /// <param name="data">Buffer holding the bytes to inspect.</param>
 /// <param name="dataLength">Number of bytes of <paramref name="data"/> to inspect.</param>
 /// <param name="flags">Detection options; only the bits covered by <c>MLDETECTCP_MASK</c> are passed to MLang.</param>
 /// <returns>
 /// The detected encoding, or <c>null</c> when <paramref name="dataLength"/> is 0, when no code page is
 /// reported, or when the MLang call returns anything other than 0 or 1.
 /// </returns>
 /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="dataLength"/> is negative.</exception>
 /// <exception cref="ArgumentException"><paramref name="dataLength"/> exceeds the length of <paramref name="data"/>.</exception>
 public static Encoding DetectEncoding(byte[] data, int dataLength, DetectOption flags)
 {
     if (data == null)
     {
         throw new ArgumentNullException("data");
     }
     // A negative length previously slipped past the range check below and was forwarded to the
     // detectors. ArgumentOutOfRangeException derives from ArgumentException, so callers that
     // already catch the latter keep working.
     if (dataLength < 0)
     {
         throw new ArgumentOutOfRangeException("dataLength", "dataLength must not be negative");
     }
     if (data.Length < dataLength)
     {
         throw new ArgumentException("data length is less than dataLength");
     }
     if (dataLength == 0)
     {
         return null;
     }

     bool trySimpleFirst = (flags & DetectOption.TrySimpleDetectFirst) > DetectOption.Default;
     if (trySimpleFirst)
     {
         Encoding encoding = SimpleDetectEncoding(data, dataLength);
         if (encoding != null)
         {
             return encoding;
         }
     }

     IMultiLanguage2 o = new CMultiLanguageClass() as IMultiLanguage2;
     if (o == null)
     {
         // MLang is not available. If the simple detector already ran above it found nothing,
         // so there is no point in running it a second time.
         return trySimpleFirst ? null : SimpleDetectEncoding(data, dataLength);
     }

     try
     {
         DetectEncodingInfo[] lpEncoding = new DetectEncodingInfo[1];
         int length = lpEncoding.Length;
         switch (o.DetectInputCodepage(((MLDETECTCP) flags) & MLDETECTCP.MLDETECTCP_MASK, 0, data, ref dataLength, lpEncoding, ref length))
         {
             case 0: // S_OK
             case 1: // S_FALSE
                 if (length > 0)
                 {
                     return Encoding.GetEncoding((int) lpEncoding[0].nCodePage);
                 }
                 break;
         }

         // Either no code page was reported or the call returned another HRESULT.
         return null;
     }
     finally
     {
         // Release the COM object on every exit path.
         Marshal.FinalReleaseComObject(o);
     }
 }