예제 #1
0
        /// <summary>
        /// Returns up to <paramref name="maxEncodings"/> code pages that are assumed to be appropriate
        /// for the given raw data, as reported by IMultiLanguage2.DetectInputCodepage.
        /// </summary>
        /// <param name="input">array containing the raw data</param>
        /// <param name="maxEncodings">maximum number of encodings to detect; must be at least 1</param>
        /// <returns>
        /// an array of Encoding with the assumed encodings; a single ASCII entry for empty input,
        /// and an empty array when the detector reports no candidates
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1</exception>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null</exception>
        public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
        {
            // the constructor order is (paramName, message)
            if (maxEncodings < 1)
                throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");

            if (input == null)
                throw new ArgumentNullException("input");

            // empty input can always be encoded as ASCII
            if (input.Length == 0)
                return new[] {Encoding.ASCII};

            // the detector is always handed at least this many bytes; shorter input is
            // padded by repeating it cyclically until the buffer is full
            const int minimumLength = 256;
            if (input.Length < minimumLength)
            {
                var padded = new byte[minimumLength];
                for (var i = 0; i < padded.Length; i++)
                    padded[i] = input[i%input.Length];
                input = padded;
            }

            var result = new List<Encoding>();

            // get the IMultiLanguage2 interface
            IMultiLanguage2 multilang2 = new CMultiLanguageClass();
            if (multilang2 == null)
                throw new COMException("Failed to get IMultilang2");
            try
            {
                var detectedEncodings = new DetectEncodingInfo[maxEncodings];

                // in: capacity of detectedEncodings, out: number of entries actually filled
                var scores = detectedEncodings.Length;
                var srcLen = input.Length;

                // setup options (none)
                var options = MLDETECTCP.MLDETECTCP_NONE;

                // finally... call to DetectInputCodepage
                multilang2.DetectInputCodepage(options, 0,
                    ref input[0], ref srcLen, ref detectedEncodings[0], ref scores);

                // translate every reported code page into an Encoding;
                // the loop simply does not run when nothing was detected
                for (var i = 0; i < scores; i++)
                    result.Add(Encoding.GetEncoding((int) detectedEncodings[i].nCodePage));
            }
            finally
            {
                // release the COM object even if detection or the encoding lookup throws
                Marshal.FinalReleaseComObject(multilang2);
            }

            return result.ToArray();
        }
예제 #2
0
        /// <summary>
        /// Detects the code pages that can be used to encode <paramref name="input"/>,
        /// using IMultiLanguage3.DetectOutboundCodePage.
        /// </summary>
        /// <param name="input">the string to analyse</param>
        /// <param name="preferedEncodings">
        /// code pages to prefer; may be null, in which case no preferred list is passed
        /// and the MLDETECTF_PREFERRED_ONLY flag is not set
        /// </param>
        /// <param name="preserveOrder">when true, the MLDETECTF_PRESERVE_ORDER flag is passed to the detector</param>
        /// <returns>
        /// an array of Encoding for the detected code pages; a single ASCII entry for an empty string,
        /// and an empty array when the detector reports no code pages
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null</exception>
        public static Encoding[] DetectOutgoingEncodings(string input, int[] preferedEncodings, bool preserveOrder)
        {
            if (input == null)
                throw new ArgumentNullException("input");

            // empty strings can always be encoded as ASCII
            if (input.Length == 0)
                return new[] {Encoding.ASCII};

            var result = new List<Encoding>();

            // get the IMultiLanguage3 interface
            IMultiLanguage3 multilang3 = new CMultiLanguageClass();
            if (multilang3 == null)
                throw new COMException("Failed to get IMultilang3");
            try
            {
                var hasPreferred = preferedEncodings != null;

                // size of the output buffer: one slot per preferred code page, or - when no
                // preferred list is given - one slot per encoding known to the runtime
                var capacity = hasPreferred ? preferedEncodings.Length : Encoding.GetEncodings().Length;

                // in: capacity of the output buffer, out: number of code pages written
                var detectedCodepages = (uint) capacity;
                ushort specialChar = '?';

                // unmanaged arrays; allocated inside the try so that a failure in the
                // second allocation cannot leak the first one
                var pPrefEncs = IntPtr.Zero;
                var pDetectedEncs = IntPtr.Zero;

                try
                {
                    if (hasPreferred)
                    {
                        pPrefEncs = Marshal.AllocCoTaskMem(sizeof (uint)*preferedEncodings.Length);
                        Marshal.Copy(preferedEncodings, 0, pPrefEncs, preferedEncodings.Length);
                    }

                    // zero-initialise the output buffer
                    pDetectedEncs = Marshal.AllocCoTaskMem(sizeof (uint)*capacity);
                    Marshal.Copy(new int[capacity], 0, pDetectedEncs, capacity);

                    var options = MLCPF.MLDETECTF_VALID_NLS;
                    if (preserveOrder)
                        options |= MLCPF.MLDETECTF_PRESERVE_ORDER;

                    // restricting to the preferred list only makes sense when one was supplied
                    if (hasPreferred)
                        options |= MLCPF.MLDETECTF_PREFERRED_ONLY;

                    // finally... call to DetectOutboundCodePage
                    multilang3.DetectOutboundCodePage(options,
                        input, (uint) input.Length,
                        pPrefEncs, (uint) (hasPreferred ? preferedEncodings.Length : 0),
                        pDetectedEncs, ref detectedCodepages,
                        ref specialChar);

                    // get result
                    if (detectedCodepages > 0)
                    {
                        var theResult = new int[detectedCodepages];
                        Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                        // get the encodings for the codepages
                        foreach (var codePage in theResult)
                            result.Add(Encoding.GetEncoding(codePage));
                    }
                }
                finally
                {
                    if (pPrefEncs != IntPtr.Zero)
                        Marshal.FreeCoTaskMem(pPrefEncs);
                    if (pDetectedEncs != IntPtr.Zero)
                        Marshal.FreeCoTaskMem(pDetectedEncs);
                }
            }
            finally
            {
                // release the COM object even if detection or the encoding lookup throws
                Marshal.FinalReleaseComObject(multilang3);
            }

            return result.ToArray();
        }