/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that are assumed to be appropriate
/// for the given raw data.
/// </summary>
/// <param name="input">array containing the raw data</param>
/// <param name="maxEncodings">maximum number of encodings to detect</param>
/// <returns>
/// an array of Encoding with the assumed encodings; a single ASCII entry when
/// <paramref name="input"/> is empty, and an empty array when no code page is reported
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null</exception>
/// <exception cref="COMException">the IMultiLanguage2 interface could not be obtained</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    // ArgumentOutOfRangeException expects (paramName, message) in that order
    if (maxEncodings < 1)
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returend");

    if (input == null)
        throw new ArgumentNullException("input");

    // empty data can always be decoded as ASCII
    if (input.Length == 0)
        return new[] {Encoding.ASCII};

    // short inputs are repeated until the buffer holds exactly 256 bytes
    if (input.Length < 256)
    {
        var newInput = new byte[256];

        // whole copies of the input first ...
        var steps = 256/input.Length;
        for (var i = 0; i < steps; i++)
            Array.Copy(input, 0, newInput, input.Length*i, input.Length);

        // ... then a partial copy to fill the remaining bytes
        var rest = 256%input.Length;
        if (rest > 0)
            Array.Copy(input, 0, newInput, steps*input.Length, rest);

        input = newInput;
    }

    var result = new List<Encoding>();

    // get the IMultiLanguage2 interface
    IMultiLanguage2 multilang2 = new CMultiLanguageClass();
    if (multilang2 == null)
        throw new COMException("Failed to get IMultilang2");

    try
    {
        var detectedEncdings = new DetectEncodingInfo[maxEncodings];

        // in: capacity of detectedEncdings; the loop below treats the value left
        // in it by the call as the number of entries to read
        var scores = detectedEncdings.Length;
        var srcLen = input.Length;

        // setup options (none)
        var options = MLDETECTCP.MLDETECTCP_NONE;

        // finally... call to DetectInputCodepage
        multilang2.DetectInputCodepage(options, 0, ref input[0], ref srcLen, ref detectedEncdings[0], ref scores);

        // translate every reported code page into an Encoding
        // (the loop body is skipped when scores is zero or negative)
        for (var i = 0; i < scores; i++)
            result.Add(Encoding.GetEncoding((int) detectedEncdings[i].nCodePage));
    }
    finally
    {
        // always release the COM object, even when detection throws
        Marshal.FinalReleaseComObject(multilang2);
    }

    // empty when no code page was reported
    return result.ToArray();
}
/// <summary>
/// Passes the string and the preferred code pages to IMultiLanguage3.DetectOutboundCodePage
/// and returns the encodings for the code pages it reports.
/// </summary>
/// <param name="input">the string to analyze</param>
/// <param name="preferedEncodings">code pages handed to the detection as the preferred list</param>
/// <param name="preserveOrder">when true, MLDETECTF_PRESERVE_ORDER is added to the detection options</param>
/// <returns>
/// an array of Encoding for the reported code pages; a single ASCII entry when
/// <paramref name="input"/> is empty, and an empty array when no code page is reported
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> is null, or <paramref name="preferedEncodings"/> is null
/// for a non-empty <paramref name="input"/>
/// </exception>
/// <exception cref="COMException">the IMultiLanguage3 interface could not be obtained</exception>
public static Encoding[] DetectOutgoingEncodings(string input, int[] preferedEncodings, bool preserveOrder)
{
    if (input == null)
        throw new ArgumentNullException("input");

    // empty strings can always be encoded as ASCII
    if (input.Length == 0)
        return new[] {Encoding.ASCII};

    // The result buffer is sized from this array, so it is required from here on.
    // Checked after the empty-input shortcut so that call keeps returning ASCII.
    if (preferedEncodings == null)
        throw new ArgumentNullException("preferedEncodings");

    var result = new List<Encoding>();

    // get the IMultiLanguage3 interface
    IMultiLanguage3 multilang3 = new CMultiLanguageClass();
    if (multilang3 == null)
        throw new COMException("Failed to get IMultilang3");

    try
    {
        var resultCodePages = new int[preferedEncodings.Length];

        // in: capacity of the result buffer; after the call it is used as the
        // number of entries to read back
        var detectedCodepages = (uint) resultCodePages.Length;
        ushort specialChar = '?';

        // Both handles start as Zero and are allocated inside the try block,
        // so a failure of the second allocation cannot leak the first buffer.
        var pPrefEncs = IntPtr.Zero;
        var pDetectedEncs = IntPtr.Zero;
        try
        {
            // get unmanaged arrays
            pPrefEncs = Marshal.AllocCoTaskMem(sizeof (uint)*preferedEncodings.Length);
            pDetectedEncs = Marshal.AllocCoTaskMem(sizeof (uint)*resultCodePages.Length);

            Marshal.Copy(preferedEncodings, 0, pPrefEncs, preferedEncodings.Length);

            // resultCodePages is all zeros, so this clears the unmanaged result buffer
            Marshal.Copy(resultCodePages, 0, pDetectedEncs, resultCodePages.Length);

            var options = MLCPF.MLDETECTF_VALID_NLS | MLCPF.MLDETECTF_PREFERRED_ONLY;
            if (preserveOrder)
                options |= MLCPF.MLDETECTF_PRESERVE_ORDER;

            // finally... call to DetectOutboundCodePage
            multilang3.DetectOutboundCodePage(options,
                input, (uint) input.Length,
                pPrefEncs, (uint) preferedEncodings.Length,
                pDetectedEncs, ref detectedCodepages,
                ref specialChar);

            // get result
            if (detectedCodepages > 0)
            {
                var theResult = new int[detectedCodepages];
                Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                // get the encodings for the codepages
                for (var i = 0; i < theResult.Length; i++)
                    result.Add(Encoding.GetEncoding(theResult[i]));
            }
        }
        finally
        {
            if (pPrefEncs != IntPtr.Zero)
                Marshal.FreeCoTaskMem(pPrefEncs);
            if (pDetectedEncs != IntPtr.Zero)
                Marshal.FreeCoTaskMem(pDetectedEncs);
        }
    }
    finally
    {
        // always release the COM object, even when detection throws
        Marshal.FinalReleaseComObject(multilang3);
    }

    // empty when no code page was reported
    return result.ToArray();
}