/// <summary>
/// Detects the encoding of the data in <paramref name="stream"/> by calling MLang's
/// <c>DetectCodepageInIStream</c>. When the MLang object cannot be obtained as
/// <c>IMultiLanguage2</c>, up to four bytes are read from the stream and handed to
/// <c>SimpleDetectEncoding</c> instead.
/// </summary>
/// <param name="stream">Stream containing the raw data to inspect.</param>
/// <param name="flags">Detection options; masked with <c>MLDETECTCP_MASK</c> before being passed to MLang.</param>
/// <returns>
/// The encoding for the first code page reported by MLang, or <c>null</c> when the call
/// succeeds without reporting any candidate.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public static Encoding DetectEncoding(Stream stream, DetectOption flags)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    IMultiLanguage2 o = new CMultiLanguageClass() as IMultiLanguage2;
    if (o == null)
    {
        // MLang is unavailable: fall back to the simple detector on the leading bytes.
        byte[] buffer = new byte[4];
        int dataLength = stream.Read(buffer, 0, buffer.Length);
        return SimpleDetectEncoding(buffer, dataLength);
    }

    try
    {
        DetectEncodingInfo[] lpEncoding = new DetectEncodingInfo[1];
        int length = lpEncoding.Length;
        ComStream pstmIn = new ComStream(stream);
        int errorCode = o.DetectCodepageInIStream(((MLDETECTCP) flags) & MLDETECTCP.MLDETECTCP_MASK, 0, pstmIn, lpEncoding, ref length);

        // 0 and 1 are the HRESULTs treated as a usable answer.
        if (errorCode == 0 || errorCode == 1)
        {
            return length > 0 ? Encoding.GetEncoding((int) lpEncoding[0].nCodePage) : null;
        }

        // Throws for failure HRESULTs and does nothing for other success codes.
        // The previous `throw Marshal.GetExceptionForHR(errorCode)` threw null
        // (a NullReferenceException) for those, because GetExceptionForHR returns
        // null when the HRESULT is not an error.
        Marshal.ThrowExceptionForHR(errorCode);
        return null;
    }
    finally
    {
        Marshal.FinalReleaseComObject(o);
    }
}
/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that MLang considers
/// appropriate for the given raw data.
/// </summary>
/// <param name="input">Array containing the raw data.</param>
/// <param name="maxEncodings">Maximum number of encodings to detect; must be at least 1.</param>
/// <returns>
/// The detected encodings in the order MLang reported them; a single ASCII entry for
/// empty input; an empty array when nothing was detected.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    if (maxEncodings < 1)
    {
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // An empty buffer can always be represented as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // Samples shorter than 256 bytes are repeated end-to-end until they fill 256 bytes.
    if (input.Length < 256)
    {
        byte[] padded = new byte[256];
        for (int i = 0; i < padded.Length; i++)
        {
            padded[i] = input[i % input.Length];
        }

        input = padded;
    }

    List<Encoding> found = new List<Encoding>();

    // Obtain the IMultiLanguage2 interface.
    IMultiLanguage2 mlang = new CMultiLanguageClass();
    if (mlang == null)
    {
        throw new COMException("Failed to get IMultilang2");
    }

    try
    {
        DetectEncodingInfo[] candidates = new DetectEncodingInfo[maxEncodings];
        int candidateCount = candidates.Length;
        int byteCount = input.Length;

        // No detection options are set; candidateCount is updated by the call.
        mlang.DetectInputCodepage(MLDETECTCP.MLDETECTCP_NONE, 0, ref input[0], ref byteCount, ref candidates[0], ref candidateCount);

        for (int i = 0; i < candidateCount; i++)
        {
            found.Add(Encoding.GetEncoding((int)candidates[i].nCodePage));
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(mlang);
    }

    return found.ToArray();
}
/// <summary>
/// Asks MLang (<c>IMultiLanguage3.DetectOutboundCodePage</c>) which encodings can
/// represent <paramref name="input"/>.
/// </summary>
/// <param name="input">Text that is going to be encoded.</param>
/// <param name="preferredEncodings">
/// Code pages to restrict the detection to. When <c>null</c>, no preferred list is passed,
/// the "preferred only" flag is not set, and the result buffer is sized by
/// <c>Encoding.GetEncodings().Length</c>.
/// </param>
/// <param name="preserveOrder">When <c>true</c>, the "preserve order" flag is passed to MLang.</param>
/// <returns>
/// The encodings for the code pages MLang reported; a single ASCII entry for an empty
/// string; an empty array when nothing was reported.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public static Encoding[] DetectOutgoingEncodings(string input, int[] preferredEncodings, bool preserveOrder)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Empty strings can always be encoded as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // A null list used to be dereferenced (NullReferenceException); treat it as
    // "no restriction", as the single-result DetectOutgoingEncoding does.
    bool hasPreferred = preferredEncodings != null;

    List<Encoding> result = new List<Encoding>();

    // Get the IMultiLanguage3 interface.
    IMultiLanguage3 multilang3 = new CMultiLanguageClass();
    if (multilang3 == null)
    {
        throw new COMException("Failed to get IMultilang3");
    }

    try
    {
        int capacity = hasPreferred ? preferredEncodings.Length : Encoding.GetEncodings().Length;
        int[] resultCodePages = new int[capacity];
        uint detectedCodepages = (uint)capacity;
        ushort specialChar = (ushort)'?';

        // Both buffers are allocated inside the try so that a failure while
        // allocating the second one cannot leak the first.
        IntPtr pPrefEncs = IntPtr.Zero;
        IntPtr pDetectedEncs = IntPtr.Zero;
        try
        {
            if (hasPreferred)
            {
                pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
                Marshal.Copy(preferredEncodings, 0, pPrefEncs, preferredEncodings.Length);
            }

            // Zero-initialize the output buffer from the managed array.
            pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * capacity);
            Marshal.Copy(resultCodePages, 0, pDetectedEncs, capacity);

            MLCPF options = MLCPF.MLDETECTF_VALID_NLS;
            if (preserveOrder)
            {
                options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
            }

            if (hasPreferred)
            {
                options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
            }

            multilang3.DetectOutboundCodePage(options,
                input, (uint)input.Length,
                pPrefEncs, (uint)(hasPreferred ? preferredEncodings.Length : 0),
                pDetectedEncs, ref detectedCodepages,
                ref specialChar);

            // detectedCodepages now holds the number of entries MLang wrote.
            if (detectedCodepages > 0)
            {
                int[] theResult = new int[detectedCodepages];
                Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                for (int i = 0; i < theResult.Length; i++)
                {
                    result.Add(Encoding.GetEncoding(theResult[i]));
                }
            }
        }
        finally
        {
            if (pPrefEncs != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(pPrefEncs);
            }

            if (pDetectedEncs != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(pDetectedEncs);
            }
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(multilang3);
    }

    return result.ToArray();
}
/// <summary>
/// Picks a single encoding for <paramref name="input"/> by asking MLang
/// (<c>IMultiLanguage3.DetectOutboundCodePage</c>) and taking the first code page it reports.
/// </summary>
/// <param name="input">Text that is going to be encoded.</param>
/// <param name="preferredEncodings">
/// Code pages to restrict the detection to, or <c>null</c> to pass no preferred list.
/// </param>
/// <param name="preserveOrder">When <c>true</c>, the "preserve order" flag is passed to MLang.</param>
/// <returns>
/// The encoding for the first reported code page; ASCII for an empty string or when
/// MLang reports nothing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
private static Encoding DetectOutgoingEncoding(string input, int[] preferredEncodings, bool preserveOrder)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Nothing to analyze: an empty string always fits ASCII.
    if (input.Length == 0)
    {
        return Encoding.ASCII;
    }

    Encoding best = Encoding.ASCII;

    // Obtain the IMultiLanguage3 interface.
    IMultiLanguage3 mlang = new CMultiLanguageClass();
    if (mlang == null)
    {
        throw new COMException("Failed to get IMultilang3");
    }

    try
    {
        bool hasPreferred = preferredEncodings != null;
        int slots = hasPreferred ? preferredEncodings.Length : Encoding.GetEncodings().Length;

        int[] zeroed = new int[slots];
        uint reported = (uint)zeroed.Length;
        ushort replacementChar = (ushort)'?';

        // Unmanaged copies of the preferred list (if any) and the output buffer.
        IntPtr preferredBuffer = hasPreferred
            ? Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length)
            : IntPtr.Zero;
        IntPtr detectedBuffer = Marshal.AllocCoTaskMem(sizeof(uint) * zeroed.Length);

        try
        {
            uint preferredCount = 0;
            MLCPF options = MLCPF.MLDETECTF_VALID_NLS;

            if (hasPreferred)
            {
                Marshal.Copy(preferredEncodings, 0, preferredBuffer, preferredEncodings.Length);
                preferredCount = (uint)preferredEncodings.Length;
            }

            // Clear the output buffer by copying the zero-filled managed array into it.
            Marshal.Copy(zeroed, 0, detectedBuffer, zeroed.Length);

            if (preserveOrder)
            {
                options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
            }

            if (hasPreferred)
            {
                options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
            }

            mlang.DetectOutboundCodePage(options,
                input, (uint)input.Length,
                preferredBuffer, preferredCount,
                detectedBuffer, ref reported,
                ref replacementChar);

            // Only the first reported code page is used.
            if (reported > 0)
            {
                int[] codePages = new int[reported];
                Marshal.Copy(detectedBuffer, codePages, 0, codePages.Length);
                best = Encoding.GetEncoding(codePages[0]);
            }
        }
        finally
        {
            if (preferredBuffer != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(preferredBuffer);
            }

            Marshal.FreeCoTaskMem(detectedBuffer);
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(mlang);
    }

    return best;
}
/// <summary>
/// Determines one encoding suitable for <paramref name="input"/> via MLang's
/// <c>IMultiLanguage3.DetectOutboundCodePage</c>, using the first code page returned.
/// </summary>
/// <param name="input">The string to be encoded.</param>
/// <param name="preferredEncodings">
/// Optional list of code pages to limit detection to; <c>null</c> means no list is passed.
/// </param>
/// <param name="preserveOrder">Whether to pass the "preserve order" flag to MLang.</param>
/// <returns>
/// ASCII for an empty string or when no code page is reported; otherwise the encoding of
/// the first reported code page.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
private static Encoding DetectOutgoingEncoding(string input, int[] preferredEncodings, bool preserveOrder)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // An empty string is trivially representable in ASCII.
    if (input.Length == 0)
    {
        return Encoding.ASCII;
    }

    Encoding chosen = Encoding.ASCII;

    // Acquire the IMultiLanguage3 interface.
    IMultiLanguage3 detector = new CMultiLanguageClass();
    if (detector == null)
    {
        throw new COMException("Failed to get IMultilang3");
    }

    try
    {
        bool restrict = preferredEncodings != null;

        int[] blank = new int[restrict ? preferredEncodings.Length : Encoding.GetEncodings().Length];
        uint hitCount = (uint)blank.Length;
        ushort fallbackChar = '?';

        // Allocate the unmanaged input list (only when one was supplied) and output buffer.
        IntPtr prefPtr = IntPtr.Zero;
        if (restrict)
        {
            prefPtr = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
        }

        IntPtr outPtr = Marshal.AllocCoTaskMem(sizeof(uint) * blank.Length);

        try
        {
            if (restrict)
            {
                Marshal.Copy(preferredEncodings, 0, prefPtr, preferredEncodings.Length);
            }

            // Zero the output buffer.
            Marshal.Copy(blank, 0, outPtr, blank.Length);

            Mlcpf options = Mlcpf.MldetectfValidNls;
            if (preserveOrder)
            {
                options |= Mlcpf.MldetectfPreserveOrder;
            }

            if (restrict)
            {
                options |= Mlcpf.MldetectfPreferredOnly;
            }

            uint prefCount = restrict ? (uint)preferredEncodings.Length : 0;

            detector.DetectOutboundCodePage(options,
                input, (uint)input.Length,
                prefPtr, prefCount,
                outPtr, ref hitCount,
                ref fallbackChar);

            // Take the first entry when MLang reported at least one code page.
            if (hitCount > 0)
            {
                int[] hits = new int[hitCount];
                Marshal.Copy(outPtr, hits, 0, hits.Length);
                chosen = Encoding.GetEncoding(hits[0]);
            }
        }
        finally
        {
            if (prefPtr != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(prefPtr);
            }

            Marshal.FreeCoTaskMem(outPtr);
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(detector);
    }

    return chosen;
}
/// <summary>
/// Asks MLang (<c>IMultiLanguage3.DetectOutboundCodePage</c>) which encodings can
/// represent <paramref name="input"/>.
/// </summary>
/// <param name="input">Text that is going to be encoded.</param>
/// <param name="preferredEncodings">
/// Code pages to restrict the detection to. When <c>null</c>, no preferred list is passed,
/// the "preferred only" flag is not set, and the result buffer is sized by
/// <c>Encoding.GetEncodings().Length</c>.
/// </param>
/// <param name="preserveOrder">When <c>true</c>, the "preserve order" flag is passed to MLang.</param>
/// <returns>
/// The encodings for the code pages MLang reported; a single ASCII entry for an empty
/// string; an empty array when nothing was reported.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public static Encoding[] DetectOutgoingEncodings(string input, int[] preferredEncodings, bool preserveOrder)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Empty strings can always be encoded as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // A null list used to be dereferenced (NullReferenceException); treat it as
    // "no restriction", as the single-result DetectOutgoingEncoding does.
    bool hasPreferred = preferredEncodings != null;

    List<Encoding> result = new List<Encoding>();

    // Get the IMultiLanguage3 interface.
    IMultiLanguage3 multilang3 = new CMultiLanguageClass();
    if (multilang3 == null)
    {
        throw new COMException("Failed to get IMultilang3");
    }

    try
    {
        int capacity = hasPreferred ? preferredEncodings.Length : Encoding.GetEncodings().Length;
        int[] resultCodePages = new int[capacity];
        uint detectedCodepages = (uint)capacity;
        ushort specialChar = '?';

        // Both buffers are allocated inside the try so that a failure while
        // allocating the second one cannot leak the first.
        IntPtr pPrefEncs = IntPtr.Zero;
        IntPtr pDetectedEncs = IntPtr.Zero;
        try
        {
            if (hasPreferred)
            {
                pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
                Marshal.Copy(preferredEncodings, 0, pPrefEncs, preferredEncodings.Length);
            }

            // Zero-initialize the output buffer from the managed array.
            pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * capacity);
            Marshal.Copy(resultCodePages, 0, pDetectedEncs, capacity);

            Mlcpf options = Mlcpf.MldetectfValidNls;
            if (preserveOrder)
            {
                options |= Mlcpf.MldetectfPreserveOrder;
            }

            if (hasPreferred)
            {
                options |= Mlcpf.MldetectfPreferredOnly;
            }

            multilang3.DetectOutboundCodePage(options,
                input, (uint)input.Length,
                pPrefEncs, (uint)(hasPreferred ? preferredEncodings.Length : 0),
                pDetectedEncs, ref detectedCodepages,
                ref specialChar);

            // detectedCodepages now holds the number of entries MLang wrote.
            if (detectedCodepages > 0)
            {
                int[] theResult = new int[detectedCodepages];
                Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);

                for (int i = 0; i < theResult.Length; i++)
                {
                    result.Add(Encoding.GetEncoding(theResult[i]));
                }
            }
        }
        finally
        {
            if (pPrefEncs != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(pPrefEncs);
            }

            if (pDetectedEncs != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(pDetectedEncs);
            }
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(multilang3);
    }

    return result.ToArray();
}
/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that are assumed to be
/// appropriate for the given raw data, as reported by MLang.
/// </summary>
/// <param name="input">Array containing the raw data.</param>
/// <param name="maxEncodings">Maximum number of encodings to detect; must be at least 1.</param>
/// <returns>
/// An array with the assumed encodings; a single ASCII entry for empty input; an empty
/// array when MLang reports nothing.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    if (maxEncodings < 1)
    {
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Empty data can always be treated as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // Grow short samples to exactly 256 bytes by repeating them back to back;
    // the final repetition is truncated to fit.
    if (input.Length < 256)
    {
        byte[] tiled = new byte[256];
        for (int offset = 0; offset < tiled.Length; offset += input.Length)
        {
            int remaining = tiled.Length - offset;
            int chunk = remaining < input.Length ? remaining : input.Length;
            Array.Copy(input, 0, tiled, offset, chunk);
        }

        input = tiled;
    }

    List<Encoding> encodings = new List<Encoding>();

    // Obtain the IMultiLanguage2 interface.
    IMultiLanguage2 detector = new CMultiLanguageClass();
    if (detector == null)
    {
        throw new COMException("Failed to get IMultilang2");
    }

    try
    {
        DetectEncodingInfo[] guesses = new DetectEncodingInfo[maxEncodings];
        int guessCount = guesses.Length;
        int sourceLength = input.Length;

        // Called without any detection options; guessCount is updated by the call.
        detector.DetectInputCodepage(Mldetectcp.MldetectcpNone, 0, ref input[0], ref sourceLength, ref guesses[0], ref guessCount);

        int index = 0;
        while (index < guessCount)
        {
            encodings.Add(Encoding.GetEncoding((int)guesses[index].nCodePage));
            index++;
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(detector);
    }

    return encodings.ToArray();
}
/// <summary>
/// Queries MLang (<c>IMultiLanguage3.DetectOutboundCodePage</c>) for the encodings able to
/// represent <paramref name="input"/>.
/// </summary>
/// <param name="input">Text to analyze; <c>null</c> or empty yields only <c>Default</c>.</param>
/// <param name="preferredEncodings">
/// Code pages to restrict detection to. When null or empty, no list is passed and the
/// result buffer is sized by <c>SystemEncodingCount</c>.
/// </param>
/// <param name="preserveOrder">Whether to pass the "preserve order" flag to MLang.</param>
/// <returns>The encodings for the reported code pages, possibly empty.</returns>
private static Encoding[] FindEncodings(string input, int[] preferredEncodings, bool preserveOrder)
{
    // Nothing to analyze: answer with the default encoding only.
    if (string.IsNullOrEmpty(input))
    {
        return new[] { Default };
    }

    bool usePreferred = !preferredEncodings.IsNullOrEmpty();
    List<Encoding> encodings = new List<Encoding>();

    // Obtain the IMultiLanguage3 interface.
    IMultiLanguage3 mlang = new CMultiLanguageClass();
    if (mlang == null)
    {
        throw new COMException("Failed to get " + nameof(IMultiLanguage3));
    }

    try
    {
        int capacity = SystemEncodingCount;
        if (usePreferred)
        {
            capacity = preferredEncodings.Length;
        }

        int[] zeroed = new int[capacity];
        uint reported = (uint)zeroed.Length;
        ushort replacementChar = '?';

        // Unmanaged buffers: the optional preferred list and the output code pages.
        IntPtr preferredPtr = IntPtr.Zero;
        if (usePreferred)
        {
            preferredPtr = Marshal.AllocCoTaskMem(sizeof(uint) * preferredEncodings.Length);
        }

        IntPtr detectedPtr = Marshal.AllocCoTaskMem(sizeof(uint) * zeroed.Length);

        try
        {
            uint preferredCount = 0;
            MLCPF options = MLCPF.MLDETECTF_VALID_NLS;

            if (usePreferred)
            {
                Marshal.Copy(preferredEncodings, 0, preferredPtr, preferredEncodings.Length);
                preferredCount = (uint)preferredEncodings.Length;
            }

            // Zero the output buffer.
            Marshal.Copy(zeroed, 0, detectedPtr, zeroed.Length);

            if (preserveOrder)
            {
                options |= MLCPF.MLDETECTF_PRESERVE_ORDER;
            }

            if (usePreferred)
            {
                options |= MLCPF.MLDETECTF_PREFERRED_ONLY;
            }

            mlang.DetectOutboundCodePage(options,
                input, (uint)input.Length,
                preferredPtr, preferredCount,
                detectedPtr, ref reported,
                ref replacementChar);

            // Translate every reported code page into an Encoding.
            if (reported > 0)
            {
                int[] codePages = new int[reported];
                Marshal.Copy(detectedPtr, codePages, 0, codePages.Length);

                foreach (int codePage in codePages)
                {
                    encodings.Add(Encoding.GetEncoding(codePage));
                }
            }
        }
        finally
        {
            if (!preferredPtr.IsZero())
            {
                Marshal.FreeCoTaskMem(preferredPtr);
            }

            Marshal.FreeCoTaskMem(detectedPtr);
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(mlang);
    }

    return encodings.ToArray();
}
/// <summary>
/// Asks MLang (<c>IMultiLanguage2.DetectInputCodepage</c>) which encodings the raw bytes
/// are likely to be in.
/// </summary>
/// <param name="input">Raw data to analyze; null or empty yields only <c>Default</c>.</param>
/// <param name="maxEncodings">Maximum number of encodings to report; values below 1 are treated as 1.</param>
/// <returns>The encodings for the reported code pages, possibly empty.</returns>
public static Encoding[] GetEncodings(byte[] input, int maxEncodings)
{
    if (input.IsNullOrEmpty())
    {
        return new[] { Default };
    }

    // At least one candidate slot is always requested.
    maxEncodings = Math.Max(maxEncodings, 1);

    // Samples shorter than 256 bytes are repeated until the buffer holds 256 bytes.
    if (input.Length < 256)
    {
        byte[] tiled = new byte[256];
        int written = 0;
        while (written < tiled.Length)
        {
            int chunk = Math.Min(input.Length, tiled.Length - written);
            Array.Copy(input, 0, tiled, written, chunk);
            written += chunk;
        }

        input = tiled;
    }

    List<Encoding> encodings = new List<Encoding>();

    // Obtain the IMultiLanguage2 interface.
    IMultiLanguage2 mlang = new CMultiLanguageClass();
    if (mlang == null)
    {
        throw new COMException("Failed to get " + nameof(IMultiLanguage2));
    }

    try
    {
        DetectEncodingInfo[] candidates = new DetectEncodingInfo[maxEncodings];
        int candidateCount = candidates.Length;
        int byteCount = input.Length;

        // candidateCount is updated by the call.
        mlang.DetectInputCodepage(MLDETECTCP.MLDETECTCP_NONE, 0, ref input[0], ref byteCount, ref candidates[0], ref candidateCount);

        for (int index = 0; index < candidateCount; index++)
        {
            int codePage = (int)candidates[index].nCodePage;
            encodings.Add(Encoding.GetEncoding(codePage));
        }
    }
    finally
    {
        Marshal.FinalReleaseComObject(mlang);
    }

    return encodings.ToArray();
}
/// <summary>
/// Detects the encoding of the first <paramref name="dataLength"/> bytes of
/// <paramref name="data"/> using MLang's <c>DetectInputCodepage</c>, optionally trying
/// <c>SimpleDetectEncoding</c> first.
/// </summary>
/// <param name="data">Buffer containing the raw data.</param>
/// <param name="dataLength">Number of bytes of <paramref name="data"/> to inspect.</param>
/// <param name="flags">
/// Detection options. <c>TrySimpleDetectFirst</c> runs the simple detector before MLang;
/// the value is masked with <c>MLDETECTCP_MASK</c> before being passed to MLang.
/// </param>
/// <returns>
/// The detected encoding, or <c>null</c> when <paramref name="dataLength"/> is 0, when MLang
/// returns a result other than 0 or 1, or when it reports no candidate.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="data"/> is shorter than <paramref name="dataLength"/>.</exception>
public static Encoding DetectEncoding(byte[] data, int dataLength, DetectOption flags)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (data.Length < dataLength)
    {
        throw new ArgumentException("data length is less than dataLength");
    }

    if (dataLength == 0)
    {
        return null;
    }

    bool simpleFirst = (flags & DetectOption.TrySimpleDetectFirst) > DetectOption.Default;
    if (simpleFirst)
    {
        Encoding quickGuess = SimpleDetectEncoding(data, dataLength);
        if (quickGuess != null)
        {
            return quickGuess;
        }
    }

    IMultiLanguage2 mlang = new CMultiLanguageClass() as IMultiLanguage2;
    if (mlang == null)
    {
        // MLang is unavailable. The simple detector is the only option left, and it
        // has already been tried (and failed) when simpleFirst is set.
        if (simpleFirst)
        {
            return null;
        }

        return SimpleDetectEncoding(data, dataLength);
    }

    try
    {
        DetectEncodingInfo[] candidates = new DetectEncodingInfo[1];
        int candidateCount = candidates.Length;

        var status = mlang.DetectInputCodepage(((MLDETECTCP) flags) & MLDETECTCP.MLDETECTCP_MASK, 0, data, ref dataLength, candidates, ref candidateCount);

        // Only results 0 and 1 are treated as a usable answer; anything else yields null.
        bool usable = status == 0 || status == 1;
        if (usable && candidateCount > 0)
        {
            return Encoding.GetEncoding((int) candidates[0].nCodePage);
        }

        return null;
    }
    finally
    {
        Marshal.FinalReleaseComObject(mlang);
    }
}