/// <summary>
/// Asks the MLang detector (<c>IMultiLanguage2.DetectInputCodepage</c>) which code pages
/// the supplied raw bytes are likely encoded in and returns the matching
/// <see cref="Encoding"/> objects.
/// </summary>
/// <param name="input">The raw bytes to examine.</param>
/// <param name="maxEncodings">The maximum number of encodings to detect; must be at least 1.</param>
/// <returns>
/// The detected encodings, in the order the detector reported them. An empty
/// <paramref name="input"/> always yields a single-element array containing
/// <see cref="Encoding.ASCII"/>. The array is empty when the detector reports no candidates.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
/// <exception cref="COMException">The multi-language object could not be obtained.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    if (maxEncodings < 1)
    {
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Nothing to analyse: an empty payload can always be represented as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // Samples shorter than 256 bytes are repeated (the last repetition is cut
    // short if necessary) so that the detector receives exactly 256 bytes.
    const int MinimumSampleLength = 256;
    if (input.Length < MinimumSampleLength)
    {
        byte[] padded = new byte[MinimumSampleLength];
        int filled = 0;
        while (filled < MinimumSampleLength)
        {
            int remaining = MinimumSampleLength - filled;
            int chunk = remaining < input.Length ? remaining : input.Length;
            Array.Copy(input, 0, padded, filled, chunk);
            filled += chunk;
        }

        input = padded;
    }

    List<Encoding> encodings = new List<Encoding>();

    // Obtain the IMultiLanguage2 interface from the MLang coclass.
    IMultiLanguage2 detector = new CMultiLanguageClass();
    if (detector == null)
    {
        throw new COMException("Failed to get IMultilang2");
    }

    try
    {
        DetectEncodingInfo[] candidates = new DetectEncodingInfo[maxEncodings];

        // In: capacity of the candidates array. Out: number of entries the detector filled in.
        int candidateCount = candidates.Length;
        int sampleLength = input.Length;

        // No detection options and no preferred code page (0) are supplied.
        detector.DetectInputCodepage(
            MLDETECTCP.MLDETECTCP_NONE,
            0,
            ref input[0],
            ref sampleLength,
            ref candidates[0],
            ref candidateCount);

        // Translate every reported code page number into an Encoding instance.
        for (int index = 0; index < candidateCount; index++)
        {
            int codePage = (int)candidates[index].nCodePage;
            encodings.Add(Encoding.GetEncoding(codePage));
        }
    }
    finally
    {
        // Always release the COM object, even when detection or the lookup throws.
        Marshal.FinalReleaseComObject(detector);
    }

    return encodings.ToArray();
}
/// <summary>
/// Declaration of <c>DetectInputCodepage</c>. The method is <c>extern</c>, so its
/// implementation is supplied outside this source.
/// NOTE(review): presumably the interop binding for MLang's
/// IMultiLanguage2::DetectInputCodepage — confirm against the enclosing type's COM attributes.
/// </summary>
/// <param name="flags">Detection options as an <c>MLDETECTCP</c> value (input only).</param>
/// <param name="dwPrefWinCodePage">Input-only code page value; presumably the caller's preferred Windows code page — TODO confirm.</param>
/// <param name="pSrcStr">Input-only reference to a byte; presumably the first byte of the buffer to analyse — TODO confirm.</param>
/// <param name="pcSrcSize">In/out size value; presumably the number of source bytes on entry — TODO confirm what is written back.</param>
/// <param name="lpEncoding">In/out reference to a <c>DetectEncodingInfo</c>; presumably the first element of the caller's result array — TODO confirm.</param>
/// <param name="pnScores">In/out count; presumably the result capacity on entry and the number of results on return — TODO confirm.</param>
public virtual extern void DetectInputCodepage([In] MLDETECTCP flags, [In] uint dwPrefWinCodePage, [In] ref byte pSrcStr, [In, Out] ref int pcSrcSize, [In, Out] ref DetectEncodingInfo lpEncoding, [In, Out] ref int pnScores);
/// <summary>
/// Declaration of <c>IMultiLanguage3DetectInputCodepage</c>. The method is <c>extern</c>,
/// so its implementation is supplied outside this source. Unlike <c>DetectInputCodepage</c>,
/// it takes the flags as a plain <c>uint</c> and the source as a <c>ref sbyte</c>.
/// NOTE(review): the name prefix suggests this is the IMultiLanguage3 counterpart of
/// DetectInputCodepage — confirm against the enclosing type's COM attributes.
/// </summary>
/// <param name="dwFlag">Input-only flag value; presumably the detection options — TODO confirm the accepted values.</param>
/// <param name="dwPrefWinCodePage">Input-only code page value; presumably the caller's preferred Windows code page — TODO confirm.</param>
/// <param name="pSrcStr">Input-only reference to a signed byte; presumably the first byte of the buffer to analyse — TODO confirm.</param>
/// <param name="pcSrcSize">In/out size value; presumably the number of source bytes on entry — TODO confirm what is written back.</param>
/// <param name="lpEncoding">In/out reference to a <c>DetectEncodingInfo</c>; presumably the first element of the caller's result array — TODO confirm.</param>
/// <param name="pnScores">In/out count; presumably the result capacity on entry and the number of results on return — TODO confirm.</param>
public virtual extern void IMultiLanguage3DetectInputCodepage([In] uint dwFlag, [In] uint dwPrefWinCodePage, [In] ref sbyte pSrcStr, [In, Out] ref int pcSrcSize, [In, Out] ref DetectEncodingInfo lpEncoding, [In, Out] ref int pnScores);
/// <summary>
/// Declaration of <c>DetectCodepageInIStream</c>. The method is <c>extern</c>, so its
/// implementation is supplied outside this source. It takes the data to examine as an
/// <c>IStream</c> rather than as a byte reference plus length.
/// NOTE(review): presumably the interop binding for MLang's
/// IMultiLanguage2::DetectCodepageInIStream — confirm against the enclosing type's COM attributes.
/// </summary>
/// <param name="flags">Detection options as an <c>MLDETECTCP</c> value (input only).</param>
/// <param name="dwPrefWinCodePage">Input-only code page value; presumably the caller's preferred Windows code page — TODO confirm.</param>
/// <param name="pstmIn">Input stream, marshalled as a COM interface pointer; presumably the data to analyse — TODO confirm.</param>
/// <param name="lpEncoding">In/out reference to a <c>DetectEncodingInfo</c>; presumably the first element of the caller's result array — TODO confirm.</param>
/// <param name="pnScores">In/out count; presumably the result capacity on entry and the number of results on return — TODO confirm.</param>
public virtual extern void DetectCodepageInIStream([In] MLDETECTCP flags, [In] uint dwPrefWinCodePage, [In, MarshalAs(UnmanagedType.Interface)] IStream pstmIn, [In, Out] ref DetectEncodingInfo lpEncoding, [In, Out] ref int pnScores);
/// <summary>
/// Declaration of <c>IMultiLanguage3DetectCodepageInIStream</c>. The method is <c>extern</c>,
/// so its implementation is supplied outside this source. Unlike
/// <c>DetectCodepageInIStream</c>, it takes the flags as a plain <c>uint</c>.
/// NOTE(review): the name prefix suggests this is the IMultiLanguage3 counterpart of
/// DetectCodepageInIStream — confirm against the enclosing type's COM attributes.
/// </summary>
/// <param name="dwFlag">Input-only flag value; presumably the detection options — TODO confirm the accepted values.</param>
/// <param name="dwPrefWinCodePage">Input-only code page value; presumably the caller's preferred Windows code page — TODO confirm.</param>
/// <param name="pstmIn">Input stream, marshalled as a COM interface pointer; presumably the data to analyse — TODO confirm.</param>
/// <param name="lpEncoding">In/out reference to a <c>DetectEncodingInfo</c>; presumably the first element of the caller's result array — TODO confirm.</param>
/// <param name="pnScores">In/out count; presumably the result capacity on entry and the number of results on return — TODO confirm.</param>
public virtual extern void IMultiLanguage3DetectCodepageInIStream([In] uint dwFlag, [In] uint dwPrefWinCodePage, [In, MarshalAs(UnmanagedType.Interface)] IStream pstmIn, [In, Out] ref DetectEncodingInfo lpEncoding, [In, Out] ref int pnScores);