/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that MLang assumes to be
/// appropriate for the given raw data.
/// </summary>
/// <param name="input">Array containing the raw data.</param>
/// <param name="maxEncodings">Maximum number of encodings to detect; must be at least 1.</param>
/// <returns>
/// An array with the assumed encodings. Empty input yields a single-element array
/// containing <see cref="Encoding.ASCII"/>; the array is empty when MLang reports no candidates.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    if (maxEncodings < 1)
    {
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Empty strings can always be encoded as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // Expand short input to exactly 256 bytes by repeating it cyclically.
    // NOTE(review): presumably MLang detection is unreliable on very short buffers - confirm.
    if (input.Length < 256)
    {
        byte[] padded = new byte[256];
        for (int i = 0; i < padded.Length; i++)
        {
            padded[i] = input[i % input.Length];
        }

        input = padded;
    }

    List<Encoding> result = new List<Encoding>();

    // Get the IMultiLanguage2 interface. No null check is needed: "new" either
    // returns an instance or throws when the COM class cannot be created.
    MultiLanguage.IMultiLanguage2 multilang2 = new MultiLanguage.CMultiLanguageClass();
    try
    {
        MultiLanguage.DetectEncodingInfo[] detectedEncodings = new MultiLanguage.DetectEncodingInfo[maxEncodings];

        // In: capacity of detectedEncodings. It is passed by reference and used
        // below as the number of entries that were filled in.
        int scores = detectedEncodings.Length;
        int srcLen = input.Length;

        // Setup options (none).
        MultiLanguage.MLDETECTCP options = MultiLanguage.MLDETECTCP.MLDETECTCP_NONE;

        // Finally... call to DetectInputCodepage (0 = no preferred code page).
        multilang2.DetectInputCodepage(options, 0, ref input[0], ref srcLen, ref detectedEncodings[0], ref scores);

        // Translate every reported code page into a managed Encoding.
        for (int i = 0; i < scores; i++)
        {
            result.Add(Encoding.GetEncoding((int)detectedEncodings[i].nCodePage));
        }
    }
    finally
    {
        // Release the COM object deterministically, even when detection throws.
        Marshal.FinalReleaseComObject(multilang2);
    }

    // Empty when nothing was detected.
    return result.ToArray();
}
/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that MLang assumes to be
/// appropriate for the given raw data, taking a preferred code page into account.
/// </summary>
/// <param name="input">Array containing the raw data.</param>
/// <param name="maxEncodings">Maximum number of encodings to detect; must be at least 1.</param>
/// <param name="preferedCodePage">
/// Code page handed to MLang's DetectInputCodepage as the preferred code page.
/// </param>
/// <returns>
/// An array with the assumed encodings. Empty input yields a single-element array
/// containing <see cref="Encoding.ASCII"/>; the array is empty when detection fails
/// or MLang reports no candidates.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings, uint preferedCodePage)
{
    if (maxEncodings < 1)
    {
        // The parameter name comes first, the message second.
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Empty strings can always be encoded as ASCII.
    if (input.Length == 0)
    {
        return new Encoding[] { Encoding.ASCII };
    }

    // Expand short input to exactly 256 bytes by repeating it cyclically.
    // NOTE(review): presumably MLang detection is unreliable on very short buffers - confirm.
    if (input.Length < 256)
    {
        byte[] padded = new byte[256];
        for (int i = 0; i < padded.Length; i++)
        {
            padded[i] = input[i % input.Length];
        }

        input = padded;
    }

    List<Encoding> result = new List<Encoding>();

    // Get the IMultiLanguage3 interface. No null check is needed: "new" either
    // returns an instance or throws when the COM class cannot be created.
    MultiLanguage.IMultiLanguage3 multilang3 = new MultiLanguage.CMultiLanguageClass();
    try
    {
        MultiLanguage.DetectEncodingInfo[] detectedEncodings = new MultiLanguage.DetectEncodingInfo[maxEncodings];

        // In: capacity of detectedEncodings. It is passed by reference and used
        // below as the number of entries that were filled in.
        int scores = detectedEncodings.Length;
        int srcLen = input.Length;

        // Setup options (none).
        MultiLanguage.MLDETECTCP options = MultiLanguage.MLDETECTCP.MLDETECTCP_NONE;

        // This interop signature takes the source as signed bytes. Reinterpret each
        // byte; unchecked keeps values >= 0x80 from throwing in checked builds.
        sbyte[] signedInput = new sbyte[srcLen];
        for (int i = 0; i < srcLen; ++i)
        {
            signedInput[i] = unchecked((sbyte)input[i]);
        }

        // Stays E_FAIL when the call throws before returning a status.
        MultiLanguage.HRESULT hres = MultiLanguage.HRESULT.E_FAIL;
        try
        {
            hres = multilang3.DetectInputCodepage(
                options,
                preferedCodePage,
                ref signedInput[0],
                ref srcLen,
                ref detectedEncodings[0],
                ref scores);
        }
        catch (COMException exc)
        {
            // NOTE(review): failures are reported through a message box rather than
            // propagated; kept as-is so callers see the same behavior.
            MessageBox.Show(exc.Message);
        }

        if (hres != MultiLanguage.HRESULT.S_OK)
        {
            // Do not read detectedEncodings here: after a failed call its entries may
            // still be zero-initialised and would map to the system default encoding.
            MessageBox.Show("Error 3 ! HRESULT=" + hres.ToString());
        }
        else
        {
            // Translate every reported code page into a managed Encoding.
            for (int i = 0; i < scores; i++)
            {
                result.Add(Encoding.GetEncoding((int)detectedEncodings[i].nCodePage));
            }
        }
    }
    finally
    {
        // Release the COM object deterministically, even when detection throws.
        Marshal.FinalReleaseComObject(multilang3);
    }

    // Empty when detection failed or nothing was detected.
    return result.ToArray();
}