/// <summary>
        /// Detects the text encoding of a stream by feeding it, from offset 0, to a charset detector.
        /// </summary>
        /// <param name="stream">
        /// Stream to inspect. Its <c>Length</c>, <c>Position</c> and <c>Seek</c> members are used, so it
        /// must be seekable. The position found on entry is restored before returning.
        /// </param>
        /// <returns>
        /// The detected encoding, or <see cref="Encoding.Default"/> when the stream is null, empty,
        /// or the detector reports no charset.
        /// </returns>
        private static Encoding GetEncoding(Stream stream)
        {
            if (stream == null || stream.Length <= 0)
            {
                return(Encoding.Default);
            }

            // The stream is sampled in 1024-byte chunks.
            var buffer = new byte[1024];
            var ud     = new UniversalDetector(null);

            var seek = stream.Position;
            try
            {
                stream.Seek(0, SeekOrigin.Begin);

                int read;
                while (!ud.IsDone() && (read = stream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // Hand over only the bytes actually read; the tail of the buffer may still
                    // hold data from the previous iteration.
                    ud.HandleData(buffer, 0, read);
                }
                ud.DataEnd();
            }
            finally
            {
                // Put the caller's position back even if reading or detection failed.
                stream.Seek(seek, SeekOrigin.Begin);
            }

            var encoding = ud.GetDetectedCharset();
            if (encoding == null)
            {
                return(Encoding.Default);
            }

            // The two unusual-byte-order UCS-4 charset names are mapped to "UTF-32" before lookup
            // (presumably because .NET has no encoding registered under those names).
            if (encoding == Constants.CHARSET_X_ISO_10646_UCS_4_2143 || encoding == Constants.CHARSET_X_ISO_10646_UCS_4_3412)
            {
                encoding = "UTF-32";
            }

            return(Encoding.GetEncoding(encoding));
        }
示例#2
0
        /// <summary>
        /// Detects the text encoding of a seekable stream, sampling it from its current position.
        /// </summary>
        /// <param name="seekable_stream">
        /// Stream to sample; it must support seeking. Its position on entry is restored before returning.
        /// </param>
        /// <returns>
        /// The detected encoding, or <c>null</c> when the detector does not report itself as done.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="seekable_stream"/> is null.</exception>
        /// <exception cref="NotSupportedException">The stream cannot seek.</exception>
        public static Encoding Detect(Stream seekable_stream)
        {
            if (seekable_stream == null)
            {
                throw new ArgumentNullException("seekable_stream");
            }
            if (!seekable_stream.CanSeek)
            {
                // A specific exception type (still derived from Exception) with the original message.
                throw new NotSupportedException("Detect encoding error: stream can't seek.");
            }

            long ori_pos = seekable_stream.Position;

            UniversalDetector detector = new UniversalDetector(null);
            try
            {
                const int buffer_size = 4096;
                byte[]    buffer      = new byte[buffer_size];
                int       cur;

                while ((cur = seekable_stream.Read(buffer, 0, buffer_size)) > 0 && !detector.IsDone())
                {
                    detector.HandleData(buffer, 0, cur);
                }
                detector.DataEnd();
            }
            finally
            {
                // Restore the caller's position even when reading or detection throws.
                seekable_stream.Seek(ori_pos, SeekOrigin.Begin);
            }

            if (detector.IsDone())
            {
                return(Encoding.GetEncoding(detector.GetDetectedCharset()));
            }
            return(null);
        }
        /// <summary>
        /// Determines a file's text encoding by running the charset detector over its whole content.
        /// </summary>
        /// <param name="filePath">Path of the file to read.</param>
        /// <returns>
        /// The encoding named by the detector, or <c>defaultEncoding</c> when no charset name is reported.
        /// </returns>
        public Encoding ResolveFileEncoding(string filePath)
        {
            var content = File.ReadAllBytes(filePath);

            var detector = new UniversalDetector();
            detector.HandleData(content);
            detector.DataEnd();

            var detectedName = detector.DetectedCharsetName;
            if (detectedName == null)
            {
                return(defaultEncoding);
            }

            return(Encoding.GetEncoding(detectedName));
        }
示例#4
0
        /// <summary>
        /// Runs the charset detector over the first <paramref name="DetectLen"/> bytes of a buffer.
        /// </summary>
        /// <param name="DetectBuff">Bytes to analyse.</param>
        /// <param name="DetectLen">Number of bytes, counted from index 0, to hand to the detector.</param>
        /// <returns>The detected charset name, or the literal "default" when none was detected.</returns>
        private static string DetectEncoding_Bytes(byte[] DetectBuff, int DetectLen)
        {
            UniversalDetector sniffer = new UniversalDetector(null);
            sniffer.HandleData(DetectBuff, 0, DetectLen);
            sniffer.DataEnd();

            string charsetName = sniffer.GetDetectedCharset();
            return(charsetName ?? "default");
        }
示例#5
0
        /// <summary>
        /// Reads a stream to its end, detects the charset of the bytes and decodes them.
        /// </summary>
        /// <param name="stream">Source stream, read from its current position. May be null.</param>
        /// <param name="htmlText">
        /// Receives the decoded text; stays an empty string when the stream is null or empty, no charset
        /// is detected, or an error occurs.
        /// </param>
        /// <param name="enc">
        /// Receives the detected encoding; stays <see cref="Encoding.Default"/> in the cases listed above.
        /// </param>
        private void DetectedCharset(Stream stream, out string htmlText, out Encoding enc)
        {
            htmlText = "";
            enc      = Encoding.Default;
            if (stream == null)
            {
                return;
            }

            try {
                using (MemoryStream msTemp = new MemoryStream())
                {
                    // Buffer the whole input so it can be both analysed and decoded.
                    int    len;
                    byte[] buff = new byte[512];
                    while ((len = stream.Read(buff, 0, buff.Length)) > 0)
                    {
                        msTemp.Write(buff, 0, len);
                    }

                    if (msTemp.Length == 0)
                    {
                        return;
                    }

                    byte[] PageBytes = msTemp.ToArray();

                    msTemp.Seek(0, SeekOrigin.Begin);
                    int               DetLen;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while ((DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                    {
                        // Pass only the bytes just read; the rest of the buffer is stale.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();

                    string charset = Det.GetDetectedCharset();
                    if (charset != null)
                    {
                        Encoding detected = Encoding.GetEncoding(charset);
                        // Encoding of the page content.
                        enc = detected;
                        // Decoded page content.
                        htmlText = detected.GetString(PageBytes);
                    }
                }
            } catch {
                // Best effort: on any failure the outputs keep whatever was assigned so far.
            }
        }
示例#6
0
 /// <summary>
 /// Identifies the character set of a text file.
 /// </summary>
 /// <param name="filename">Path of the file to analyse.</param>
 /// <returns>
 /// The charset name reported by the detector, or <c>null</c> when none is detected or any
 /// exception occurs while reading or analysing the file.
 /// </returns>
 public static string GetCharSet(string filename)
 {
     try
     {
         byte[] pReadByte;

         // Dispose the reader and the underlying file stream so the file handle is released promptly.
         using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
         using (BinaryReader r = new BinaryReader(fs))
         {
             pReadByte = r.ReadBytes((int)r.BaseStream.Length);
         }

         UniversalDetector Det = new UniversalDetector(null);
         Det.HandleData(pReadByte, 0, pReadByte.Length);
         Det.DataEnd();
         return(Det.GetDetectedCharset());
     }
     catch
     {
         return(null);
     }
 }
示例#7
0
        /// <summary>
        /// Identifies an encoding with the UniversalCharDet algorithm.
        /// </summary>
        /// <param name="bytes">
        /// Not referenced by this method: the data is read from <c>this.MemoryStream</c>, starting at
        /// its current position. NOTE(review): confirm that the stream wraps these bytes.
        /// </param>
        /// <returns>The detected encoding, or <c>null</c> when no charset name is reported.</returns>
        private Encoding GetEncodingByUniversalCharDet(byte[] bytes)
        {
            var detector     = new UniversalDetector(null);
            var detectBuffer = new byte[4096];

            int read;
            while ((read = this.MemoryStream.Read(detectBuffer, 0, detectBuffer.Length)) > 0 && !detector.IsDone())
            {
                // Only the bytes just read are valid; the remainder of the buffer is stale.
                detector.HandleData(detectBuffer, 0, read);
            }

            detector.DataEnd();

            var charset = detector.GetDetectedCharset();
            if (!string.IsNullOrEmpty(charset))
            {
                return(Encoding.GetEncoding(charset));
            }

            return(null);
        }
示例#8
0
        /// <summary>
        /// Reads a stream to its end while detecting its charset, then decodes everything that was read.
        /// </summary>
        /// <param name="stream">Source stream, consumed from its current position.</param>
        /// <param name="default_encoding">
        /// Encoding used when the detector does not finish; when this is null, <c>Default</c> is used.
        /// </param>
        /// <returns>The decoded text.</returns>
        public static string DetectAndReadToEnd(Stream stream, Encoding default_encoding)
        {
            const int chunkSize = 4096;

            var captured  = new MemoryStream();
            var chunk     = new byte[chunkSize];
            var sniffer   = new UniversalDetector(null);
            var confident = false;

            for (int got = stream.Read(chunk, 0, chunkSize); got > 0; got = stream.Read(chunk, 0, chunkSize))
            {
                captured.Write(chunk, 0, got);

                // Once the detector is done, later chunks are only captured, not analysed.
                if (confident)
                {
                    continue;
                }

                sniffer.HandleData(chunk, 0, got);
                confident = sniffer.IsDone();
            }
            sniffer.DataEnd();

            Encoding chosen = default_encoding;
            if (confident)
            {
                chosen = Encoding.GetEncoding(sniffer.GetDetectedCharset());
            }
            else if (chosen == null)
            {
                chosen = Default;
            }

            captured.Seek(0, SeekOrigin.Begin);

            using (var reader = new StreamReader(captured, chosen))
            {
                return(reader.ReadToEnd());
            }
        }
示例#9
0
        /// <summary>
        /// Reads a file, detects its charset and returns its content decoded with that charset.
        /// </summary>
        /// <param name="file">File to read.</param>
        /// <returns>
        /// The decoded text, or an empty string when the file is empty or no charset is detected
        /// (in the latter case a message is reported through <c>EchoHelper.Echo</c>).
        /// </returns>
        public static string Read_File(FileInfo file)
        {
            string tmp_result = "";

            // Both streams are disposed so the file handle is released even when an exception is thrown.
            using (Stream mystream = file.OpenRead())
            using (MemoryStream msTemp = new MemoryStream())
            {
                int    len;
                byte[] buff = new byte[512];

                while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                {
                    msTemp.Write(buff, 0, len);
                }

                if (msTemp.Length > 0)
                {
                    byte[] PageBytes = msTemp.ToArray();

                    msTemp.Seek(0, SeekOrigin.Begin);
                    int               DetLen;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while ((DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                    {
                        // Pass only the bytes just read; the rest of the buffer is stale.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();

                    string charset = Det.GetDetectedCharset();
                    if (charset != null)
                    {
                        tmp_result = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
                    }
                    else
                    {
                        EchoHelper.Echo("编码识别失败,请手工转码为UTF8保存到任务文件夹。文件:" + file.Name.ToLower(), "编码识别", EchoHelper.EchoType.任务信息);
                    }
                }
            }
            return(tmp_result);
        }
示例#10
0
        /// <summary>
        /// Loads a resource's bytes and decodes them to text using a detected charset.
        /// </summary>
        /// <param name="resource">Resource whose data is fetched via <c>KFN.GetDataFromResource</c>.</param>
        /// <returns>
        /// The decoded text. Code page 65001 (UTF-8) is used when the detector reports nothing
        /// or reports "Not supported".
        /// </returns>
        private string GetResourceText(KFN.ResourceFile resource)
        {
            byte[] payload = KFN.GetDataFromResource(resource);

            UniversalDetector sniffer = new UniversalDetector(null);
            sniffer.HandleData(payload, 0, payload.Length);
            sniffer.DataEnd();
            string charsetName = sniffer.GetDetectedCharset();

            // Start from UTF-8 and override only with a usable detection result.
            // The KOI8-R / X-MAC-CYRILLIC -> WINDOWS-1251 remap is disabled here.
            int  codePage = 65001;
            bool usable   = charsetName != null && charsetName != "Not supported";
            if (usable)
            {
                codePage = Encoding.GetEncoding(charsetName).CodePage;
            }

            char[] decoded = Encoding.GetEncoding(codePage).GetChars(payload);
            return(new string(decoded));
        }
示例#11
0
        /// <summary>
        /// Click handler that opens a viewer window for the resource selected in <c>resourcesView</c>:
        /// a text window (with charset detection) for "Text" resources, an image window for "Image" ones.
        /// Does nothing when no resource is selected or the type is neither of those.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        public void ViewResourceButtonClick(object sender, RoutedEventArgs e)
        {
            KFN.ResourceFile resource = resourcesView.SelectedItem as KFN.ResourceFile;

            // The 'as' cast yields null when nothing (or an unexpected item) is selected.
            if (resource == null)
            {
                return;
            }

            if (resource.FileType == "Text")
            {
                byte[] data = KFN.GetDataFromResource(resource);

                // Default to the UTF-8 code page unless the detector gives a usable answer.
                int detEncoding       = 65001;
                UniversalDetector Det = new UniversalDetector(null);
                Det.HandleData(data, 0, data.Length);
                Det.DataEnd();
                string enc = Det.GetDetectedCharset();
                if (enc != null && enc != "Not supported")
                {
                    // The KOI8-R / X-MAC-CYRILLIC -> WINDOWS-1251 remap is disabled here.
                    Encoding denc = Encoding.GetEncoding(enc);
                    detEncoding = denc.CodePage;
                }

                Encoding textEncoding = Encoding.GetEncoding(detEncoding);
                string   text         = new string(textEncoding.GetChars(data));

                // Encoding.GetEncodings() may not list every code page GetEncoding() can resolve,
                // so fall back to the encoding's own name instead of throwing from First().
                var    info        = Encoding.GetEncodings().FirstOrDefault(en => en.CodePage == detEncoding);
                string displayName = (info != null) ? info.DisplayName : textEncoding.EncodingName;

                Window viewWindow = new ViewWindow(
                    resource.FileName,
                    text,
                    displayName
                    );
                viewWindow.Show();
            }
            else if (resource.FileType == "Image")
            {
                byte[] data = KFN.GetDataFromResource(resource);

                Window viewWindow = new ImageWindow(resource.FileName, data);
                viewWindow.Show();
            }
        }
示例#12
0
        /// <summary>
        /// Detects the charset of a byte buffer and decodes the buffer to a string.
        /// </summary>
        /// <param name="buffer">Bytes to decode. May be null.</param>
        /// <returns>
        /// The decoded text, or an empty string when the buffer is null or empty, no charset is
        /// detected, or the detected charset name is rejected by <c>Encoding.GetEncoding</c>.
        /// </returns>
        public string GetString(byte[] buffer)
        {
            string result = string.Empty;

            if (buffer == null || buffer.Length == 0)
            {
                return(result);
            }

            UniversalDetector det        = new UniversalDetector(null);
            byte[]            DetectBuff = new byte[4096];

            using (MemoryStream msTemp = new MemoryStream(buffer))
            {
                int DetLen;
                while ((DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !det.IsDone())
                {
                    // Pass only the bytes just read; the rest of the buffer is stale.
                    det.HandleData(DetectBuff, 0, DetLen);
                }
            }
            det.DataEnd();

            string charset = det.GetDetectedCharset();
            if (charset != null)
            {
                try
                {
                    result = System.Text.Encoding.GetEncoding(charset).GetString(buffer);
                }
                catch (ArgumentException)
                {
                    // The charset name is not a supported encoding: keep the empty result.
                }
            }

            return(result);
        }
示例#13
0
        /// <summary>
        /// Returns the text encoding detected for a file.
        /// </summary>
        /// <param name="streamName">Path of the file to analyse.</param>
        /// <returns>
        /// The detected encoding, or <see cref="Encoding.Default"/> when the file is empty or no
        /// charset is detected.
        /// </returns>
        private static Encoding getEncoding(string streamName)
        {
            Encoding encoding = Encoding.Default;

            // Read-only access is enough for detection and also works for read-only files.
            using (Stream stream = new FileStream(streamName, FileMode.Open, FileAccess.Read))
            {
                UniversalDetector Det        = new UniversalDetector(null);
                byte[]            DetectBuff = new byte[4096];
                int               DetLen;
                bool              gotData    = false;

                // Feed the detector straight from the file; no in-memory copy of the content is needed.
                while ((DetLen = stream.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                {
                    gotData = true;
                    // Pass only the bytes just read; the rest of the buffer is stale.
                    Det.HandleData(DetectBuff, 0, DetLen);
                }

                if (gotData)
                {
                    Det.DataEnd();

                    string charset = Det.GetDetectedCharset();
                    if (charset != null)
                    {
                        encoding = Encoding.GetEncoding(charset);
                    }
                }

                return(encoding);
            }
        }
示例#14
0
        /// <summary>
        /// Detects a file's charset and, when it is detected and is not "UTF-8", rewrites the file
        /// in place as UTF-8. The detection outcome is written to the console.
        /// </summary>
        /// <param name="file">Path of the file to convert.</param>
        static void convert(string file)
        {
            string transcoded = null;

            using (BinaryReader reader = new BinaryReader(File.OpenRead(file)))
            {
                int    byteCount = (int)reader.BaseStream.Length;
                byte[] raw       = reader.ReadBytes(byteCount);

                UniversalDetector sniffer = new UniversalDetector(null);
                sniffer.HandleData(raw, 0, byteCount);
                sniffer.DataEnd();

                string charsetName = sniffer.GetDetectedCharset();
                if (!string.IsNullOrEmpty(charsetName))
                {
                    Console.WriteLine("Detected: {0} - {1}", file, charsetName);
                    if (charsetName != "UTF-8")
                    {
                        transcoded = Encoding.GetEncoding(charsetName).GetString(raw);
                    }
                }
                else
                {
                    Console.WriteLine("Warning: {0} not detected", file);
                }
            }

            // Nothing to rewrite: undetected, already UTF-8, or decoded to an empty string.
            if (string.IsNullOrEmpty(transcoded))
            {
                return;
            }

            using (StreamWriter writer = new StreamWriter(File.Open(file, FileMode.Create), Encoding.UTF8))
            {
                writer.Write(transcoded);
            }
        }
        /// <summary>
        /// Detects the charset of a file and prints the charset name and encoding name to the console.
        /// Empty files produce no output; when nothing is detected, "ASCII" is reported.
        /// </summary>
        /// <param name="filePath">Path of the file to analyse.</param>
        static void ProcessFile(String filePath)
        {
            // 'using' releases the handle even when reading or detection throws; read-only access
            // is all that detection needs.
            using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            {
                if (fileStream.Length <= 0)
                {
                    return;
                }

                int detectionLength;
                var detectionBuffer   = new Byte[4096];
                var universalDetector = new UniversalDetector(null);

                while ((detectionLength = fileStream.Read(detectionBuffer, 0, detectionBuffer.Length)) > 0 && !universalDetector.IsDone())
                {
                    // Pass only the bytes just read; the rest of the buffer is stale.
                    universalDetector.HandleData(detectionBuffer, 0, detectionLength);
                }

                universalDetector.DataEnd();

                var charset = universalDetector.GetDetectedCharset();
                if (charset != null)
                {
                    Console.WriteLine("Charset: " + charset + ". Encoding: " + System.Text.Encoding.GetEncoding(charset).EncodingName);
                    Console.WriteLine();
                }
                else
                {
                    // Print the encoding's name, as the branch above does, rather than the
                    // Encoding object's ToString().
                    Console.WriteLine("Charset: " + "ASCII" + ". Encoding: " + System.Text.Encoding.GetEncoding("ASCII").EncodingName);
                    Console.WriteLine();
                }
            }
        }
示例#16
0
    /// <summary>
    /// Parses the header of the file at <c>this.fullFileName</c>: the "KFNB" signature, the property
    /// blocks up to the "ENDH" marker, and the resource directory. Results are stored in
    /// <c>properties</c>, <c>unknownProperties</c> and <c>resources</c>; on a format problem
    /// <c>error</c> is set and parsing stops.
    /// </summary>
    /// <param name="filesEncoding">
    /// Code page used to decode resource names. 0 (the default) means: use
    /// <c>resourceNamesEncodingAuto</c>, running charset detection on the names while that value
    /// is still 20127.
    /// </param>
    /// <remarks>
    /// NOTE(review): the return values of <c>fs.Read</c> are not checked, so a truncated file leaves
    /// buffers partially filled without any error. <c>BitConverter</c> decodes integers in the host's
    /// byte order.
    /// </remarks>
    public void ReadFile(int filesEncoding = 0)
    {
        // Reset the state left by a previous parse.
        this.error = null;
        this.properties.Clear();
        this.unknownProperties.Clear();
        this.resources.Clear();

        using (FileStream fs = new FileStream(this.fullFileName, FileMode.Open, FileAccess.Read))
        {
            // The file must begin with the 4-byte signature "KFNB".
            byte[] signature = new byte[4];
            fs.Read(signature, 0, signature.Length);
            string sign = new string(Encoding.UTF8.GetChars(signature));
            if (sign != "KFNB")
            {
                this.error = "Invalid KFN signature!";
                return;
            }

            // Each property block starts with 5 bytes: a 4-character name plus a 1-byte type.
            byte[] prop      = new byte[5];
            byte[] propValue = new byte[4];
            // Upper bound on the number of property blocks read before giving up on finding "ENDH".
            int    maxProps  = 40;
            while (maxProps > 0)
            {
                fs.Read(prop, 0, prop.Length);
                string propName = new string(Encoding.UTF8.GetChars(new ArraySegment <byte>(prop, 0, 4).ToArray()));
                if (propName == "ENDH")
                {
                    // End-of-header marker: skip the 4 bytes that follow it and stop reading properties.
                    fs.Position += 4;
                    break;
                }
                string SpropName = this.GetPropDesc(propName);
                if (prop[4] == 1)
                {
                    // Type 1: the value is a 4-byte unsigned integer.
                    fs.Read(propValue, 0, propValue.Length);
                    if (SpropName == "Genre" && BitConverter.ToUInt32(propValue, 0) == 0xffffffff)
                    {
                        this.properties.Add(SpropName, "Not set");
                    }
                    else
                    {
                        if (SpropName.Contains("unknown"))
                        {
                            this.unknownProperties.Add(SpropName + ": " + BitConverter.ToUInt32(propValue, 0));
                        }
                        // Stored only when GetPropDesc returned something other than the raw name
                        // (presumably meaning the property is a recognised one; verify against GetPropDesc).
                        if (propName != SpropName)
                        {
                            this.properties.Add(SpropName, BitConverter.ToUInt32(propValue, 0).ToString());
                        }
                    }
                }
                else if (prop[4] == 2)
                {
                    // Type 2: a 4-byte length followed by that many bytes of data.
                    fs.Read(propValue, 0, propValue.Length);
                    byte[] value = new byte[BitConverter.ToUInt32(propValue, 0)];
                    fs.Read(value, 0, value.Length);
                    if (SpropName == "AES-ECB-128 Key")
                    {
                        // A byte sum of 0 means every byte is zero: reported as "Not present";
                        // otherwise the bytes are rendered as an uppercase hex string.
                        string val = (value.Select(b => (int)b).Sum() == 0)
                            ? "Not present"
                            : value.Select(b => b.ToString("X2")).Aggregate((s1, s2) => s1 + s2);
                        this.properties.Add(SpropName, val);
                    }
                    else
                    {
                        // Other type-2 values are decoded as UTF-8 text.
                        if (SpropName.Contains("unknown"))
                        {
                            this.unknownProperties.Add(SpropName + ": " + new string(Encoding.UTF8.GetChars(value)));
                        }
                        if (propName != SpropName)
                        {
                            this.properties.Add(SpropName, new string(Encoding.UTF8.GetChars(value)));
                        }
                    }
                }
                else
                {
                    // Any other type byte aborts the parse.
                    this.error = "unknown property block type - " + prop[4];
                    return;
                }
                maxProps--;
            }
            // Remember where the property section ended.
            this.endOfPropsOffset = fs.Position;

            // Resource directory: a 4-byte count followed by one entry per resource.
            byte[] numOfResources = new byte[4];
            fs.Read(numOfResources, 0, numOfResources.Length);
            int resourcesCount = BitConverter.ToInt32(numOfResources, 0);
            while (resourcesCount > 0)
            {
                byte[] resourceNameLenght      = new byte[4];
                byte[] resourceType            = new byte[4];
                byte[] resourceLenght          = new byte[4];
                byte[] resourceEncryptedLenght = new byte[4];
                byte[] resourceOffset          = new byte[4];
                byte[] resourceEncrypted       = new byte[4];

                // Entry layout, in read order: name length, name bytes, type, length, offset,
                // encrypted length, encrypted flag.
                fs.Read(resourceNameLenght, 0, resourceNameLenght.Length);
                byte[] resourceName = new byte[BitConverter.ToUInt32(resourceNameLenght, 0)];
                fs.Read(resourceName, 0, resourceName.Length);
                fs.Read(resourceType, 0, resourceType.Length);
                fs.Read(resourceLenght, 0, resourceLenght.Length);
                fs.Read(resourceOffset, 0, resourceOffset.Length);
                fs.Read(resourceEncryptedLenght, 0, resourceEncryptedLenght.Length);
                fs.Read(resourceEncrypted, 0, resourceEncrypted.Length);
                int encrypted = BitConverter.ToInt32(resourceEncrypted, 0);

                // Auto-detect the name encoding only when the caller forced none and the automatic
                // code page is still 20127 (US-ASCII). Once detection changes that value, later
                // entries skip this branch and reuse the detected code page.
                if (filesEncoding == 0 && resourceNamesEncodingAuto == 20127)
                {
                    UniversalDetector Det = new UniversalDetector(null);
                    Det.HandleData(resourceName, 0, resourceName.Length);
                    Det.DataEnd();
                    string enc = Det.GetDetectedCharset();
                    if (enc != null && enc != "Not supported")
                    {
                        // fix encoding for 1251 upper case and MAC
                        if (enc == "KOI8-R" || enc == "X-MAC-CYRILLIC")
                        {
                            enc = "WINDOWS-1251";
                        }
                        Encoding denc = Encoding.GetEncoding(enc);
                        resourceNamesEncodingAuto = denc.CodePage;
                        this.autoDetectEncoding   = denc.CodePage + ": " + denc.EncodingName;
                    }
                    else if (enc == null)
                    {
                        // Nothing detected: report the code page currently in use.
                        Encoding denc = Encoding.GetEncoding(resourceNamesEncodingAuto);
                        this.autoDetectEncoding = denc.CodePage + ": " + denc.EncodingName;
                    }
                    else
                    {
                        // The detector answered "Not supported": keep the current code page.
                        this.autoDetectEncoding = "No supported: use " + Encoding.GetEncoding(resourceNamesEncodingAuto).EncodingName;
                    }
                }

                // An explicit filesEncoding takes precedence over the automatic code page.
                int    useEncoding = (filesEncoding != 0) ? filesEncoding : resourceNamesEncodingAuto;
                string fName       = new string(Encoding.GetEncoding(useEncoding).GetChars(resourceName));

                // The last two arguments are: encrypted flag is non-zero, and the name equals
                // GetAudioSourceName().
                this.resources.Add(new KFN.ResourceFile(
                                       this.GetFileType(resourceType),
                                       fName,
                                       BitConverter.ToInt32(resourceEncryptedLenght, 0),
                                       BitConverter.ToInt32(resourceLenght, 0),
                                       BitConverter.ToInt32(resourceOffset, 0),
                                       (encrypted == 0) ? false : true,
                                       (fName == this.GetAudioSourceName()) ? true : false
                                       ));

                resourcesCount--;
            }
            // Remember where the resource directory ended.
            this.endOfHeaderOffset = fs.Position;
        }
    }
示例#17
0
        /// <summary>Gets the character encoding of a file</summary>
        /// <param name="File">The absolute path to a file</param>
        /// <returns>
        /// The encoding indicated by a byte-order mark or, failing that, by charset detection;
        /// <c>Encoding.Unknown</c> when the path is null, the file does not exist, the detected
        /// charset is not one of the handled names, or any exception occurs.
        /// </returns>
        internal static Encoding GetEncodingFromFile(string File)
        {
            if (File == null || !System.IO.File.Exists(File))
            {
                return(Encoding.Unknown);
            }
            try
            {
                byte[] Data = System.IO.File.ReadAllBytes(File);

                // Four-byte marks are tested first: the UTF-32 LE mark (FF FE 00 00) starts with the
                // UTF-16 LE mark (FF FE), so testing UTF-16 first would make UTF-32 LE unreachable.
                if (Data.Length >= 4)
                {
                    if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF)
                    {
                        return(Encoding.Utf32Be);
                    }

                    if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00)
                    {
                        return(Encoding.Utf32Le);
                    }
                }
                if (Data.Length >= 3)
                {
                    if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF)
                    {
                        return(Encoding.Utf8);
                    }

                    if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76)
                    {
                        return(Encoding.Utf7);
                    }
                }
                if (Data.Length >= 2)
                {
                    if (Data[0] == 0xFE && Data[1] == 0xFF)
                    {
                        return(Encoding.Utf16Be);
                    }

                    if (Data[0] == 0xFF && Data[1] == 0xFE)
                    {
                        return(Encoding.Utf16Le);
                    }
                }

                // No byte-order mark: fall back to statistical detection.
                UniversalDetector Det = new UniversalDetector(null);
                Det.HandleData(Data, 0, Data.Length);
                Det.DataEnd();
                switch (Det.GetDetectedCharset())
                {
                case "SHIFT_JIS":
                    return(Encoding.Shift_JIS);

                case "UTF-8":
                    return(Encoding.Utf8);

                case "UTF-7":
                    return(Encoding.Utf7);

                case "WINDOWS-1252":
                    return(Encoding.Windows1252);

                case "BIG5":
                    if (Path.GetFileName(File).ToLowerInvariant() == "stoklosy.b3d" && Data.Length == 18256)
                    {
                        //Polish Warsaw metro object file uses diacritics in filenames
                        return(Encoding.Windows1252);
                    }
                    return(Encoding.Big5);

                case "EUC-KR":
                    return(Encoding.EUC_KR);
                }
                return(Encoding.Unknown);
            }
            catch
            {
                return(Encoding.Unknown);
            }
        }
        /// <summary>
        /// Click handler: downloads the page at the URL in <c>UrlBox</c>, detects its charset and shows
        /// the charset name in <c>CharSetBox</c> and at most the first 2000 decoded characters in
        /// <c>PageBox</c>. The button is disabled while the handler runs.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private async void button_Click(object sender, RoutedEventArgs e)
        {
            CharSetBox.Text  = "";
            PageBox.Text     = "";
            button.IsEnabled = false;
            try
            {
                HttpWebRequest  hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
                HttpWebResponse res;
                try
                {
                    res = (HttpWebResponse)await hwr.GetResponseAsync();
                }
                catch
                {
                    CharSetBox.Text = "网页获取错误!";
                    return;
                }

                byte[] PageBytes;

                // Dispose the response on every path, including a non-OK status.
                using (res)
                {
                    if (res.StatusCode != HttpStatusCode.OK)
                    {
                        return;
                    }

                    using (Stream mystream = res.GetResponseStream())
                    using (MemoryStream msTemp = new MemoryStream())
                    {
                        // Download asynchronously so the UI thread is not blocked on network I/O.
                        await mystream.CopyToAsync(msTemp);
                        PageBytes = msTemp.ToArray();
                    }
                }

                if (PageBytes.Length == 0)
                {
                    return;
                }

                byte[]            DetectBuff = new byte[4096];
                UniversalDetector Det        = new UniversalDetector(null);
                using (MemoryStream detectSource = new MemoryStream(PageBytes))
                {
                    int DetLen;
                    while ((DetLen = detectSource.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                    {
                        // Pass only the bytes just read; the rest of the buffer is stale.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                }
                Det.DataEnd();

                string charset = Det.GetDetectedCharset();
                if (charset != null)
                {
                    CharSetBox.Text = "OK! CharSet=" + charset;
                    string page = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
                    if (page.Length > 2000)
                    {
                        page = page.Substring(0, 2000);
                    }
                    PageBox.Text = page;
                }
            }
            catch
            {
                // Best effort: any failure leaves the text boxes as they are.
            }
            finally
            {
                button.IsEnabled = true;
            }
        }
示例#19
0
        /// <summary>Gets the character encoding of a file</summary>
        /// <param name="File">The absolute path to a file</param>
        /// <returns>
        /// The encoding indicated by a byte-order mark or, failing that, by charset detection;
        /// <c>Encoding.Unknown</c> when the detected charset is not one of the handled names or any
        /// exception occurs (including a null or unreadable path).
        /// </returns>
        internal static Encoding GetEncodingFromFile(string File)
        {
            try
            {
                byte[] Data = System.IO.File.ReadAllBytes(File);

                // Four-byte marks are tested first: the UTF-32 LE mark (FF FE 00 00) starts with the
                // UTF-16 LE mark (FF FE), so testing UTF-16 first would make UTF-32 LE unreachable.
                if (Data.Length >= 4)
                {
                    if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF)
                    {
                        return(Encoding.Utf32Be);
                    }
                    if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00)
                    {
                        return(Encoding.Utf32Le);
                    }
                }
                if (Data.Length >= 3)
                {
                    if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF)
                    {
                        return(Encoding.Utf8);
                    }
                    if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76)
                    {
                        return(Encoding.Utf7);
                    }
                }
                if (Data.Length >= 2)
                {
                    if (Data[0] == 0xFE && Data[1] == 0xFF)
                    {
                        return(Encoding.Utf16Be);
                    }
                    if (Data[0] == 0xFF && Data[1] == 0xFE)
                    {
                        return(Encoding.Utf16Le);
                    }
                }

                // No byte-order mark: fall back to statistical detection.
                UniversalDetector Det = new UniversalDetector(null);
                Det.HandleData(Data, 0, Data.Length);
                Det.DataEnd();
                switch (Det.GetDetectedCharset())
                {
                case "SHIFT_JIS":
                    return(Encoding.Shift_JIS);

                case "UTF-8":
                    return(Encoding.Utf8);

                case "UTF-7":
                    return(Encoding.Utf7);

                case "WINDOWS-1252":
                    return(Encoding.Windows1252);

                case "BIG5":
                    return(Encoding.Big5);
                }
                return(Encoding.Unknown);
            }
            catch
            {
                return(Encoding.Unknown);
            }
        }
示例#20
0
        /// <summary>
        /// Click handler: downloads the resource at the URL typed into <c>UrlBox</c>, runs charset
        /// detection over the downloaded bytes, and shows the detected charset in <c>CharSetBox</c>
        /// and the decoded text in <c>PageBox</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Check_Click(object sender, EventArgs e)
        {
            CharSetBox.Text = "";
            PageBox.Text    = "";

            HttpWebResponse res;
            try
            {
                // Creating the request is inside the try as well, so a malformed URL is
                // reported in the UI instead of escaping the handler.
                HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
                res = (HttpWebResponse)hwr.GetResponse();
            }
            catch
            {
                CharSetBox.Text = "网页获取错误!";
                return;
            }

            byte[] PageBytes;
            try
            {
                if (res.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                // Buffer the whole body in memory so it can be both scanned and decoded.
                using (Stream mystream = res.GetResponseStream())
                using (MemoryStream msTemp = new MemoryStream())
                {
                    int    len;
                    byte[] buff = new byte[512];
                    while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                    {
                        msTemp.Write(buff, 0, len);
                    }
                    PageBytes = msTemp.ToArray();
                }
            }
            finally
            {
                // Release the connection on every path, including non-OK status and read errors.
                res.Close();
            }

            if (PageBytes.Length == 0)
            {
                return;
            }

            UniversalDetector Det = new UniversalDetector(null);
            Det.HandleData(PageBytes, 0, PageBytes.Length);
            Det.DataEnd();

            string charset = Det.GetDetectedCharset();
            if (charset != null)
            {
                CharSetBox.Text = "OK! CharSet=" + charset;
                try
                {
                    PageBox.Text = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
                }
                catch (ArgumentException)
                {
                    // The detector reported a charset name this runtime has no Encoding for.
                    // The name stays visible in CharSetBox; the page text is left empty.
                    PageBox.Text = "";
                }
            }
            Det.Reset();
        }
示例#21
0
        /// <summary>
        /// Downloads a web page with HttpWebRequest and returns its body as text.
        /// When <paramref name="encoding"/> is null or empty, the character set is detected from the
        /// downloaded bytes. If no character set is detected, or the name is not supported by the
        /// runtime, UTF-8 is used.
        /// </summary>
        /// <param name="url">Address of the page; must contain "http".</param>
        /// <param name="encoding">Name of the character set to decode with, or an empty string to auto-detect.</param>
        /// <returns>The decoded page content.</returns>
        /// <exception cref="Exception">Thrown when <paramref name="url"/> does not contain "http".</exception>
        private static string GetWeb(string url, string encoding)
        {
            if (url.IndexOf("http") == -1) // no "http" anywhere in the address
            {
                throw new Exception("请提供完整的HTTP地址");
            }

            System.Net.HttpWebRequest myrequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
            myrequest.Timeout = 600000; // 10 minutes
            // Request an uncached copy.
            myrequest.Headers.Set("Pragma", "no-cache");

            using (System.Net.HttpWebResponse myresponse = (System.Net.HttpWebResponse)myrequest.GetResponse())
            using (System.IO.Stream mystream = myresponse.GetResponseStream())
            using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
            {
                // Buffer the whole body: it is scanned once for detection and read again for decoding.
                int    len;
                byte[] buff = new byte[512];
                while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                {
                    memoryStream.Write(buff, 0, len);
                }

                // Detection only matters when the caller did not name a charset.
                if (string.IsNullOrEmpty(encoding) && memoryStream.Length > 0)
                {
                    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                    int    DetLen;
                    byte[] DetectBuff = new byte[4096];
                    UniversalDetector Det = new UniversalDetector(null);
                    while ((DetLen = memoryStream.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                    {
                        // Hand over only the bytes just read; the rest of the buffer holds
                        // stale data from the previous chunk (or zeros).
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();
                    encoding = Det.GetDetectedCharset();
                }

                System.Text.Encoding myencoding = null;
                if (!string.IsNullOrEmpty(encoding))
                {
                    try
                    {
                        myencoding = System.Text.Encoding.GetEncoding(encoding);
                    }
                    catch (ArgumentException)
                    {
                        // Charset name unknown to this runtime: fall through to UTF-8.
                        myencoding = null;
                    }
                }
                if (myencoding == null)
                {
                    myencoding = System.Text.Encoding.UTF8;
                }

                // Rewind and decode the buffered body.
                memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                using (System.IO.StreamReader mystreamreader = new System.IO.StreamReader(memoryStream, myencoding))
                {
                    return mystreamreader.ReadToEnd();
                }
            }
        }
示例#22
0
        /// <summary>
        /// Downloads a web page with HttpWebRequest and returns its body as text.
        /// When <paramref name="encoding"/> is null or empty, the character set is detected from the
        /// downloaded bytes. If no character set is detected, or the name is not supported by the
        /// runtime, UTF-8 is used. Failures are written to the debug output and an empty string
        /// is returned.
        /// </summary>
        /// <param name="url">Address of the page; must contain "http".</param>
        /// <param name="encoding">Name of the character set to decode with, or an empty string to auto-detect.</param>
        /// <returns>The decoded page content, or an empty string when the download fails.</returns>
        private static string GetWeb(string url, string encoding)
        {
            string strHtmlContent = "";

            System.IO.Stream           mystream   = null;
            System.Net.HttpWebRequest  myrequest  = null;
            System.Net.HttpWebResponse myresponse = null;
            try
            {
                myrequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);

                if (url.IndexOf("http") == -1) // no "http" anywhere in the address
                {
                    // Caught by the handler below, which logs it and returns "".
                    throw new Exception("请提供完整的HTTP地址");
                }

                myrequest.Timeout = 20 * 1000; // 20 seconds
                // Request an uncached copy.
                myrequest.Headers.Set("Pragma", "no-cache");

                // NOTE(review): the request is only sent when KeepAlive is true (its default);
                // otherwise nothing is downloaded and "" is returned. Confirm this gate is intended.
                if (myrequest.KeepAlive)
                {
                    try
                    {
                        myresponse = (System.Net.HttpWebResponse)myrequest.GetResponse();
                        mystream   = myresponse.GetResponseStream();
                    }
                    catch (Exception ex)
                    {
                        System.Diagnostics.Debug.WriteLine(DateTime.Now + "获取网页内容出错:url:" + url + "\r\n" + ex.Message + " " + (ex.StackTrace == null ? " " : " " + ex.StackTrace));

                        return strHtmlContent;
                    }
                }

                using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
                {
                    // Buffer the whole body: it is scanned once for detection and read again for decoding.
                    if (mystream != null)
                    {
                        int    len;
                        byte[] buff = new byte[512];
                        while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                        {
                            memoryStream.Write(buff, 0, len);
                        }
                    }

                    // Detection only matters when the caller did not name a charset.
                    if (string.IsNullOrEmpty(encoding) && memoryStream.Length > 0)
                    {
                        memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                        int    DetLen;
                        byte[] DetectBuff = new byte[4096];
                        UniversalDetector Det = new UniversalDetector(null);
                        while ((DetLen = memoryStream.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                        {
                            // Hand over only the bytes just read; the rest of the buffer holds
                            // stale data from the previous chunk (or zeros).
                            Det.HandleData(DetectBuff, 0, DetLen);
                        }
                        Det.DataEnd();
                        encoding = Det.GetDetectedCharset();
                    }

                    System.Text.Encoding myencoding = null;
                    if (!string.IsNullOrEmpty(encoding))
                    {
                        try
                        {
                            myencoding = System.Text.Encoding.GetEncoding(encoding);
                        }
                        catch (ArgumentException)
                        {
                            // Charset name unknown to this runtime: fall through to UTF-8.
                            myencoding = null;
                        }
                    }
                    if (myencoding == null)
                    {
                        myencoding = System.Text.Encoding.UTF8;
                    }

                    // Rewind and decode the buffered body.
                    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                    using (System.IO.StreamReader mystreamreader = new System.IO.StreamReader(memoryStream, myencoding))
                    {
                        strHtmlContent = mystreamreader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(DateTime.Now + "获取网页内容出错:url:" + url + "\r\n" + ex.Message + " " + (ex.StackTrace == null ? " " : " " + ex.StackTrace));
            }
            finally
            {
                if (mystream != null)
                {
                    mystream.Dispose();
                }
                // Close the response on every path, not only after a successful decode.
                if (myresponse != null)
                {
                    myresponse.Close();
                }
                // The request is aborted explicitly to tear down the connection.
                if (myrequest != null)
                {
                    myrequest.Abort();
                }
            }
            return strHtmlContent;
        }
示例#23
0
 /// <summary>
 /// Creates the proxy and stores a new <c>UniversalDetector</c>, constructed with a
 /// null argument, in the <c>_UniversalDetector</c> field.
 /// </summary>
 public UniversalDetectorProxy()
 {
     _UniversalDetector = new UniversalDetector(null);
 }
示例#24
0
        /// <summary>
        /// Performs an HTTP GET and returns the decoded response body followed by a diagnostic
        /// trailer (status code, cookie count and header, request headers, response headers).
        /// The body's character set is auto-detected from its bytes, with GBK as the fallback.
        /// </summary>
        /// <param name="url">Target address; passed through <c>getDealUrl</c> before use.</param>
        /// <param name="cookies">Cookies sent with the request; updated from the response via <c>xkCookies.UpCookie</c>.</param>
        /// <param name="refrere">Value for the Referer request header.</param>
        /// <param name="encoding">
        /// Documented as 1 = GBK, 2 = UTF-8, 3 = auto.
        /// NOTE(review): this value is not read anywhere in the method; the charset is always auto-detected.
        /// </param>
        /// <param name="timeout">Request timeout in seconds.</param>
        /// <param name="isRedirect">
        /// When true, a Location header, a Refresh header and meta Refresh tags are followed by calling
        /// this method recursively; the followed page's output is placed in front of this page's output.
        /// </param>
        /// <returns>The body plus diagnostics, or the exception message when anything fails.</returns>
        public string httpGET(string url, ref CookieCollection cookies, string refrere, int encoding, int timeout, bool isRedirect)
        {
            url = getDealUrl(url);
            Stream          stream          = null;
            HttpWebResponse httpWebResponse = null;
            HttpWebRequest  httpWebRequest  = null;
            string          result;

            try {
                ServicePointManager.Expect100Continue      = false;
                ServicePointManager.DefaultConnectionLimit = 1000;
                // NOTE(review): this callback is process-wide. If CheckValidationResult accepts every
                // certificate, TLS validation is disabled for all requests in the process - confirm.
                ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
                httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
                httpWebRequest.Headers.Clear();
                httpWebRequest.AutomaticDecompression = DecompressionMethods.GZip;
                httpWebRequest.CookieContainer        = xkCookies.CookieContainer(cookies, url);
                httpWebRequest.KeepAlive         = true;
                httpWebRequest.ProtocolVersion   = HttpVersion.Version10;
                httpWebRequest.Method            = "GET";
                httpWebRequest.Referer           = refrere;
                httpWebRequest.Timeout           = timeout * 1000;
                // Redirects are followed manually further down so each hop can be reported.
                httpWebRequest.AllowAutoRedirect = false;
                httpWebRequest.Accept            = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
                httpWebRequest.Headers.Add("Accept-Language", "zh-cn");
                httpWebRequest.UserAgent = useragent;
                string text = httpWebRequest.Headers.ToString();
                httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
                stream          = httpWebResponse.GetResponseStream();
                xkCookies.UpCookie(ref cookies, url, httpWebResponse.Headers["Set-Cookie"], httpWebResponse.Cookies);

                // Capture everything needed from the response before it is closed; its
                // properties may throw ObjectDisposedException afterwards.
                HttpStatusCode      statusCode      = httpWebResponse.StatusCode;
                WebHeaderCollection responseHeaders = httpWebResponse.Headers;
                string              contentEncoding = httpWebResponse.ContentEncoding ?? string.Empty;

                // Wrap the body stream only for encodings the framework is not already
                // decompressing, so the data is never inflated twice.
                if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    if ((httpWebRequest.AutomaticDecompression & DecompressionMethods.GZip) == 0)
                    {
                        stream = new GZipStream(stream, CompressionMode.Decompress);
                    }
                }
                else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    if ((httpWebRequest.AutomaticDecompression & DecompressionMethods.Deflate) == 0)
                    {
                        stream = new DeflateStream(stream, CompressionMode.Decompress);
                    }
                }

                // Read the body from the (possibly decompressing) stream, not from a second
                // raw response stream.
                byte[] PageBytes;
                using (MemoryStream msTemp = new MemoryStream())
                {
                    int    len;
                    byte[] buff = new byte[512];
                    while ((len = stream.Read(buff, 0, buff.Length)) > 0)
                    {
                        msTemp.Write(buff, 0, len);
                    }
                    PageBytes = msTemp.ToArray();
                }
                httpWebResponse.Close();

                string tmp_result = "";
                if (PageBytes.Length > 0)
                {
                    UniversalDetector Det = new UniversalDetector(null);
                    using (MemoryStream detectSource = new MemoryStream(PageBytes))
                    {
                        int    DetLen;
                        byte[] DetectBuff = new byte[4096];
                        while ((DetLen = detectSource.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                        {
                            // Hand over only the bytes just read; the rest of the buffer holds
                            // stale data from the previous chunk (or zeros).
                            Det.HandleData(DetectBuff, 0, DetLen);
                        }
                    }
                    Det.DataEnd();

                    string               charset      = Det.GetDetectedCharset();
                    System.Text.Encoding pageEncoding = null;
                    if (charset != null)
                    {
                        try
                        {
                            pageEncoding = System.Text.Encoding.GetEncoding(charset);
                        }
                        catch (ArgumentException)
                        {
                            // Detected name unknown to this runtime: use the GBK fallback below.
                            pageEncoding = null;
                        }
                    }
                    if (pageEncoding == null)
                    {
                        pageEncoding = System.Text.Encoding.GetEncoding("GBK");
                    }
                    tmp_result = pageEncoding.GetString(PageBytes);
                }

                tmp_result = string.Concat(new object[]
                {
                    tmp_result,
                    "\r\n\r\n=================================================\r\n\r\n本次请求:",
                    url,
                    " 响应结果:",
                    statusCode,
                    "\r\n\r\nCookie数量",
                    httpWebRequest.CookieContainer.Count,
                    "\r\n",
                    httpWebRequest.CookieContainer.GetCookieHeader(new Uri(url)),
                    "\r\nrequest:\r\n",
                    text,
                    "\r\nresponse:\r\n",
                    responseHeaders.ToString(),
                    "\r\n\r\n=================================================\r\n\r\n"
                });
                if (isRedirect)
                {
                    string location = responseHeaders["Location"];
                    string refresh  = responseHeaders["Refresh"];
                    if (location != null && location.Length > 2)
                    {
                        string url_redirect = "";
                        if (location.ToLower().Contains("http://"))
                        {
                            url_redirect = location;
                        }
                        else
                        {
                            // Anything without "http://" is resolved against the current URL.
                            url_redirect = geturl(location, url);
                        }
                        tmp_result = httpGET(url_redirect, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                    }
                    else
                    {
                        if (refresh != null && refresh.Length > 2)
                        {
                            // Take the part after "url=" as the refresh target.
                            string text3 = refresh.ToLower().Replace("url=", "`").Split('`')[1];
                            if (!text3.Contains("http://"))
                            {
                                text3 = geturl(text3, url);
                            }
                            tmp_result = httpGET(text3, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                        }
                    }
                    if (tmp_result.Contains("Refresh"))
                    {
                        // Look for <meta http-equiv="Refresh"> tags in the accumulated output.
                        Winista.Text.HtmlParser.Util.NodeList htmlNodes = new Parser(new Lexer(tmp_result)).Parse(new TagNameFilter("meta"));
                        if (htmlNodes.Count > 1)
                        {
                            for (int i = 0; i < htmlNodes.Count; i++)
                            {
                                MetaTag option = (MetaTag)htmlNodes.ElementAt(i);
                                if (option.GetAttribute("http-equiv") == "Refresh")
                                {
                                    string content = option.GetAttribute("content");
                                    string text3   = content.ToLower().Replace("url=", "`").Split('`')[1];

                                    if (!text3.Contains("http://"))
                                    {
                                        text3 = geturl(text3, url);
                                    }
                                    tmp_result = httpGET(text3, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                                }
                            }
                        }
                    }
                }
                result = tmp_result;

                if (!url.Contains(":8888") && !url.Contains("renzhe") && !url.Contains("zq535228") && !url.Contains("whoissoft") && !url.Contains("chinaz"))
                {
                    EchoHelper.Echo(string.Format("成功获取:{0}的HTML内容。", url), null, EchoHelper.EchoType.普通信息);
                }
            } catch (Exception ex) {
                // Callers receive the failure as text rather than as an exception.
                result = ex.Message;
            } finally {
                if (stream != null)
                {
                    stream.Close();
                }
                if (httpWebResponse != null)
                {
                    httpWebResponse.Close();
                }
                if (httpWebRequest != null)
                {
                    httpWebRequest.Abort();
                }
            }
            return result;
        }