예제 #1
0
        /// <summary>
        /// Click handler: URL-decodes the text of <c>textBox1</c> with the encoding
        /// chosen through <c>GetEndcode()</c> and writes the result to <c>textBox2</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            Encoding sourceEncoding;

            switch (GetEndcode())
            {
            // AUTO is handled exactly like UTF8 here.
            case EnumGloabParas.EnumEncodeType.AUTO:
            case EnumGloabParas.EnumEncodeType.UTF8:
                sourceEncoding = Encoding.UTF8;
                break;

            case EnumGloabParas.EnumEncodeType.BIG5:
                sourceEncoding = Encoding.GetEncoding("big5");
                break;

            case EnumGloabParas.EnumEncodeType.GB2312:
                sourceEncoding = Encoding.GetEncoding("gb2312");
                break;

            case EnumGloabParas.EnumEncodeType.GBK:
                sourceEncoding = Encoding.GetEncoding("gbk");
                break;

            default:
                // Any other value leaves the output box untouched.
                return;
            }

            this.textBox2.Text = System.Web.HttpUtility.UrlDecode(this.textBox1.Text, sourceEncoding);
        }
예제 #2
0
 /// <summary>
 /// Fetches the page described by <paramref name="item"/> via <c>GetHtml</c> and passes the
 /// resulting text to the HTML-based <c>GetDataTable</c> overload.
 /// </summary>
 /// <param name="dataTable">Table handed to the HTML-based overload; also returned unchanged when an exception is caught.</param>
 /// <param name="columnItemList">Column definitions forwarded to the HTML-based overload.</param>
 /// <param name="item">Supplies <c>Url</c>, <c>StartPos</c> and <c>EndPos</c> for the fetch.</param>
 /// <param name="webCookie">Cookie string forwarded to <c>GetHtml</c>.</param>
 /// <param name="webEncode">Encoding selection forwarded to <c>GetHtml</c>.</param>
 /// <param name="isAjax">Forwarded to <c>GetHtml</c>.</param>
 /// <param name="errMsg">Receives a fixed message when the page text is empty, or the exception message when an exception is caught.</param>
 /// <returns>
 /// The overload's result on success, <c>null</c> when the fetched text is null or empty,
 /// or the incoming <paramref name="dataTable"/> when an exception was caught.
 /// </returns>
 public DataTable GetDataTable(DataTable dataTable, List <TaskColumnItem> columnItemList, cGatherUrlItem item, string webCookie, EnumGloabParas.EnumEncodeType webEncode, bool isAjax, ref string errMsg)
 {
     try
     {
         string pageText = this.GetHtml(item.Url, webCookie, webEncode, item.StartPos, item.EndPos, isAjax);

         if (!string.IsNullOrEmpty(pageText))
         {
             return this.GetDataTable(dataTable, columnItemList, pageText, ref errMsg);
         }

         // Message text: "page came back empty; the capture range may be misconfigured or the network failed".
         errMsg = "网页获取为空,有可能是采集范围设置错误或网络错误";
         return null;
     }
     catch (Exception ex)
     {
         // Failures are reported through errMsg; the caller gets its own table back.
         errMsg = ex.Message;
         return dataTable;
     }
 }
예제 #3
0
        /// <summary>
        /// Requests <paramref name="url"/> and returns the response body as text, optionally
        /// cropped to the span between two markers and optionally URL-decoded.
        /// </summary>
        /// <param name="url">
        /// Address to request. If it contains <c>&lt;POST&gt;...&lt;/POST&gt;</c> (any letter case),
        /// the text before the marker is used as the address and the text inside the marker is
        /// sent as an <c>application/x-www-form-urlencoded</c> POST body; otherwise a GET is issued.
        /// </param>
        /// <param name="cookie">Cookie string passed to <c>GetWebCookies</c>; an empty string sends no cookies.</param>
        /// <param name="webCode">
        /// Encoding used to decode the body. <c>AUTO</c> reads the charset from the response
        /// Content-Type and falls back to <see cref="Encoding.Default"/>; values without an
        /// explicit case are decoded as UTF-8.
        /// </param>
        /// <param name="startPos">Regex fragment marking the start of the span to keep; ignored when null or empty.</param>
        /// <param name="endPos">Regex fragment marking the end of the span to keep; ignored when null or empty.</param>
        /// <param name="IsAjax">When true the final text is URL-decoded as UTF-8.</param>
        /// <returns>The decoded body, or the cropped part of it when both markers are given and match.</returns>
        public string GetHtml(string url, string cookie, EnumGloabParas.EnumEncodeType webCode, string startPos, string endPos, bool IsAjax)
        {
            // A single case-insensitive match both detects the POST marker and yields the
            // payload. The original detected it case-insensitively but cut the URL with a
            // case-sensitive IndexOf (Substring(0, -1) on "<post>"), and used different
            // patterns for detection and extraction.
            Match  postMatch  = Regex.Match(url, @"<POST>([\S\s]*)</POST>", RegexOptions.IgnoreCase);
            bool   isPost     = postMatch.Success;
            string requestUrl = isPost ? url.Substring(0, postMatch.Index) : url;

            wReq = (HttpWebRequest)WebRequest.Create(new Uri(requestUrl));

            // Referer is the scheme/host prefix of the URL; empty when the pattern does not match.
            Match refererMatch = Regex.Match(url, @"(http://).[^/]*[?=/]", RegexOptions.IgnoreCase);
            wReq.Referer = refererMatch.Groups[0].Value;

            if (cookie != "")
            {
                CookieCollection cookies   = GetWebCookies(cookie);
                CookieContainer  cookieJar = new CookieContainer();
                // Scope the cookies to the real request address, not the address plus POST payload.
                cookieJar.Add(new Uri(requestUrl), cookies);
                wReq.CookieContainer = cookieJar;
            }

            if (isPost)
            {
                byte[] payload = Encoding.ASCII.GetBytes(postMatch.Groups[1].Value);
                wReq.ContentType   = "application/x-www-form-urlencoded";
                wReq.ContentLength = payload.Length;
                wReq.Method        = "POST";
                using (System.IO.Stream reqStream = wReq.GetRequestStream())
                {
                    reqStream.Write(payload, 0, payload.Length);
                }
            }
            else
            {
                wReq.Method      = "GET";
                wReq.ContentType = "text/html";
            }

            // Load() and GetResponse() are defined elsewhere in this class; they are called
            // exactly as before.
            this.Load();
            HttpWebResponse wResp = GetResponse(wReq, 0, null);

            string strWebData = "";
            try
            {
                System.IO.Stream respStream = wResp.GetResponseStream();

                Encoding wCode;
                switch (webCode)
                {
                case EnumGloabParas.EnumEncodeType.AUTO:
                    // Take the charset from Content-Type (quotes and trailing parameters
                    // tolerated); keep the system default when it is absent or unknown.
                    wCode = Encoding.Default;
                    Match charsetMatch = Regex.Match(wResp.ContentType ?? "", @"charset\s*=\s*[""']?([^\s;""']+)", RegexOptions.IgnoreCase);
                    if (charsetMatch.Success)
                    {
                        try
                        {
                            wCode = Encoding.GetEncoding(charsetMatch.Groups[1].Value);
                        }
                        catch (ArgumentException)
                        {
                            // Unrecognised charset name: stay on Encoding.Default.
                        }
                    }
                    break;

                case EnumGloabParas.EnumEncodeType.GB2312:
                    wCode = Encoding.GetEncoding("gb2312");
                    break;

                case EnumGloabParas.EnumEncodeType.UTF8:
                    wCode = Encoding.UTF8;
                    break;

                case EnumGloabParas.EnumEncodeType.GBK:
                    wCode = Encoding.GetEncoding("GBK");
                    break;

                case EnumGloabParas.EnumEncodeType.BIG5:
                    // Previously fell through to UTF-8 and produced garbled text.
                    wCode = Encoding.GetEncoding("big5");
                    break;

                default:
                    wCode = Encoding.UTF8;
                    break;
                }

                // Content-coding names are case-insensitive, so compare accordingly.
                bool isGzip = string.Equals(wResp.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);
                System.IO.Stream bodyStream = isGzip
                    ? new GZipStream(respStream, CompressionMode.Decompress)
                    : respStream;

                // Disposing the reader also disposes the stream(s) it wraps.
                using (System.IO.StreamReader reader = new System.IO.StreamReader(bodyStream, wCode))
                {
                    strWebData = reader.ReadToEnd();
                }

                if (!string.IsNullOrEmpty(startPos) && !string.IsNullOrEmpty(endPos))
                {
                    // The markers are inserted unescaped, so they behave as regex fragments.
                    // NOTE(review): Multiline does not let '.' cross line breaks, so this only
                    // crops when both markers sit on one line; Singleline may have been
                    // intended. Left as-is because changing it alters results for existing
                    // configurations - confirm before switching.
                    string cropPattern = "(" + startPos + ").*?(" + endPos + ")";
                    Match  cropMatch   = Regex.Match(strWebData, cropPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    if (cropMatch.Success)
                    {
                        strWebData = cropMatch.Groups[0].Value;
                    }
                }

                if (IsAjax)
                {
                    strWebData = System.Web.HttpUtility.UrlDecode(strWebData, Encoding.UTF8);
                }
            }
            finally
            {
                // Release the connection on failure paths too, not only on success.
                if (wResp != null)
                {
                    wResp.Close();
                }
                wReq.Abort();
            }

            return strWebData;
        }
예제 #4
0
 /// <summary>
 /// Parses the string form of <c>comboBoxExt1.SelectedValue</c> into an
 /// <c>EnumGloabParas.EnumEncodeType</c> value.
 /// </summary>
 /// <returns>The enum member named by the combo box's selected value.</returns>
 private EnumGloabParas.EnumEncodeType GetEndcode()
 {
     string selectedName = comboBoxExt1.SelectedValue.ToString();
     object parsedValue  = Enum.Parse(typeof(EnumGloabParas.EnumEncodeType), selectedName);

     return (EnumGloabParas.EnumEncodeType)parsedValue;
 }