/// <summary>
/// Fetches the page at <paramref name="myUrl"/> with a GET request and returns the response text.
/// The size of the response (in <see cref="Encoding.Default"/> bytes) is reported to
/// <c>NetworkSpeed.increment</c> before the text is returned.
/// </summary>
/// <param name="myUrl">Address of the page to request.</param>
/// <returns>
/// The response text after removing every occurrence of the literal two-character sequences
/// backslash-s, backslash-r, backslash-n and backslash-f.
/// </returns>
public static string getHtml(string myUrl)
{
    HttpItem request = new HttpItem()
    {
        URL = myUrl,                     // URL (required)
        Method = "get",                  // optional, defaults to GET
        Expect100Continue = false,
        Allowautoredirect = false,
        KeepAlive = true,
        UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        ContentType = "application/x-www-form-urlencoded", // optional, has a default value
        Timeout = HTML_TIMEOUT,
        //Cookie = cookieString,
    };

    // NOTE(review): the standard HTTP header is spelled "Accept-Encoding"; confirm whether the
    // helper library expects this un-hyphenated name before changing it.
    request.Header.Add("AcceptEncoding", "gzip,deflate");

    HttpHelper client = new HttpHelper();
    HttpResult response = client.GetHtml(request);

    // Count the bytes without materialising a byte[] copy of the whole page.
    int byteCount = Encoding.Default.GetByteCount(response.Html);
    NetworkSpeed.increment(byteCount);

    // NOTE(review): string.Replace is not a regex and these are verbatim strings, so only the
    // literal text "\s", "\r", "\n", "\f" (backslash + letter) is stripped, not real whitespace.
    // Confirm that this is the intended behaviour.
    string cleaned = response.Html;
    cleaned = cleaned.Replace(@"\s", string.Empty);
    cleaned = cleaned.Replace(@"\r", string.Empty);
    cleaned = cleaned.Replace(@"\n", string.Empty);
    cleaned = cleaned.Replace(@"\f", string.Empty);
    return cleaned;
}
/// <summary>
/// Downloads a single image on a thread-pool thread and stores it through <c>saveImg</c>.
/// </summary>
/// <param name="imgUrl">Address of the image. The text after its last '/' is handed to <c>saveImg</c> as the image name.</param>
/// <param name="path">Target directory passed to <c>saveImg</c>; a null or empty value is reported as an error.</param>
/// <param name="fileName">Passed through unchanged to <c>saveImg</c>.</param>
/// <param name="MyHttpParameter">Not used at the moment; the cookie/host/referer wiring is commented out.</param>
/// <returns>
/// <c>"Name:{saved name} Time:{elapsed}ms"</c> on success. Any failure raised while downloading or saving
/// is caught and returned as text that starts with <c>"Error:"</c> and ends with <c>" ImgUrl:{imgUrl}"</c>.
/// </returns>
public static async Task<string> getImg(string imgUrl, string path, string fileName, httpParameter MyHttpParameter)
{
    return await Task.Run(() =>
    {
        try
        {
            Stopwatch watch = Stopwatch.StartNew();

            // IsNullOrEmpty (instead of path.Equals("")) so a null path yields the intended
            // message rather than a NullReferenceException text.
            if (string.IsNullOrEmpty(path))
            {
                throw new Exception("未指定保存文件的路径");
            }

            // Everything after the last '/' of the URL; the char overload is an ordinal search.
            string imgName = imgUrl.Substring(imgUrl.LastIndexOf('/') + 1);

            HttpHelper http = new HttpHelper();
            HttpItem item = new HttpItem()
            {
                URL = imgUrl,                   // URL (required)
                Method = "get",                 // optional, defaults to GET
                Timeout = IMG_TIMEOUT,          // connection timeout; optional, library default is 100000
                ReadWriteTimeout = IMG_TIMEOUT,
                UserAgent = "Mozilla / 5.0(Windows NT 10.0; Win64; x64; rv: 55.0) Gecko / 20100101 Firefox / 55.0", // browser type, version and OS; optional, has a default value
                ContentType = "text/html",      // optional, has a default value
                //Cookie = MyHttpParameter.cookie,
                //Host = MyHttpParameter.host,
                //Referer = MyHttpParameter.referer,
                ResultType = ResultType.Byte
            };

            HttpResult result = http.GetHtml(item);
            watch.Stop();

            // No byte payload: the helper's Html text is used as the failure description.
            if (result.ResultByte == null)
            {
                throw new Exception(result.Html);
            }

            if (result.ResultByte.Length < MIN_IMG_SIZE)
            {
                throw new Exception("Invalid image size:" + result.ResultByte.Length);
            }

            NetworkSpeed.increment(result.ResultByte.Length);
            NetworkSpeed.addTotalSize(result.ResultByte.Length);

            // Duration of the HTTP request only (the stopwatch was stopped before saving).
            long milliseconds = watch.ElapsedMilliseconds;

            string saveResult = saveImg(result.ResultByte, path, fileName, imgName);
            if (saveResult.StartsWith("Error:", StringComparison.Ordinal))
            {
                throw new Exception(saveResult);
            }

            return "Name:" + saveResult + " Time:" + milliseconds + "ms";
        }
        catch (Exception ex)
        {
            // Every failure becomes an "Error:..." string; avoid doubling the prefix when the
            // message already carries it. Ordinal: "Error:" is a fixed marker, not linguistic text.
            string prefix = ex.Message.StartsWith("Error:", StringComparison.Ordinal) ? "" : "Error:";
            return prefix + ex.Message + " ImgUrl:" + imgUrl;
        }
    });
}
/// <summary>
/// Never-ending polling loop: every 500 ms it writes the current time to the first status label and,
/// while a spider instance exists, the download count, page progress and network speed to the others.
/// </summary>
private void refreshUI()
{
    for (;;)
    {
        string clockText = DateTime.Now.ToString();
        toolStripStatusLabel1.Text = clockText;

        if (MySpider != null)
        {
            string downloadsText = string.Concat("Downloads:", MySpider.finishImgCount.ToString());
            toolStripStatusLabel2.Text = downloadsText;

            // "finished/fetched" page counters.
            string pagesText = string.Concat(MySpider.finishPageCount, "/", MySpider.fetchPageCount);
            toolStripStatusLabel3.Text = pagesText;

            string speedText = NetworkSpeed.getSpeed();
            toolStripStatusLabel4.Text = speedText;
        }

        // Pause between refreshes.
        Thread.Sleep(500);
    }
}
/// <summary>
/// Marks the work as finished and prints the final summary (elapsed time, image/page counts,
/// total download size) in yellow. Only the first call does anything; later calls return immediately.
/// </summary>
private void end()
{
    if (!hasEnded)
    {
        hasEnded = true;
        workFinishFlag = true;
        watch.Stop();

        const ConsoleColor summaryColor = ConsoleColor.Yellow;
        myWriteLine("All Tasks Finished!", summaryColor);

        // hh:mm:ss.ff — hours/minutes/seconds components plus hundredths of a second.
        TimeSpan elapsed = watch.Elapsed;
        string elapsedTime = elapsed.ToString(@"hh\:mm\:ss\.ff");
        myWriteLine(string.Concat("Total Time:", elapsedTime), summaryColor);

        string countsLine = string.Concat("Total Images Count:", finishImgCount, " Total Pages Count:", finishPageCount);
        myWriteLine(countsLine, summaryColor);

        string sizeLine = string.Concat("Total Download Size:", NetworkSpeed.getTotalSizeText());
        myWriteLine(sizeLine, summaryColor);
    }
}
/// <summary>
/// Click handler for the start/finish button.
/// In the "Start Download" state it stores the edited site settings, builds the spider from the
/// form values, wires the progress events and starts the spider on a thread-pool task.
/// In the "Finish Download" state it disables the button and asks the running spider to stop.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void HSButton_Click(object sender, EventArgs e)
{
    if (HSButton.Text == "Start Download")
    {
        // A click handler runs on the UI thread, so all control reads/writes are done here
        // rather than inside the background task; only the spider itself runs off-thread.
        string siteName = siteComboBox.Text;

        // FindIndex reports "no such site" as -1; indexing with it would throw, so bail out
        // before the UI is switched into the running state.
        int siteIndex = WebsiteInfo.websiteList.FindIndex(site => site.siteName == siteName);
        if (siteIndex < 0)
        {
            return;
        }

        HSButton.Text = "Finish Download";
        settingGroupBox.Enabled = false;

        // Replace the stored entry for the selected site with the values currently in the form.
        WebsiteInfo.websiteList[siteIndex] = new WebSiteSetting()
        {
            siteName = siteName,
            domain = urlTextBox.Text,
            imgType = imgTypeTextBox.Text,
            imgKeys = imgKeysTextBox.Text.Split('|'),
            pageRegex = pageRegTextBox.Text,
            urlPattern = urlPatTextBox.Text,
            firstPageUrlPattern = _1stUrlTextBox.Text,
        };
        saveCurrentSettings();
        NetworkSpeed.init();

        MySettings = new SpiderSettings();
        MySettings.TextKeywords.Add("P");
        MySettings.imgType = imgTypeTextBox.Text;
        MySettings.domain = urlTextBox.Text;
        MySettings.siteName = siteName;
        MySettings.savePath = pathTextBox.Text;
        MySettings.startPage = (long)startPageNUD.Value;
        MySettings.endPage = (long)endPageNUD.Value;

        // Assigned before this handler returns, so a following "Finish Download" click always
        // finds the spider that belongs to this run.
        MySpider = new GetSpider(MySettings);

        // The spider may raise its events from worker threads; controls must only be touched
        // from the UI thread, so hop over when needed. BeginInvoke (not Invoke) keeps the
        // spider thread from blocking on the UI.
        Action<Action> runOnUiThread = update =>
        {
            if (InvokeRequired)
            {
                BeginInvoke(update);
            }
            else
            {
                update();
            }
        };

        MySpider.OnPageFetched += (source, args) => runOnUiThread(() =>
        {
            mylistBox.Items.Insert(0, args.imgInfoResult.picIndex + " " + args.imgInfoResult.title);
            progressBar.Maximum = (int)MySpider.fetchPageCount;
        });

        MySpider.OnPageFinished += (source, args) => runOnUiThread(() =>
        {
            if (args != null && MySpider != null)
            {
                mylistBox.Items.Remove(args.imgInfoResult.picIndex + " " + args.imgInfoResult.title);
                progressBar.Value = (int)MySpider.finishPageCount;
            }

            // Once the spider reports completion, put the form back into its idle state.
            if (MySpider != null && MySpider.workFinishFlag)
            {
                progressBar.Value = 0;
                mylistBox.Items.Clear();
                settingGroupBox.Enabled = true;
                HSButton.Enabled = true;
                HSButton.Text = "Start Download";
            }
        });

        // Run the instance created above even if the field is reassigned later.
        GetSpider spider = MySpider;
        Task.Run(() => spider.run());
        //MySpider.test();
    }
    else if (HSButton.Text == "Finish Download")
    {
        // The button stays disabled until the OnPageFinished handler sees workFinishFlag.
        HSButton.Enabled = false;
        MySpider.stopWorkFlag = true;
    }
}