/// <summary>
/// Fetches a web page with an HTTP GET request and returns its HTML.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="pageEncode">
/// Name of the text encoding to set on the request (anything accepted by
/// <see cref="Encoding.GetEncoding(string)"/>). When this is the sentinel
/// "自动编码", <c>null</c> or empty, no encoding is set on the request
/// (presumably the HTTP helper then detects it itself; confirm against HttpHelper4).
/// </param>
/// <returns>
/// The HTML reported by the HTTP helper, or an empty string when the helper
/// returned its "本次请求并未返回任何数据" placeholder.
/// </returns>
public static string getPageContent(string url, string pageEncode)
{
    var http = new HttpHelper4();
    var httpItem = new HttpItem() { URL = url, Method = "GET" };

    // Encoding.GetEncoding throws on null/empty names, so treat those the same
    // way as the explicit sentinel instead of failing.
    if (!string.IsNullOrEmpty(pageEncode) && pageEncode != "自动编码")
    {
        httpItem.Encoding = Encoding.GetEncoding(pageEncode);
    }

    var httpResult = http.GetHtml(httpItem);

    // The helper reports "no data" through a fixed placeholder string; map it to empty.
    return httpResult.Html == "本次请求并未返回任何数据" ? string.Empty : httpResult.Html;
}
/// <summary>
/// Searches wenda.so.com for the keyword typed into the content keyword box,
/// refills <c>List</c> with the questions found on the first result page and
/// rebinds the data list.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnCreate_Click(object sender, EventArgs e)
{
    List.Clear();

    // 1. Fetch the first page of search results for the keyword.
    // Example URL: http://wenda.so.com/search/?ie=utf-8&q=android%20%E5%A4%9A%E7%BA%BF%E7%A8%8B&src=360chrome_search
    string keyword = this.txtContentKeyWord.Text;
    string keywordEncode = HttpUtility.UrlEncode(keyword, Encoding.GetEncoding("utf-8"));
    var http = new HttpHelper4();
    var httpResult = http.GetHtml(new HttpItem()
    {
        URL = "http://wenda.so.com/search/?ie=utf-8&q=" + keywordEncode + "&src=360chrome_search&pn=0"
    });

    // 2. Parse the result markup. A null body is parsed as an empty document
    //    so the list simply ends up empty instead of the handler throwing.
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(httpResult.Html ?? string.Empty);

    // 3. Queue one entry per result item that has the expected structure.
    foreach (var item in doc.DocumentNode.CssSelect(".item"))
    {
        if (item == null)
        {
            continue;
        }

        var headerLinks = item.CssSelect(".qa-i-hd a").ToArray();
        var bodies = item.CssSelect(".qa-i-bd").ToArray();
        if (headerLinks.Length == 0 || bodies.Length == 0)
        {
            // Item without a title link or summary (e.g. an ad block): skip it
            // rather than aborting the whole search with an index error.
            continue;
        }

        var headerLink = headerLinks[0];
        var hrefAttribute = headerLink.Attributes["href"];
        if (hrefAttribute == null)
        {
            continue;
        }

        List.Add(new ContentHelper()
        {
            Url = "http://wenda.so.com" + hrefAttribute.Value,
            Title = headerLink.InnerText,
            Summary = bodies[0].InnerText
        });
    }

    // 4. Reload the grid from the refreshed list.
    this.Bind_DataList();
}
/// <summary>
/// Runs a wenda.so.com search for the text in the content keyword box, replaces
/// the contents of <c>List</c> with the first page of results and rebinds the
/// data list.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnCreate_Click(object sender, EventArgs e)
{
    List.Clear();

    string keyword = this.txtContentKeyWord.Text;
    string keywordEncode = HttpUtility.UrlEncode(keyword, Encoding.GetEncoding("utf-8"));

    var http = new HttpHelper4();
    var httpResult = http.GetHtml(new HttpItem()
    {
        URL = "http://wenda.so.com/search/?ie=utf-8&q=" + keywordEncode + "&src=360chrome_search&pn=0"
    });

    // LoadHtml rejects null, so an absent response body is parsed as an empty page.
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(httpResult.Html ?? string.Empty);

    foreach (var item in doc.DocumentNode.CssSelect(".item"))
    {
        if (item == null)
        {
            continue;
        }

        // Evaluate each selector once and make sure it matched before indexing;
        // result items that lack the expected markup are skipped.
        var headerLinks = item.CssSelect(".qa-i-hd a").ToArray();
        var bodies = item.CssSelect(".qa-i-bd").ToArray();
        if (headerLinks.Length == 0 || bodies.Length == 0)
        {
            continue;
        }

        var headerLink = headerLinks[0];
        var hrefAttribute = headerLink.Attributes["href"];
        if (hrefAttribute == null)
        {
            continue;
        }

        List.Add(new ContentHelper()
        {
            Url = "http://wenda.so.com" + hrefAttribute.Value,
            Title = headerLink.InnerText,
            Summary = bodies[0].InnerText
        });
    }

    this.Bind_DataList();
}
// DocumentCompleted handler installed by the most recent GetHtml call. It is kept
// so the next call can detach it instead of stacking one more subscription.
private WebBrowserDocumentCompletedEventHandler getHtmlCompletedHandler;

/// <summary>
/// Loads a web page and passes its HTML to <c>OutHtmlHandler</c> (when one is set).
/// </summary>
/// <remarks>
/// When <c>this.WebBrowser</c> is false the page is fetched through HttpHelper4 and
/// the callback is invoked before this method returns. Otherwise the embedded
/// browser control navigates to the page and the callback is invoked from its
/// DocumentCompleted event.
/// </remarks>
/// <param name="url">Address of the page to load.</param>
/// <param name="cookie">Optional cookie string; ignored when null or empty.</param>
public void GetHtml(string url, string cookie)
{
    if (!this.WebBrowser)
    {
        var http = new HttpHelper4();
        var httpItem = new HttpItem();
        httpItem.URL = url;
        if (!string.IsNullOrEmpty(cookie))
        {
            httpItem.Cookie = cookie;
        }

        var httpResult = http.GetHtml(httpItem);
        if (OutHtmlHandler != null)
        {
            OutHtmlHandler(httpResult.Html);
        }

        return;
    }

    // NOTE(review): this assigns the boolean false, not null — confirm that is intended.
    webBrowser.ObjectForScripting = false;
    webBrowser.ScriptErrorsSuppressed = true;

    // Document is null until the control has loaded something, so the cookie can
    // only be applied to an already loaded document.
    if (!string.IsNullOrEmpty(cookie) && webBrowser.Document != null)
    {
        webBrowser.Document.Cookie = cookie;
    }

    // Detach the handler from the previous call; otherwise every call adds another
    // subscription and the callback fires once per earlier GetHtml call.
    if (getHtmlCompletedHandler != null)
    {
        webBrowser.DocumentCompleted -= getHtmlCompletedHandler;
    }

    getHtmlCompletedHandler = (object sender, WebBrowserDocumentCompletedEventArgs e) =>
    {
        var document = webBrowser.Document;
        if (document == null || document.Body == null || document.Body.Parent == null)
        {
            // Nothing that can be serialised as HTML was loaded.
            return;
        }

        var html = document.Body.Parent.OuterHtml;
        if (OutHtmlHandler != null)
        {
            OutHtmlHandler(html);
        }
    };
    webBrowser.DocumentCompleted += getHtmlCompletedHandler;

    webBrowser.Navigate(new Uri(url));
}
/// <summary>
/// Handles HTML or plain text dropped onto the desktop form: extracts the dropped
/// content (and, for HTML, a title) and opens a <c>frmHandWebInsert</c> window
/// pre-filled with both.
/// </summary>
/// <param name="sender">Control that raised the drop event (unused).</param>
/// <param name="e">Drag-and-drop data.</param>
private void frmDeskTop_DragDrop(object sender, DragEventArgs e)
{
    if (!e.Data.GetDataPresent(DataFormats.Html) && !e.Data.GetDataPresent(DataFormats.Text))
    {
        return;
    }

    string webTitle = "新标题";
    string webContent;

    object htmlData = e.Data.GetData(DataFormats.Html, true);
    if (htmlData == null)
    {
        // Plain-text drop: the text itself is the content, the default title is kept.
        object textData = e.Data.GetData(DataFormats.Text);
        if (textData == null)
        {
            return;
        }

        webContent = textData.ToString();
    }
    else
    {
        // The HTML payload is read as a UTF-8 stream when one is supplied. If the
        // data object hands back something other than a MemoryStream, fall back
        // to the converted value instead of dereferencing null.
        var htmlStream = e.Data.GetData("Html Format") as MemoryStream;
        string clipboardHtml = htmlStream != null
            ? Encoding.UTF8.GetString(htmlStream.ToArray())
            : htmlData as string;
        if (clipboardHtml == null)
        {
            return;
        }

        // Keep only the fragment between the clipboard markers.
        Regex regcontent = new Regex(@"<!--StartFragment-->([\s\S]*?)<!--EndFragment-->", RegexOptions.IgnoreCase);
        webContent = regcontent.Match(clipboardHtml).ToString();
        webContent = StringHelper.Instance.Replace(webContent, "<!--StartFragment-->", "");
        webContent = StringHelper.Instance.Replace(webContent, "<!--EndFragment-->", "");

        webTitle = ResolveDroppedHtmlTitle(clipboardHtml);
    }

    frmHandWebInsert ff = new frmHandWebInsert();
    ff.Title = webTitle;
    ff.Content = webContent;
    ff.Show();
}

/// <summary>
/// Picks a title for dropped clipboard HTML: the &lt;title&gt; element inside the
/// payload when there is one, otherwise the &lt;title&gt; of the page named by the
/// payload's SourceURL header. Any failure yields the default "新标题".
/// </summary>
/// <param name="clipboardHtml">Full clipboard HTML payload, including its header lines.</param>
/// <returns>The resolved title, or "新标题" when resolving it threw.</returns>
private string ResolveDroppedHtmlTitle(string clipboardHtml)
{
    try
    {
        string title = Regex.Match(clipboardHtml, "<title>.+?</title>", RegexOptions.IgnoreCase | RegexOptions.Multiline).ToString();
        title = StringHelper.Instance.Replace(title, "<title>", "");
        title = StringHelper.Instance.Replace(title, "</title>", "");
        if (!string.IsNullOrEmpty(title))
        {
            return title;
        }

        // No inline title: download the source page and read its <title>.
        Regex regUrl = new Regex(@"SourceURL:([\s\S]*?)\r\n", RegexOptions.IgnoreCase);
        string webUrl = regUrl.Match(clipboardHtml).ToString();
        webUrl = StringHelper.Instance.Replace(webUrl, "SourceURL:", "");
        webUrl = StringHelper.Instance.Replace(webUrl, "\r\n", "");

        var http = new HttpHelper4();
        var httpResult = http.GetHtml(new HttpItem() { URL = webUrl });
        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(httpResult.Html);
        return doc.DocumentNode.CssSelect("title").ToArray()[0].InnerText;
    }
    catch
    {
        // Best effort only: a missing SourceURL, a failed download or a page
        // without <title> all fall back to the default title.
        return "新标题";
    }
}
/// <summary>
/// Publishes the collected rows of the current task to the task's custom web URLs.
/// </summary>
/// <remarks>
/// For every row of the task's local <c>Content</c> table and every configured URL,
/// each <c>[LabelName]</c> placeholder in the URL and in the post parameters is
/// replaced with the row's value for that label. The pair is skipped when the
/// publish log already has a record for it, and the request is sent otherwise.
/// <c>PublishCompalteDelegate</c> is raised once all rows have been processed.
/// </remarks>
private void StartDiyWeb()
{
    string LocalSQLiteName = "Data\\Collection\\" + Model.TaskName + "\\SpiderResult.db";
    DataTable dtData = DbHelper.Query(LocalSQLiteName, "Select * From Content").Tables[0];
    var listDiyUrl = DALDiyWebUrlHelper.GetList(" And SelfId=" + Model.ID, "", 0);
    HttpHelper4 http = new HttpHelper4();
    int taskId = Model.ID;

    foreach (DataRow dr in dtData.Rows)
    {
        int resultId = int.Parse(dr["Id"].ToString());

        foreach (var m in listDiyUrl)
        {
            try
            {
                string getUrl = m.Url;
                string postParams = m.UrlParams;
                StringBuilder sbContent = new StringBuilder();

                foreach (ModelTaskLabel mTaskLabel in Model.ListTaskLabel)
                {
                    // The value may additionally need URL encoding; to be confirmed by testing.
                    string pageEncodeContent = dr[mTaskLabel.LabelName].ToString().Replace("'", "''");
                    string placeholder = "[" + mTaskLabel.LabelName + "]";

                    getUrl = getUrl.Replace(placeholder, pageEncodeContent);
                    // An entry without post parameters must not abort publishing:
                    // previously a null here threw and was silently swallowed below.
                    if (postParams != null)
                    {
                        postParams = postParams.Replace(placeholder, pageEncodeContent);
                    }

                    sbContent.Append(pageEncodeContent);
                }

                // NOTE(review): the key is built from task id, row id and row content only;
                // it does not include the target URL. Confirm that ChkRecord still tells
                // different URLs apart for the same row.
                string md5key = StringHelper.Instance.MD5(taskId.ToString() + resultId.ToString() + sbContent.ToString(), 32).ToLower();

                // Skip rows that were already published.
                if (DALDataPublishLogHelper.ChkRecord(Model.ID, resultId, md5key))
                {
                    continue;
                }

                // NOTE(review): the log record is written before the request is sent, so a
                // failed request is still recorded as published. Confirm this is intended.
                DALDataPublishLogHelper.Insert(new ModelDataPublishLog()
                {
                    TaskId = taskId,
                    ResultId = resultId,
                    DesKey = md5key,
                    CreateTime = DateTime.Now.ToString()
                });

                // Send the publish request.
                // NOTE(review): Postdata is supplied but Method is not set — verify that
                // HttpItem's default method actually transmits the post data.
                http.GetHtml(new HttpItem()
                {
                    URL = getUrl,
                    Postdata = postParams,
                    ContentType = "application/x-www-form-urlencoded"
                });
            }
            catch (Exception ex)
            {
                // Publishing stays best effort (one bad row/URL must not stop the rest),
                // but the failure is now traced instead of vanishing.
                System.Diagnostics.Trace.TraceError("StartDiyWeb: publishing result {0} failed: {1}", resultId, ex);
                continue;
            }
        }
    }

    if (PublishCompalteDelegate != null)
    {
        gatherEv.Message = "发布到自定义Web网站完成!";
        PublishCompalteDelegate(this, gatherEv);
    }
}
/// <summary>
/// Loads the category list of the web site selected in <c>cmbWebSite</c>.
/// </summary>
/// <remarks>
/// Disables the submit button, clears the category combo box and starts a
/// ThreadMultiHelper work item that requests "cmd=getclasslist" from the site's URL,
/// parses the JSON reply and hands the resulting list to <c>OutClassListMsg</c>
/// through the <c>OutMsg</c> delegate. The submit button is re-enabled from the
/// helper's complete event.
/// </remarks>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void cmbWebSite_SelectedIndexChanged(object sender, EventArgs e)
{
    // NOTE(review): SelectedItem is dereferenced before any null check; the check
    // below only covers the item's Value.
    var model = (ModelSiteInfo)((ListItem2)this.cmbWebSite.SelectedItem).Value;
    if (model == null) { return; }

    // Block submitting while the category list is being reloaded.
    this.btnSubmit.Enabled = false;
    this.cmbClassList.Text = string.Empty;
    this.cmbClassList.Items.Clear();

    var th = new ThreadMultiHelper(1);
    th.WorkMethod += new ThreadMultiHelper.DelegateWork(delegate(int taskindex, int threadindex)
    {
        string nowTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
        // NOTE(review): the text between "&username=" and "&userpwd=" appears to have
        // been masked ("******") in this copy of the file; restore the original
        // expression before building.
        string baseData = "cmd=getclasslist" + "&username="******"&userpwd=" + model.UserPwd + "&time=" + nowTime;
        // The request is signed with the lower-cased MD5 of the parameters built so far.
        string md5key = "&md5key=" + StringHelper.Instance.MD5(baseData, 32).ToLower();
        string senUrl = model.Url;
        string sendData = baseData + md5key;

        // All parameters, including the password value, travel in the query string.
        HttpHelper4 http = new HttpHelper4();
        var result = http.GetHtml(new HttpItem() { URL = senUrl + "?" + sendData });
        var html = result.Html;
        try
        {
            // Reads a "code" value and a "list" array from the reply.
            var json = JObject.Parse(html);
            var code = json["code"].Value <string>();
            var list = json["list"].ToArray();
            // The label reports success whatever the value of code is.
            // NOTE(review): if WorkMethod runs on a worker thread, this and the
            // Items.Clear() below touch controls off the UI thread — confirm.
            this.lblResult.Text = "操作结果成功!结果为:" + code;
            List <ModelClassList> listClassList = new List <ModelClassList>();
            this.cmbClassList.Items.Clear();
            foreach (var l in list)
            {
                listClassList.Add(
                    new ModelClassList() {
                        ClassId = l["classid"].Value <string>(),
                        ClassName = l["classname"].Value <string>(),
                        CreateTime = ""
                    }
                );
            }
            // Hand the parsed categories to OutClassListMsg via the OutMsg delegate.
            OutMsg = new OutCmbMsg(OutClassListMsg);
            OutMsg(listClassList);
        }
        // Any failure while parsing or processing the reply is silently ignored.
        catch (Exception ex) { }
    });
    th.CompleteEvent += new ThreadMultiHelper.DelegateComplete(delegate()
    {
        this.btnSubmit.Enabled = true;
    });
    th.Start();
}
/// <summary>
/// Validates the form and publishes the entered title and content to the selected
/// web site under the selected category.
/// </summary>
/// <remarks>
/// After validation a ThreadMultiHelper work item posts "cmd=sendcontent" to the
/// site's URL and shows the reply (or its JSON "code" value) in <c>lblResult</c>.
/// The helper's complete event shows a confirmation box and closes the form when
/// the user presses OK.
/// </remarks>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnSubmit_Click(object sender, EventArgs e)
{
    // NOTE(review): SelectedItem is dereferenced before any null check; the check
    // below only covers the item's Value.
    var model = (ModelSiteInfo)((ListItem2)this.cmbWebSite.SelectedItem).Value;
    if (model == null) { MessageBox.Show("请选择一个网站!"); this.cmbWebSite.Focus(); return; }
    if (this.cmbClassList.Text == string.Empty) { MessageBox.Show("请选择一个分类!"); this.cmbClassList.Focus(); return; }

    var title = this.txtTitle.Text;
    var content = this.fckHtmlEditorControl1.InnerHtml;
    if (string.IsNullOrEmpty(title) || string.IsNullOrEmpty(content)) { MessageBox.Show("标题或者内容为空!"); this.txtTitle.Focus(); return; }

    // NOTE(review): the combo box text can be non-empty while SelectedItem is null
    // (typed text); selectedItem is used without a null check.
    var selectedItem = (ListItem)this.cmbClassList.SelectedItem;
    var classid = selectedItem.Value;
    var classname = selectedItem.Text;
    // An empty id or "0" is treated as no category selected.
    if (classid == string.Empty || classid == "0") { MessageBox.Show("请选择一个分类!"); this.cmbClassList.Focus(); return; }

    var th = new ThreadMultiHelper(1);
    th.WorkMethod += new ThreadMultiHelper.DelegateWork(delegate(int taskindex, int threadindex)
    {
        string nowTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
        // Title, content and category title are URL-encoded with the site's encoding.
        // NOTE(review): the text between "&username=" and "&userpwd=" appears to have
        // been masked ("******") in this copy of the file; restore the original
        // expression before building.
        string baseData = "cmd=sendcontent" + "&username="******"&userpwd=" + model.UserPwd + "&title=" + StringHelper.Instance.UrlEncode(title, model.Encode) + "&content=" + StringHelper.Instance.UrlEncode(content, model.Encode) + "&classid=" + classid + "&classtitle=" + StringHelper.Instance.UrlEncode(classname, model.Encode) + "&author=" + string.Empty + "&time=" + nowTime;
        // The request is signed with the lower-cased MD5 of the parameters built so far.
        string md5key = "&md5key=" + StringHelper.Instance.MD5(baseData, 32).ToLower();
        string sendUrl = model.Url;
        string sendData = baseData + md5key;

        HttpHelper4 http = new HttpHelper4();
        var result = http.GetHtml(new HttpItem() {
            URL = sendUrl,
            Method = "post",
            Postdata = sendData,
            ContentType = "application/x-www-form-urlencoded",
            Encoding = Encoding.GetEncoding(model.Encode)
        });
        var html = result.Html;
        // Show the raw reply first; it is replaced by the "code" value when the
        // reply parses as JSON. The text reports success in either case.
        // NOTE(review): if WorkMethod runs on a worker thread, these label updates
        // happen off the UI thread — confirm.
        this.lblResult.Text = "操作结果成功!结果为:" + html;
        try
        {
            var json = JObject.Parse(html);
            var code = json["code"].Value <string>();
            this.lblResult.Text = "操作结果成功!结果为:" + code;
        }
        // A reply that is not JSON (or has no "code") is silently ignored.
        catch (Exception ex) { }
    });
    th.CompleteEvent += new ThreadMultiHelper.DelegateComplete(delegate()
    {
        // The success message is shown whenever the work item completes; nothing
        // here inspects the server reply.
        var r = MessageBox.Show(this, "发布内容成功!", "提示信息!", MessageBoxButtons.OKCancel);
        if (r == System.Windows.Forms.DialogResult.OK) { this.Close(); this.Dispose(); }
    });
    th.Start();
}