/// <summary>
/// Handles a change of the selected entry in the rules combo box.
/// Re-enables the request button, clears the previous message and result preview,
/// and then either blanks the rule fields (no usable selection) or fills them from
/// the rule whose name matches the combo box text.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void cboRules_SelectedIndexChanged(object sender, EventArgs e)
{
    btnRequest.Enabled = true;
    lblMsg.Text = "";
    txtRepContent.Text = "";

    // SelectedValue is null when the combo box has no selected item; calling
    // ToString() on it directly would throw, so a missing value is treated the
    // same way as an empty one.
    object selectedValue = this.cboRules.SelectedValue;
    string requestUrl = selectedValue == null ? null : selectedValue.ToString();

    if (string.IsNullOrEmpty(requestUrl))
    {
        txtTitleRule.Text = "";
        txtContentRule.Text = "";
        txtListRule.Text = "";
        txtRequestUrl.Text = "";
        isOk = false;
        return;
    }

    isOk = true;
    txtRequestUrl.Text = requestUrl;

    // The selected value supplies the request URL; the displayed text is used as
    // the rule name for the lookup.
    var rule = Spider.GetRuleByName(this.cboRules.Text);
    txtTitleRule.Text = rule.TitleRule;
    txtContentRule.Text = rule.ContentRule;
    txtListRule.Text = rule.ListRule;
    encoding = rule.Encoding;
}
/// <summary>
/// Downloads one page, or a range of pages when the repeat check box is ticked,
/// extracts jokes using the XPath rules entered on the form and stores them.
/// </summary>
/// <param name="action">
/// Callback invoked once per requested URL with a progress message of the form
/// "正在抓取:{url}".
/// </param>
/// <remarks>
/// In paged mode the request URL text is used as a format string and the page
/// number is passed as argument {0}. A page that yields no list nodes is skipped,
/// while a page whose list nodes yield no jokes stops the whole run. The request
/// button is re-enabled on every exit path, including when an exception escapes.
/// The catch-status label is cleared only when the run completes without an
/// early stop.
/// </remarks>
private void CatchJokesAction(Action<string> action)
{
    try
    {
        if (cbIsRepeat.Checked)
        {
            int startPage = txtStart.Text.ToInt32();
            int endPage = txtEndPage.Text.ToInt32();

            for (int page = startPage; page <= endPage; page++)
            {
                string pageUrl = string.Format(txtRequestUrl.Text, page);
                if (!CatchJokesFromPage(pageUrl, action, true))
                {
                    return;
                }
            }
        }
        else if (!CatchJokesFromPage(txtRequestUrl.Text, action, false))
        {
            return;
        }

        lblCatchStatus.Text = "";
    }
    finally
    {
        // Every normal exit re-enabled the button; doing it here also covers a
        // failed download or parse so the form does not stay locked.
        btnRequest.Enabled = true;
    }
}

/// <summary>
/// Fetches a single URL, extracts the jokes it contains and stores them.
/// </summary>
/// <param name="url">The fully resolved URL to download.</param>
/// <param name="action">Progress callback, invoked once with the URL being fetched.</param>
/// <param name="continueWhenListIsEmpty">
/// Value to return when the list rule matches nothing: <c>true</c> lets a paged
/// run move on to the next page, <c>false</c> stops the run.
/// </param>
/// <returns>
/// <c>true</c> when the caller should carry on; <c>false</c> when the run must stop.
/// </returns>
private bool CatchJokesFromPage(string url, Action<string> action, bool continueWhenListIsEmpty)
{
    action(string.Format("正在抓取:{0}", url));

    var content = Spider.GetHtmlContent(url, encoding);

    HtmlNodeCollection nodes = null;
    // LoadHtml rejects null input; an empty document cannot match the list rule
    // anyway, so both cases are reported as "no list data".
    if (!string.IsNullOrEmpty(content))
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(content);
        nodes = doc.DocumentNode.SelectNodes(txtListRule.Text.Trim());
    }

    if (nodes == null || nodes.Count == 0)
    {
        lblMsg.Text = "没有抓取到集合数据,请重新定义规则";
        return continueWhenListIsEmpty;
    }

    // Show how many list items matched and preview the first one.
    lblMsg.Text = nodes.Count.ToString();
    txtRepContent.Text = nodes[0].InnerHtml;

    string titleXPath = txtTitleRule.Text.Trim();
    string contentXPath = txtContentRule.Text.Trim();
    List<JokeInfo> jokes = new List<JokeInfo>();

    foreach (var node in nodes)
    {
        // Each list item is re-parsed as a stand-alone node before the title and
        // content rules are applied to it.
        HtmlNode item = HtmlNode.CreateNode(node.OuterHtml);
        HtmlNode titleNode = item.SelectSingleNode(titleXPath);
        HtmlNode contentNode = item.SelectSingleNode(contentXPath);

        // SelectSingleNode returns null when the rule does not match; such an
        // item is skipped just like one with empty text instead of crashing.
        if (titleNode == null || contentNode == null)
        {
            continue;
        }

        JokeInfo jokeinfo = new JokeInfo();
        jokeinfo.Title = titleNode.InnerText.Trim();
        jokeinfo.Content = contentNode.InnerText.Trim();

        if (string.IsNullOrEmpty(jokeinfo.Content) || string.IsNullOrEmpty(jokeinfo.Title))
        {
            continue;
        }

        jokes.Add(jokeinfo);
    }

    if (jokes.Count == 0)
    {
        lblMsg.Text = "没有抓取到数据,请重新定义规则!";
        return false;
    }

    jokeLogic.AddJokes(ToJokes(jokes));
    lblMsg.Text = "抓取数据成功";
    return true;
}