Beispiel #1
0
        /// <summary>
        /// Handles a selection change in <c>cboRules</c>: resets the request UI and loads the
        /// selected rule (request URL, title/content/list rules and encoding) into the form.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void cboRules_SelectedIndexChanged(object sender, EventArgs e)
        {
            btnRequest.Enabled = true;
            lblMsg.Text        = "";
            txtRepContent.Text = "";

            // SelectedValue is null when nothing is selected, so convert it null-safely
            // instead of calling ToString() on it directly. It is read once and reused.
            object selectedValue = this.cboRules.SelectedValue;
            string requestUrl    = selectedValue == null ? null : selectedValue.ToString();

            if (string.IsNullOrEmpty(requestUrl))
            {
                // No usable selection: clear every rule field and mark the form as not ready.
                txtTitleRule.Text   = "";
                txtContentRule.Text = "";
                txtListRule.Text    = "";
                txtRequestUrl.Text  = "";
                isOk = false;
                return;
            }

            isOk = true;
            txtRequestUrl.Text = requestUrl;

            // The rule is looked up by the combo box's display text, not by its value.
            var rule = Spider.GetRuleByName(this.cboRules.Text);

            txtTitleRule.Text   = rule.TitleRule;
            txtContentRule.Text = rule.ContentRule;
            txtListRule.Text    = rule.ListRule;
            encoding            = rule.Encoding;
        }
Beispiel #2
0
        /// <summary>
        /// Fetches jokes from the configured request URL and stores them through <c>jokeLogic</c>.
        /// When <c>cbIsRepeat</c> is checked, the request URL is used as a format string and one
        /// page is fetched for every page number from <c>txtStart</c> to <c>txtEndPage</c> (inclusive);
        /// otherwise the request URL is fetched once as-is.
        /// </summary>
        /// <param name="action">Callback invoked with a progress message before each page is fetched.</param>
        private void CatchJokesAction(Action<string> action)
        {
            // NOTE(review): every early return below leaves lblCatchStatus untouched, exactly as
            // before; only the normal completion path at the bottom clears it.
            if (cbIsRepeat.Checked)
            {
                int startPage = txtStart.Text.ToInt32();
                int endPage   = txtEndPage.Text.ToInt32();

                for (int page = startPage; page <= endPage; page++)
                {
                    // Build the page URL once and reuse it for the progress message and the request.
                    string pageUrl = string.Format(txtRequestUrl.Text, page);
                    action(string.Format("正在抓取:{0}", pageUrl));

                    IList<HtmlNode> pageNodes = SelectJokeListNodes(pageUrl);
                    if (pageNodes == null || pageNodes.Count == 0)
                    {
                        // In multi-page mode a page without list nodes is skipped, not fatal.
                        lblMsg.Text = "没有抓取到集合数据,请重新定义规则";
                        continue;
                    }

                    if (!StoreJokesFromNodes(pageNodes))
                    {
                        return;
                    }
                }
            }
            else
            {
                string url = txtRequestUrl.Text;
                action(string.Format("正在抓取:{0}", url));

                IList<HtmlNode> nodes = SelectJokeListNodes(url);
                if (nodes == null || nodes.Count == 0)
                {
                    lblMsg.Text        = "没有抓取到集合数据,请重新定义规则";
                    btnRequest.Enabled = true;
                    return;
                }

                if (!StoreJokesFromNodes(nodes))
                {
                    return;
                }
            }

            btnRequest.Enabled  = true;
            lblCatchStatus.Text = "";
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with the current <c>encoding</c>, parses it as HTML and
        /// selects the nodes matched by the list rule XPath in <c>txtListRule</c>.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The matched nodes, or null when the XPath matches nothing.</returns>
        private IList<HtmlNode> SelectJokeListNodes(string url)
        {
            var content = Spider.GetHtmlContent(url, encoding);

            // Fully qualified, as in the original declaration of the document.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(content);

            return doc.DocumentNode.SelectNodes(txtListRule.Text.Trim());
        }

        /// <summary>
        /// Extracts a joke (title and content) from each list node using the XPath rules in
        /// <c>txtTitleRule</c> and <c>txtContentRule</c>, then stores the resulting jokes.
        /// Also shows the node count in <c>lblMsg</c> and the first node's inner HTML in
        /// <c>txtRepContent</c>.
        /// </summary>
        /// <param name="nodes">Non-empty list of nodes, one per joke entry.</param>
        /// <returns>
        /// true when at least one joke was extracted and stored; false when none could be
        /// extracted, in which case the failure message is shown and <c>btnRequest</c> is re-enabled.
        /// </returns>
        private bool StoreJokesFromNodes(IList<HtmlNode> nodes)
        {
            lblMsg.Text        = nodes.Count.ToString();
            txtRepContent.Text = nodes[0].InnerHtml;

            // The rules do not change while iterating, so trim them once.
            string titleXPath   = txtTitleRule.Text.Trim();
            string contentXPath = txtContentRule.Text.Trim();

            List<JokeInfo> jokes = new List<JokeInfo>();
            foreach (HtmlNode node in nodes)
            {
                // Re-parse the entry as a stand-alone fragment before applying the item rules.
                HtmlNode entry       = HtmlNode.CreateNode(node.OuterHtml);
                HtmlNode titleNode   = entry.SelectSingleNode(titleXPath);
                HtmlNode contentNode = entry.SelectSingleNode(contentXPath);

                // SelectSingleNode returns null when the rule matches nothing; skip such an
                // entry the same way an entry with empty text is skipped, instead of crashing.
                if (titleNode == null || contentNode == null)
                {
                    continue;
                }

                JokeInfo jokeinfo = new JokeInfo();
                jokeinfo.Title   = titleNode.InnerText.Trim();
                jokeinfo.Content = contentNode.InnerText.Trim();
                if (string.IsNullOrEmpty(jokeinfo.Content) || string.IsNullOrEmpty(jokeinfo.Title))
                {
                    continue;
                }

                jokes.Add(jokeinfo);
            }

            if (jokes.Count == 0)
            {
                lblMsg.Text        = "没有抓取到数据,请重新定义规则!";
                btnRequest.Enabled = true;
                return false;
            }

            jokeLogic.AddJokes(ToJokes(jokes));
            lblMsg.Text = "抓取数据成功";
            return true;
        }