예제 #1
0
        /// <summary>
        /// Downloads a web page with an HTTP GET request and returns its HTML.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="pageEncode">
        /// Name of the text encoding to decode the response with. The sentinel
        /// "自动编码", null or an empty string leaves <c>HttpItem.Encoding</c> unset
        /// (presumably letting the HTTP helper pick the encoding itself — confirm
        /// against HttpHelper4).
        /// </param>
        /// <returns>
        /// The response HTML, or an empty string when the helper returned no HTML or
        /// its "本次请求并未返回任何数据" placeholder text.
        /// </returns>
        public static string getPageContent(string url, string pageEncode)
        {
            var httpItem = new HttpItem()
            {
                URL    = url,
                Method = "GET"
            };

            // Encoding.GetEncoding throws on null/empty names, so treat those the
            // same way as the explicit "automatic" sentinel.
            if (!string.IsNullOrEmpty(pageEncode) && pageEncode != "自动编码")
            {
                httpItem.Encoding = Encoding.GetEncoding(pageEncode);
            }

            var http = new HttpHelper4();
            var html = http.GetHtml(httpItem).Html;

            // Normalise every "no content" outcome to an empty string so callers
            // never have to distinguish null from the placeholder message.
            if (string.IsNullOrEmpty(html) || html == "本次请求并未返回任何数据")
            {
                return string.Empty;
            }
            return html;
        }
예제 #2
0
        /// <summary>
        /// Searches wenda.so.com for the keyword entered in <c>txtContentKeyWord</c>,
        /// refills <c>List</c> with the hits found on the first result page and
        /// rebinds the data list.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            List.Clear();

            // 1. Fetch the first result page for the URL-encoded keyword, e.g.
            //    http://wenda.so.com/search/?ie=utf-8&q=android%20%E5%A4%9A%E7%BA%BF%E7%A8%8B&src=360chrome_search
            string keyword       = this.txtContentKeyWord.Text;
            string keywordEncode = HttpUtility.UrlEncode(keyword, Encoding.GetEncoding("utf-8"));

            var http       = new HttpHelper4();
            var httpResult = http.GetHtml(new HttpItem()
            {
                URL = "http://wenda.so.com/search/?ie=utf-8&q=" + keywordEncode + "&src=360chrome_search&pn=0"
            });

            // 2. Parse the markup. LoadHtml rejects null, so fall back to an empty document.
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(httpResult.Html ?? string.Empty);

            // 3. Queue one entry per result item.
            foreach (var c in doc.DocumentNode.CssSelect(".item"))
            {
                if (c == null)
                {
                    continue;
                }

                // An item without a title link or a summary (ads, layout changes)
                // is skipped instead of aborting the whole search with an index error.
                var titleLinks = c.CssSelect(".qa-i-hd a").ToArray();
                var summaries  = c.CssSelect(".qa-i-bd").ToArray();
                if (titleLinks.Length == 0 || summaries.Length == 0)
                {
                    continue;
                }

                var href = titleLinks[0].Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                List.Add(new ContentHelper()
                {
                    Url     = "http://wenda.so.com" + href.Value,
                    Title   = titleLinks[0].InnerText,
                    Summary = summaries[0].InnerText
                });
            }

            // 4. Reload the bound list.
            this.Bind_DataList();
        }
        /// <summary>
        /// Runs a wenda.so.com search for the text in <c>txtContentKeyWord</c> and
        /// replaces the contents of <c>List</c> with the results of the first page,
        /// then rebinds the data list.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            List.Clear();

            string keyword       = this.txtContentKeyWord.Text;
            string keywordEncode = HttpUtility.UrlEncode(keyword, Encoding.GetEncoding("utf-8"));

            var http       = new HttpHelper4();
            var httpResult = http.GetHtml(new HttpItem()
            {
                URL = "http://wenda.so.com/search/?ie=utf-8&q=" + keywordEncode + "&src=360chrome_search&pn=0"
            });

            // LoadHtml throws on null, so a response without HTML becomes an empty document.
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(httpResult.Html ?? string.Empty);

            foreach (var c in doc.DocumentNode.CssSelect(".item"))
            {
                if (c == null)
                {
                    continue;
                }

                // Evaluate each selector once and skip items that lack the expected
                // title link / summary rather than failing on the [0] index.
                var titleLinks = c.CssSelect(".qa-i-hd a").ToArray();
                var summaries  = c.CssSelect(".qa-i-bd").ToArray();
                if (titleLinks.Length == 0 || summaries.Length == 0)
                {
                    continue;
                }

                var href = titleLinks[0].Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                List.Add(new ContentHelper()
                {
                    Url     = "http://wenda.so.com" + href.Value,
                    Title   = titleLinks[0].InnerText,
                    Summary = summaries[0].InnerText
                });
            }

            this.Bind_DataList();
        }
 /// <summary>
 /// Fetches the HTML of a page and hands it to <c>OutHtmlHandler</c>.
 /// When <c>this.WebBrowser</c> is false the page is requested through
 /// HttpHelper4 and the handler is called before this method returns; otherwise
 /// the page is loaded in the <c>webBrowser</c> control and the handler is called
 /// from its DocumentCompleted event.
 /// </summary>
 /// <param name="url">Address of the page to load.</param>
 /// <param name="cookie">Cookie string to apply to the request; ignored when null or empty.</param>
 public void GetHtml(string url, string cookie)
 {
     if (!this.WebBrowser)
     {
         var httpItem = new HttpItem();
         httpItem.URL = url;
         if (!string.IsNullOrEmpty(cookie))
         {
             httpItem.Cookie = cookie;
         }

         var html = new HttpHelper4().GetHtml(httpItem).Html;
         if (OutHtmlHandler != null)
         {
             OutHtmlHandler(html);
         }
         return;
     }

     webBrowser.ObjectForScripting     = false;
     webBrowser.ScriptErrorsSuppressed = true;

     // The control has no Document until it has loaded a page, so the cookie can
     // only be applied when one is already present.
     if (!string.IsNullOrEmpty(cookie) && webBrowser.Document != null)
     {
         webBrowser.Document.Cookie = cookie;
     }

     // Keep a reference to the handler so it can remove itself. Subscribing a new
     // anonymous handler on every call without removing it would make each later
     // navigation report its HTML once per earlier GetHtml call.
     WebBrowserDocumentCompletedEventHandler onCompleted = null;
     onCompleted = (object sender, WebBrowserDocumentCompletedEventArgs e) => {
         // DocumentCompleted may fire several times per navigation (e.g. once per
         // frame); stay subscribed until the control reports the load as complete.
         if (webBrowser.ReadyState == WebBrowserReadyState.Complete)
         {
             webBrowser.DocumentCompleted -= onCompleted;
         }

         var document = webBrowser.Document;
         if (document == null || document.Body == null || document.Body.Parent == null)
         {
             return;
         }

         var html = document.Body.Parent.OuterHtml;
         if (OutHtmlHandler != null)
         {
             OutHtmlHandler(html);
         }
     };
     webBrowser.DocumentCompleted += onCompleted;
     webBrowser.Navigate(new Uri(url));
 }
예제 #5
0
        /// <summary>
        /// Handles data dropped onto the desktop form. For an HTML payload the
        /// fragment between the StartFragment/EndFragment markers becomes the content
        /// and the title is taken from the payload's &lt;title&gt; element or, failing
        /// that, from the page named by its SourceURL header. For a plain-text
        /// payload the text itself is the content. The result is shown in a new
        /// <c>frmHandWebInsert</c> window.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Drop event data; payloads other than HTML or text are ignored.</param>
        private void frmDeskTop_DragDrop(object sender, DragEventArgs e) {
            if (!e.Data.GetDataPresent(DataFormats.Html)
                && !e.Data.GetDataPresent(DataFormats.Text)) {
                return;
            }

            string webTitle = "新标题";
            string webContent;
            string s1;

            object Item = e.Data.GetData(DataFormats.Html, true);
            bool htmlflag = Item != null;
            if (htmlflag) {
                // The raw "Html Format" payload may arrive as a MemoryStream or as an
                // already-decoded string depending on the source/runtime; accept both
                // instead of assuming a stream.
                object raw = e.Data.GetData("Html Format") ?? Item;
                MemoryStream vMemoryStream = raw as MemoryStream;
                s1 = vMemoryStream != null
                    ? Encoding.UTF8.GetString(vMemoryStream.ToArray())
                    : raw.ToString();
            }
            else {
                Item = e.Data.GetData(DataFormats.Text);
                if (Item == null) {
                    // Format was advertised but no data could be retrieved.
                    return;
                }
                s1 = Item.ToString();
            }

            if (htmlflag) {
                Regex regcontent = new Regex(@"<!--StartFragment-->([\s\S]*?)<!--EndFragment-->", RegexOptions.IgnoreCase);
                webContent = regcontent.Match(s1).ToString();
                webContent = StringHelper.Instance.Replace(webContent, "<!--StartFragment-->", "");
                webContent = StringHelper.Instance.Replace(webContent, "<!--EndFragment-->", "");
                try {
                    webTitle = Regex.Match(s1, "<title>.+?</title>", RegexOptions.IgnoreCase | RegexOptions.Multiline).ToString();
                    webTitle = StringHelper.Instance.Replace(webTitle, "<title>", "");
                    webTitle = StringHelper.Instance.Replace(webTitle, "</title>", "");
                    if (string.IsNullOrEmpty(webTitle)) {
                        // No title in the dropped payload: look it up on the source page.
                        webTitle = "新标题";

                        Regex regUrl = new Regex(@"SourceURL:([\s\S]*?)\r\n", RegexOptions.IgnoreCase);
                        string webUrl = regUrl.Match(s1).ToString();
                        webUrl = StringHelper.Instance.Replace(webUrl, "SourceURL:", "");
                        webUrl = StringHelper.Instance.Replace(webUrl, "\r\n", "");

                        if (!string.IsNullOrEmpty(webUrl)) {
                            var http = new HttpHelper4();
                            var httpResult = http.GetHtml(new HttpItem() {
                                URL = webUrl
                            });

                            var doc = new HtmlAgilityPack.HtmlDocument();
                            doc.LoadHtml(httpResult.Html);

                            var titleNodes = doc.DocumentNode.CssSelect("title").ToArray();
                            if (titleNodes.Length > 0) {
                                webTitle = titleNodes[0].InnerText;
                            }
                        }
                    }
                }
                catch (Exception) {
                    // Title detection is best-effort; any failure falls back to the default title.
                    webTitle = "新标题";
                }
            }
            else {
                webContent = s1;
            }

            frmHandWebInsert ff = new frmHandWebInsert();
            ff.Title = webTitle;
            ff.Content = webContent;
            ff.Show();
        }
예제 #6
0
        /// <summary>
        /// Publishes the collected rows of the current task to the custom web URLs
        /// configured for it. For every row / URL pair the "[LabelName]" placeholders
        /// in the URL and in the post parameters are replaced with the row's label
        /// values; pairs that already have a publish-log record are skipped, all
        /// others are logged and then requested. Failures of a single pair are
        /// ignored. <c>PublishCompalteDelegate</c>, if set, is invoked at the end.
        /// </summary>
        private void StartDiyWeb()
        {
            string    sqlitePath   = "Data\\Collection\\" + Model.TaskName + "\\SpiderResult.db";
            DataTable contentTable = DbHelper.Query(sqlitePath, "Select * From Content").Tables[0];

            var         targets = DALDiyWebUrlHelper.GetList(" And SelfId=" + Model.ID, "", 0);
            HttpHelper4 client  = new HttpHelper4();
            int         taskId  = Model.ID;

            foreach (DataRow row in contentTable.Rows)
            {
                int resultId = int.Parse(row["Id"].ToString());

                foreach (var target in targets)
                {
                    try
                    {
                        string        requestUrl  = target.Url;
                        string        requestBody = target.UrlParams;
                        StringBuilder fingerprint = new StringBuilder();

                        // Substitute every label placeholder and collect the values
                        // that make up this row's de-duplication key.
                        foreach (ModelTaskLabel label in Model.ListTaskLabel)
                        {
                            string labelValue  = row[label.LabelName].ToString().Replace("'", "''");
                            string placeholder = "[" + label.LabelName + "]";

                            // The values may still need URL encoding; only real-world testing will tell.
                            requestUrl  = requestUrl.Replace(placeholder, labelValue);
                            requestBody = requestBody.Replace(placeholder, labelValue);
                            fingerprint.Append(labelValue);
                        }

                        string md5key = StringHelper.Instance
                                        .MD5(taskId.ToString() + resultId.ToString() + fingerprint.ToString(), 32)
                                        .ToLower();

                        // Skip rows that were already published to this web URL.
                        if (DALDataPublishLogHelper.ChkRecord(Model.ID, resultId, md5key))
                        {
                            continue;
                        }

                        // Record the publish attempt before sending the request.
                        var logEntry = new ModelDataPublishLog()
                        {
                            TaskId     = taskId,
                            ResultId   = resultId,
                            DesKey     = md5key,
                            CreateTime = DateTime.Now.ToString()
                        };
                        DALDataPublishLogHelper.Insert(logEntry);

                        // Send the publish request; the response body is read but not used.
                        var request = new HttpItem()
                        {
                            URL         = requestUrl,
                            Postdata    = requestBody,
                            ContentType = "application/x-www-form-urlencoded"
                        };
                        var response     = client.GetHtml(request);
                        var responseHtml = response.Html;
                    }
                    catch (Exception)
                    {
                        // Best effort: a failure for one row/URL pair must not stop the rest.
                    }
                }
            }

            if (PublishCompalteDelegate == null)
            {
                return;
            }
            gatherEv.Message = "发布到自定义Web网站完成!";
            PublishCompalteDelegate(this, gatherEv);
        }
        /// <summary>
        /// Loads the category list of the site selected in <c>cmbWebSite</c>.
        /// The submit button is disabled while a ThreadMultiHelper work item sends a
        /// "getclasslist" request to the site's URL, parses the JSON answer and passes
        /// the resulting list to <c>OutClassListMsg</c>; the button is re-enabled from
        /// the helper's completion event.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void cmbWebSite_SelectedIndexChanged(object sender, EventArgs e)
        {
            // SelectedItem is null when the selection is cleared, so check it before
            // reading Value.
            if (this.cmbWebSite.SelectedItem == null)
            {
                return;
            }

            var model = (ModelSiteInfo)((ListItem2)this.cmbWebSite.SelectedItem).Value;

            if (model == null)
            {
                return;
            }
            this.btnSubmit.Enabled = false;

            this.cmbClassList.Text = string.Empty;
            this.cmbClassList.Items.Clear();

            var th = new ThreadMultiHelper(1);

            // NOTE(review): the work delegate touches lblResult/cmbClassList directly;
            // if ThreadMultiHelper runs it off the UI thread this needs marshalling — confirm.
            th.WorkMethod += new ThreadMultiHelper.DelegateWork(delegate(int taskindex, int threadindex) {
                string nowTime  = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
                // NOTE(review): the user-name operand was masked/unreadable in the reviewed
                // source; model.UserName is assumed — confirm the member name on ModelSiteInfo.
                string baseData = "cmd=getclasslist"
                                  + "&username=" + model.UserName
                                  + "&userpwd=" + model.UserPwd
                                  + "&time=" + nowTime;
                // The md5key parameter is the lower-cased MD5 of all preceding parameters.
                string md5key    = "&md5key=" + StringHelper.Instance.MD5(baseData, 32).ToLower();
                string senUrl    = model.Url;
                string sendData  = baseData + md5key;
                HttpHelper4 http = new HttpHelper4();
                var result       = http.GetHtml(new HttpItem()
                {
                    URL = senUrl + "?" + sendData
                });
                var html = result.Html;
                try {
                    var json = JObject.Parse(html);
                    var code = json["code"].Value <string>();
                    var list = json["list"].ToArray();

                    this.lblResult.Text = "操作结果成功!结果为:" + code;

                    List <ModelClassList> listClassList = new List <ModelClassList>();
                    this.cmbClassList.Items.Clear();
                    foreach (var l in list)
                    {
                        listClassList.Add(
                            new ModelClassList()
                        {
                            ClassId    = l["classid"].Value <string>(),
                            ClassName  = l["classname"].Value <string>(),
                            CreateTime = ""
                        }
                            );
                    }
                    OutMsg = new OutCmbMsg(OutClassListMsg);
                    OutMsg(listClassList);
                }
                catch (Exception ex) {
                    // A malformed or unexpected response leaves the category list empty;
                    // record the reason instead of discarding it silently.
                    System.Diagnostics.Trace.TraceError("cmbWebSite_SelectedIndexChanged: could not read the class list response: " + ex);
                }
            });
            th.CompleteEvent += new ThreadMultiHelper.DelegateComplete(delegate() {
                this.btnSubmit.Enabled = true;
            });
            th.Start();
        }
        /// <summary>
        /// Publishes the edited title and content to the selected site and category.
        /// After validating the selections and the text, a ThreadMultiHelper work item
        /// posts a "sendcontent" request to the site's URL and shows the response (or
        /// its JSON "code" value) in <c>lblResult</c>; the completion event shows a
        /// confirmation box and closes the form when OK is chosen.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnSubmit_Click(object sender, EventArgs e)
        {
            // SelectedItem is null when no site is selected; check it before reading
            // Value so the "select a site" prompt below can actually be shown.
            ModelSiteInfo model = null;
            if (this.cmbWebSite.SelectedItem != null)
            {
                model = (ModelSiteInfo)((ListItem2)this.cmbWebSite.SelectedItem).Value;
            }

            if (model == null)
            {
                MessageBox.Show("请选择一个网站!");
                this.cmbWebSite.Focus();
                return;
            }

            if (this.cmbClassList.Text == string.Empty)
            {
                MessageBox.Show("请选择一个分类!");
                this.cmbClassList.Focus();
                return;
            }

            var title   = this.txtTitle.Text;
            var content = this.fckHtmlEditorControl1.InnerHtml;

            if (string.IsNullOrEmpty(title) || string.IsNullOrEmpty(content))
            {
                MessageBox.Show("标题或者内容为空!");
                this.txtTitle.Focus();
                return;
            }

            // The combo box can show text without having a selected item (e.g. typed
            // text); treat that like "no category selected".
            if (this.cmbClassList.SelectedItem == null)
            {
                MessageBox.Show("请选择一个分类!");
                this.cmbClassList.Focus();
                return;
            }

            var selectedItem = (ListItem)this.cmbClassList.SelectedItem;
            var classid      = selectedItem.Value;
            var classname    = selectedItem.Text;

            if (classid == string.Empty || classid == "0")
            {
                MessageBox.Show("请选择一个分类!");
                this.cmbClassList.Focus();
                return;
            }
            var th = new ThreadMultiHelper(1);

            // NOTE(review): the work delegate writes lblResult directly; if
            // ThreadMultiHelper runs it off the UI thread this needs marshalling — confirm.
            th.WorkMethod += new ThreadMultiHelper.DelegateWork(delegate(int taskindex, int threadindex) {
                string nowTime  = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
                // NOTE(review): the user-name operand was masked/unreadable in the reviewed
                // source; model.UserName is assumed — confirm the member name on ModelSiteInfo.
                string baseData = "cmd=sendcontent"
                                  + "&username=" + model.UserName
                                  + "&userpwd=" + model.UserPwd
                                  + "&title=" + StringHelper.Instance.UrlEncode(title, model.Encode)
                                  + "&content=" + StringHelper.Instance.UrlEncode(content, model.Encode)
                                  + "&classid=" + classid
                                  + "&classtitle=" + StringHelper.Instance.UrlEncode(classname, model.Encode)
                                  + "&author=" + string.Empty
                                  + "&time=" + nowTime;
                // The md5key parameter is the lower-cased MD5 of all preceding parameters.
                string md5key    = "&md5key=" + StringHelper.Instance.MD5(baseData, 32).ToLower();
                string sendUrl   = model.Url;
                string sendData  = baseData + md5key;
                HttpHelper4 http = new HttpHelper4();
                var result       = http.GetHtml(new HttpItem()
                {
                    URL         = sendUrl,
                    Method      = "post",
                    Postdata    = sendData,
                    ContentType = "application/x-www-form-urlencoded",
                    Encoding    = Encoding.GetEncoding(model.Encode)
                });
                var html            = result.Html;
                // Show the raw response first; it is replaced by the JSON "code" value
                // below when the response can be parsed.
                this.lblResult.Text = "操作结果成功!结果为:" + html;

                try {
                    var json            = JObject.Parse(html);
                    var code            = json["code"].Value <string>();
                    this.lblResult.Text = "操作结果成功!结果为:" + code;
                }
                catch (Exception ex) {
                    // The raw response stays on screen; record why it could not be parsed.
                    System.Diagnostics.Trace.TraceError("btnSubmit_Click: could not read the publish response: " + ex);
                }
            });
            th.CompleteEvent += new ThreadMultiHelper.DelegateComplete(delegate() {
                var r = MessageBox.Show(this, "发布内容成功!", "提示信息!", MessageBoxButtons.OKCancel);
                if (r == System.Windows.Forms.DialogResult.OK)
                {
                    this.Close();
                    this.Dispose();
                }
            });
            th.Start();
        }