Exemple #1
0
        /// <summary>
        /// Handles the Update button click: opens the edit dialog for the model built from the
        /// current row and, when the dialog returns <c>DialogResult.Cancel</c>, reloads the task grid.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btnUpdate_Click(object sender, EventArgs e)
        {
            // Build the model from the values held in the row.
            KiwiCrawler.Model.Urlconfigs_k model = GetModelByRow();

            // A form shown with ShowDialog is only hidden when it closes, not disposed,
            // so it has to be disposed explicitly to release its window handle and resources.
            DialogResult dialogResult;
            using (frmEdit frmEdit_k = new frmEdit(model))
            {
                dialogResult = frmEdit_k.ShowDialog(this);
            }

            if (dialogResult != DialogResult.Cancel)
            {
                return;
            }

            // Reload the grid from the business layer.
            dgvTaskCapture.Rows.Clear();
            Urlconfigs_kBll urlBll = new Urlconfigs_kBll();
            // TODO (carried over from the original): switch this to paged loading later.
            List<Urlconfigs_k> urlList = urlBll.GetModelList("");
            ListToDataGridView(dgvTaskCapture, urlList);
        }
        /// <summary>
        /// Copies the crawler options selected in the UI, together with the configuration of the
        /// current row, into <c>Settings</c>, and validates the values the crawl depends on.
        /// </summary>
        /// <param name="tag">Mode flag; 0 means single-click mode, in which the seed address list is replaced by the row's URL.</param>
        /// <returns><c>true</c> when no validation problem was found; <c>false</c> when a problem was reported through a message box.</returns>
        private bool SettingCustomValues(Int32 tag)
        {
            bool isOk = true;
            configModel = GetModelByRow();

            // Crawler configuration
            filter = new BloomFilter<string>(200000);

            // Thread count
            if (radioThreadC.Checked && !String.IsNullOrEmpty(txtThread.Text.Trim()))
            {
                // TryParse instead of Convert.ToByte: the text comes straight from the user, and
                // non-numeric or out-of-range input must fail validation rather than throw.
                byte threadCount;
                if (byte.TryParse(txtThread.Text.Trim(), out threadCount))
                {
                    Settings.ThreadCount = threadCount;
                }
                else
                {
                    isOk = false;
                    MessageBox.Show("线程数必须是 0 到 255 之间的整数");
                }
            }
            if (radioThreadM.Checked)
            {
                Settings.ThreadCount = 1;
            }

            // Depth
            if (radioDepthC.Checked && !String.IsNullOrEmpty(txtDepth.Text.Trim()))
            {
                byte depth;
                if (byte.TryParse(txtDepth.Text.Trim(), out depth))
                {
                    Settings.Depth = depth;
                }
                else
                {
                    isOk = false;
                    MessageBox.Show("深度必须是 0 到 255 之间的整数");
                }
            }
            if (radioDepthM.Checked)
            {
                // Falls back to 100 when the row has no page total; otherwise page total + 1.
                Settings.Depth = configModel.kPageTotal == null ? Convert.ToInt32(100) : Convert.ToInt32(configModel.kPageTotal + 1);
            }

            // Speed limiting (1~5 per the original note)
            if (radioSpeedNo.Checked)
            {
                Settings.AutoSpeedLimit = false;
            }
            if (radioSpeedYes.Checked)
            {
                Settings.AutoSpeedLimit = true;
            }

            // Seed address
            if (string.IsNullOrEmpty(configModel.kUrl))
            {
                isOk = false;
                MessageBox.Show("种子地址为空");
            }
            else if (tag == 0) // 0 means single-click mode
            {
                Settings.SeedsAddress.Clear();
                Settings.SeedsAddress.Add(configModel.kUrl);
            }

            // Links to ignore while crawling, matched by suffix; more than one can be added.
            // NOTE(review): this runs on every call, so ".jpg" may accumulate if EscapeLinks
            // allows duplicates - confirm against the Settings type.
            Settings.EscapeLinks.Add(".jpg");

            // Host locking is turned off. Per the original note, locking compares domains after
            // stripping the sub-domain (e.g. mail.pzcast.com and www.pzcast.com count as the same site).
            Settings.LockHost = false;

            // User-Agent, request timeout and regular filter expressions are left untouched here.

            // A pattern type of "regular expression" requires a non-empty detail-page pattern.
            if (configModel.kDetailPatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kDetailPattern))
            {
                isOk = false;
                MessageBox.Show("详细页提取模板为空");
            }

            // The same rule applies to the next-page pattern.
            if (configModel.kNextPagePatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kNextPagePattern))
            {
                isOk = false;
                MessageBox.Show("下一页提取模板为空");
            }

            return isOk;
        }
Exemple #3
0
        /// <summary>
        /// Transfers the thread, depth, speed and seed options from the form and the current row's
        /// configuration into <c>Settings</c>, reporting invalid or missing values via message boxes.
        /// </summary>
        /// <param name="tag">Mode flag; when 0 (single-click mode) the seed address list is cleared and set to the row's URL.</param>
        /// <returns><c>true</c> if all checks passed; <c>false</c> if any check failed.</returns>
        private bool SettingCustomValues(Int32 tag)
        {
            bool isOk = true;

            configModel = GetModelByRow();
            // Crawler configuration
            filter = new BloomFilter<string>(200000);

            // Threads
            if (radioThreadC.Checked && !String.IsNullOrEmpty(txtThread.Text.Trim()))
            {
                // User-typed text: parse defensively so bad input becomes a validation
                // failure instead of an unhandled FormatException/OverflowException.
                byte parsedThreads;
                if (!byte.TryParse(txtThread.Text.Trim(), out parsedThreads))
                {
                    isOk = false;
                    MessageBox.Show("线程数必须是 0 到 255 之间的整数");
                }
                else
                {
                    Settings.ThreadCount = parsedThreads;
                }
            }
            if (radioThreadM.Checked)
            {
                Settings.ThreadCount = 1;
            }

            // Depth
            if (radioDepthC.Checked && !String.IsNullOrEmpty(txtDepth.Text.Trim()))
            {
                byte parsedDepth;
                if (!byte.TryParse(txtDepth.Text.Trim(), out parsedDepth))
                {
                    isOk = false;
                    MessageBox.Show("深度必须是 0 到 255 之间的整数");
                }
                else
                {
                    Settings.Depth = parsedDepth;
                }
            }
            if (radioDepthM.Checked)
            {
                // 100 when the row carries no page total, otherwise page total + 1.
                Settings.Depth = configModel.kPageTotal == null ? Convert.ToInt32(100) : Convert.ToInt32(configModel.kPageTotal + 1);
            }

            // Speed limiting (1~5 per the original note)
            if (radioSpeedNo.Checked)
            {
                Settings.AutoSpeedLimit = false;
            }
            if (radioSpeedYes.Checked)
            {
                Settings.AutoSpeedLimit = true;
            }

            // Seed address is mandatory.
            if (string.IsNullOrEmpty(configModel.kUrl))
            {
                isOk = false;
                MessageBox.Show("种子地址为空");
            }
            else if (tag == 0) // 0 means single-click mode
            {
                Settings.SeedsAddress.Clear();
                Settings.SeedsAddress.Add(configModel.kUrl);
            }

            // Suffixes of links to skip while crawling; several can be registered.
            // NOTE(review): added on each call - verify whether EscapeLinks de-duplicates.
            Settings.EscapeLinks.Add(".jpg");

            // Do not lock the crawl to one host. Per the original note, host locking treats
            // e.g. mail.pzcast.com and www.pzcast.com as the same site.
            Settings.LockHost = false;

            // User-Agent, timeout and regular filter expressions are not changed here.

            // When the detail-page pattern type is "regular expression", the pattern must be present.
            if (configModel.kDetailPatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kDetailPattern))
            {
                isOk = false;
                MessageBox.Show("详细页提取模板为空");
            }

            // Likewise for the next-page pattern.
            if (configModel.kNextPagePatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kNextPagePattern))
            {
                isOk = false;
                MessageBox.Show("下一页提取模板为空");
            }

            return isOk;
        }
Exemple #4
0
 /// <summary>
 /// Updates one record by delegating to the data-access layer.
 /// </summary>
 /// <param name="model">The record to update.</param>
 /// <returns>The result reported by <c>dal.Update</c>.</returns>
 public bool Update(KiwiCrawler.Model.Urlconfigs_k model)
 {
     bool updated = dal.Update(model);
     return updated;
 }
Exemple #5
0
 /// <summary>
 /// Adds one record by delegating to the data-access layer.
 /// </summary>
 /// <param name="model">The record to add.</param>
 /// <returns>The result reported by <c>dal.Add</c>.</returns>
 public bool Add(KiwiCrawler.Model.Urlconfigs_k model)
 {
     bool added = dal.Add(model);
     return added;
 }
Exemple #6
0
        // Gets a paged list of data (currently disabled).
        //public DataSet GetList(int PageSize,int PageIndex,string strWhere)
        //{
        //return dal.GetList(PageSize,PageIndex,strWhere);
        //}

        #endregion  BasicMethod
        #region  ExtensionMethod
        /// <summary>
        /// Adds one record and returns the primary key of the added data.
        /// </summary>
        /// <param name="model">The record to add.</param>
        /// <returns>The value returned by <c>dal.AddBringId</c>, described by the original comment as the new record's primary key.</returns>
        public int AddBringId(KiwiCrawler.Model.Urlconfigs_k model)
        {
            int newId = dal.AddBringId(model);
            return newId;
        }