Exemple #1
0
        /// <summary>
        /// Copies the fields of the newly selected site into the editor controls and
        /// rebinds the custom-processor grid.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void SiteComboBox_SelectedIndexChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            this.siteNameTextBox.Text      = siteParameter.SiteName;
            this.urlTextBox.Text           = siteParameter.UrlPattern;
            this.startUrlTextBox.Text      = siteParameter.StartUrl;
            this.itemTextBox.Text          = siteParameter.ItemPattern;
            this.startNumber.Value         = siteParameter.StartNumber;
            // A missing page step is shown as 0 in the numeric control.
            this.pageStepNumber.Value      = siteParameter.PageStepNumber ?? 0;
            this.captionPosition.Value     = siteParameter.CaptionPosition;
            this.urlPosition.Value         = siteParameter.UrlPosition;
            this.datePosition.Value        = siteParameter.DatePosition;
            this.categoryTextBox.Text      = siteParameter.CategoryPattern;
            this.indexCodeTextBox.Text     = siteParameter.IndexCodePattern;
            this.issueCodeTextBox.Text     = siteParameter.IssueCodePattern;
            this.publishAgencyTextBox.Text = siteParameter.PublishAgencyPattern;
            this.keywordTextBox.Text       = siteParameter.KeywordPattern;
            this.attachmentTextBox.Text    = siteParameter.AttachmentPattern;
            this.publishDateTextBox.Text   = siteParameter.PublishDatePattern;
            this.contentTextBox.Text       = siteParameter.ContentPattern;
            BindSource(siteParameter);
        }
Exemple #2
0
        /// <summary>
        /// Adds the key/value pair typed into the parse text boxes to the selected
        /// site's custom processors, overwriting the value when the key already exists,
        /// then refreshes the grid and clears both text boxes.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void AddDicButton_Click(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            string key   = this.parseKeyTextBox.Text;
            string value = this.parseValueTextBox.Text;

            if (string.IsNullOrWhiteSpace(key) || string.IsNullOrWhiteSpace(value))
            {
                this.testLogTextBox.Text = "Please input key and value";
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            if (siteParameter.CustomProcessors == null)
            {
                siteParameter.CustomProcessors = new Dictionary <string, string>();
            }

            // The indexer adds a missing key and overwrites an existing one, so the
            // separate Contains/Add branches are unnecessary.
            siteParameter.CustomProcessors[key] = value;

            BindSource(siteParameter);
            this.parseKeyTextBox.Text   = string.Empty;
            this.parseValueTextBox.Text = string.Empty;
        }
Exemple #3
0
        /// <summary>
        /// Saves the form data into the site configuration.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        protected void btnSave_Click(Object sender, EventArgs e)
        {
            #region Save changes

            var config = SiteParameter.Config;

            // Reuse the entry of the selected language, or register a new one.
            var lang = config.Languages.Find(a => a.language == Language.SelectedValue);
            if (lang == null)
            {
                lang = new Language();
                config.Languages.Add(lang);
            }

            // Pull the posted form values into the language entry and the configuration.
            lang   = this.GetFormValue <Language>(lang);
            config = this.GetFormValue <SiteParameter>(config);

            // Deselect every data type option, then mark the chosen one.
            foreach (var option in config.DataTypeOptions)
            {
                option.Selected = false;
            }

            var datatype = config.DataTypeOptions.Find(a => a.Name == @DataType.SelectedValue);
            if (datatype != null)
            {
                datatype.Selected         = true;
                datatype.ConnectionString = ConnectionString.Text;
            }

            SiteParameter.SaveConfig();

            #endregion

            Alert("保存成功!", "success");
        }
Exemple #4
0
 /// <summary>
 /// Creates a page reader whose page number starts at the site's start number.
 /// </summary>
 /// <param name="siteParameter">Site settings; must not be null.</param>
 /// <param name="htmlReader">HTML reader; must not be null.</param>
 /// <param name="itemReader">Item reader; must not be null.</param>
 /// <exception cref="ArgumentNullException">Any argument is null.</exception>
 public ParamPageReader(SiteParameter siteParameter, IHtmlReader htmlReader, IItemReader itemReader)
 {
     if (siteParameter is null)
     {
         throw new ArgumentNullException(nameof(siteParameter));
     }

     if (htmlReader is null)
     {
         throw new ArgumentNullException(nameof(htmlReader));
     }

     if (itemReader is null)
     {
         throw new ArgumentNullException(nameof(itemReader));
     }

     this.siteParameter = siteParameter;
     this.htmlReader    = htmlReader;
     this.itemReader    = itemReader;
     this.pageNumber    = this.siteParameter.StartNumber;
 }
Exemple #5
0
        /// <summary>
        /// Builds a site crawler for the given settings, resolving the HTML reader,
        /// item reader, page reader and page parser from the container under the
        /// names configured in <c>CustomProcessors</c>.
        /// </summary>
        /// <param name="siteParameter">Site settings; must not be null.</param>
        /// <returns>A <c>GeneralSiteCrawler</c> wired with the resolved page reader and page parser.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="siteParameter"/> is null.</exception>
        public static ISiteCrawler Create(SiteParameter siteParameter)
        {
            if (siteParameter == null)
            {
                throw new ArgumentNullException(nameof(siteParameter));
            }

            // The "IDataService" name used to be looked up here as well, but the value
            // was never used, so the lookup has been dropped.
            string htmlReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IHtmlReader");
            string pageParserName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageParser");
            string itemReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IItemReader");
            string pageReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageReader");

            // Each resolved instance is handed to the later resolutions as a named override.
            IHtmlReader       htmlReader          = Container.Resolve <IHtmlReader>(htmlReaderName);
            ParameterOverride htmlReaderParameter = new ParameterOverride("htmlReader", htmlReader);

            ParameterOverride siteParameterParameter = new ParameterOverride("siteParameter", siteParameter);

            IItemReader       itemReader          = Container.Resolve <IItemReader>(itemReaderName, siteParameterParameter);
            ParameterOverride itemReaderParameter = new ParameterOverride("itemReader", itemReader);

            IPageReader pageReader = Container.Resolve <IPageReader>(pageReaderName, siteParameterParameter, htmlReaderParameter, itemReaderParameter);

            IPageParser pageParser = Container.Resolve <IPageParser>(pageParserName, siteParameterParameter, htmlReaderParameter);

            return new GeneralSiteCrawler(pageReader, pageParser);
        }
Exemple #6
0
        /// <summary>
        /// Wires the crawler with a regex item reader, an HTTP client reader, a
        /// sequential page reader and a regex page parser for the given site.
        /// </summary>
        /// <param name="siteParameter">Site settings passed to the readers and the parser.</param>
        public GeneralSiteCrawler(SiteParameter siteParameter)
        {
            // The item reader is built first, then the HTML reader shared by reader and parser.
            IItemReader items = new RegexItemReader(siteParameter);
            IHtmlReader html  = new HttpClientReader();

            this.pageReader = new SequentialPageReader(siteParameter, html, items);
            this.pageParser = new RegexPageParser(siteParameter, html);
        }
Exemple #7
0
        /// <summary>
        /// Appends a site named "New" to the site list and selects it in the combo box.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void NewButton_Click(object sender, EventArgs e)
        {
            SiteParameter created = new SiteParameter();
            created.SiteName = "New";

            siteParameters.Add(created);
            this.siteComboBox.SelectedItem = created;
        }
Exemple #8
0
        /// <summary>
        /// Writes the content text box back to the selected site's content pattern.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Content_ValueChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            siteParameter.ContentPattern = this.contentTextBox.Text;
        }
Exemple #9
0
        /// <summary>
        /// Writes the publish-date text box back to the selected site's publish-date
        /// pattern; blank text is stored as null.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void PublishDate_ValueChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            string text = this.publishDateTextBox.Text;

            siteParameter.PublishDatePattern = string.IsNullOrWhiteSpace(text) ? null : text;
        }
Exemple #10
0
        /// <summary>
        /// Writes the date-position control value back to the selected site.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void DatePosition_ValueChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            siteParameter.DatePosition = (int)this.datePosition.Value;
        }
Exemple #11
0
        /// <summary>
        /// Writes the start-number control value back to the selected site.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void StartNumber_ValueChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            siteParameter.StartNumber = (int)this.startNumber.Value;
        }
Exemple #12
0
        /// <summary>
        /// Writes the item text box back to the selected site's item pattern.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void ItemTextBox_TextChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            siteParameter.ItemPattern = this.itemTextBox.Text;
        }
Exemple #13
0
        /// <summary>
        /// Writes the site-name text box back to the selected site and asks the site
        /// list to re-announce that item so bound controls show the new name.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void SiteNameTextBox_TextChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            siteParameter.SiteName = this.siteNameTextBox.Text;

            // IndexOf yields -1 when the item is not in the list; do not reset a
            // position that does not exist.
            int index = siteParameters.IndexOf(siteParameter);
            if (index >= 0)
            {
                siteParameters.ResetItem(index);
            }
        }
Exemple #14
0
        /// <summary>
        /// Shows the custom processors of <paramref name="siteParameter"/> in the
        /// dictionary grid as Key/Value rows with nothing selected; clears the grid
        /// when there are no processors.
        /// </summary>
        /// <param name="siteParameter">Site whose custom processors are displayed; null clears the grid.</param>
        private void BindSource(SiteParameter siteParameter)
        {
            var processors = siteParameter?.CustomProcessors;

            if (processors == null || processors.Count == 0)
            {
                this.parseDicDataGridView.DataSource = null;
                return;
            }

            // Bind a snapshot array so later dictionary changes do not affect the grid
            // until BindSource is called again.
            this.parseDicDataGridView.DataSource = processors.Select(s => new { Key = s.Key, Value = s.Value }).ToArray();

            // Indexing Rows[0] throws when the grid has produced no rows for the new
            // data source, so check before deselecting the first row.
            if (this.parseDicDataGridView.Rows.Count > 0)
            {
                this.parseDicDataGridView.Rows[0].Selected = false;
            }

            this.parseDicDataGridView.CurrentCell = null;
            this.parseDicDataGridView.ClearSelection();
        }
        /// <summary>
        /// Wires the crawler with a database-backed data service, a regex item reader,
        /// an HTTP client reader, a sequential page reader and a regex page parser,
        /// and registers an error handler on the parser that writes a crawler log entry.
        /// </summary>
        /// <param name="siteParameter">Site settings passed to the readers and the parser.</param>
        public GeneralSiteCrawler(SiteParameter siteParameter)
        {
            this.dataService = new DbDataService(CrawlerDbHelper.GetContext());

            IItemReader items = new RegexItemReader(siteParameter);
            IHtmlReader html  = new HttpClientReader();

            this.pageReader = new SequentialPageReader(siteParameter, html, items);
            this.pageParser = new RegexPageParser(siteParameter, html);

            // Each reported failure is stored with its URL, the current local time and
            // the exception message.
            this.pageParser.SetErrorHandler(
                (failedUrl, error) => this.dataService.AddLog(
                    new CrawlerLog { Url = failedUrl, LogTime = DateTime.Now, Message = error.Message }));
        }
Exemple #16
0
        /// <summary>
        /// Writes the page-step control value back to the selected site; a value of 0
        /// is stored as null (no page step).
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void PageStepNumber_ValueChanged(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            if (this.pageStepNumber.Value == 0)
            {
                siteParameter.PageStepNumber = null;
            }
            else
            {
                siteParameter.PageStepNumber = Convert.ToInt32(this.pageStepNumber.Value);
            }
        }
Exemple #17
0
        /// <summary>
        /// Removes the custom processor shown in the first selected grid row from the
        /// selected site, sets the dictionary to null once it is empty, and refreshes
        /// the grid.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void removeDicButton_Click(object sender, EventArgs e)
        {
            var rows = this.parseDicDataGridView.SelectedRows;

            if (rows.Count < 1)
            {
                this.testLogTextBox.Text = "Please select an item delete";
                return;
            }

            // The key is displayed in the first column of the selected row.
            string        key           = rows[0].Cells[0].Value?.ToString();
            SiteParameter siteParameter = this.siteComboBox.SelectedItem as SiteParameter;

            // Nothing to remove when the cell is empty, no site is selected, or the
            // site has no processor dictionary; previously each of these was a
            // NullReferenceException.
            if (key == null || siteParameter == null || siteParameter.CustomProcessors == null)
            {
                return;
            }

            siteParameter.CustomProcessors.Remove(key);
            if (!siteParameter.CustomProcessors.Any())
            {
                siteParameter.CustomProcessors = null;
            }

            BindSource(siteParameter);
        }
        /// <summary>
        /// Updates the stored site settings with new values for the send*Email and
        /// send*Msg flags, <c>BankZhuanz</c> and <c>OnlinePayment</c>; every other
        /// field is copied from the current record.
        /// </summary>
        /// <param name="a01">New value for <c>sendFreezeEmail</c>.</param>
        /// <param name="a0">New value for <c>sendRecoverEmail</c>.</param>
        /// <param name="a1">New value for <c>sendZhuanzEmail</c>.</param>
        /// <param name="a2">New value for <c>sendWithdrawEmail</c>.</param>
        /// <param name="a3">New value for <c>sendreChargeEmail</c>.</param>
        /// <param name="a4">New value for <c>sendFreezeMsg</c>.</param>
        /// <param name="a5">New value for <c>sendThawMsg</c>.</param>
        /// <param name="a6">New value for <c>sendZhuanzMsg</c>.</param>
        /// <param name="a7">New value for <c>sendWithdrawMsg</c>.</param>
        /// <param name="a8">New value for <c>sendreChargeMsg</c>.</param>
        /// <param name="a9">New value for <c>BankZhuanz</c>.</param>
        /// <param name="a10">New value for <c>OnlinePayment</c>.</param>
        /// <returns>The result of <c>dbm.UpdateSite</c>, or false when no site record exists.</returns>
        bool DefoultUpadte3(bool a01, bool a0, bool a1, bool a2, bool a3, bool a4, bool a5, bool a6, bool a7, bool a8, bool a9, bool a10)
        {
            // Load the current site settings (the first stored record).
            var item = dbm.SiteParameters.FirstOrDefault();

            // FirstOrDefault yields null when nothing is stored; there is nothing to update.
            if (item == null)
            {
                return false;
            }

            SiteParameter sp = new SiteParameter
            {
                Id            = item.Id,
                Copyright     = item.Copyright,
                Describe      = item.Describe,
                Keyword       = item.Keyword,
                SiteName      = item.SiteName,
                SiteUrl       = item.SiteUrl,
                OpenSite      = item.OpenSite,
                siteState     = item.siteState,
                userRegiste   = item.userRegiste,
                trader        = item.trader,
                tradePwd      = item.tradePwd,
                outTime       = item.outTime,
                emailUserPwd  = item.emailUserPwd,
                emailUserName = item.emailUserName,
                // Carried over like the other mail settings; it was previously left
                // out, unlike in the sibling DefoultUpadte method.
                emaiSendName  = item.emaiSendName,
                SendEmail     = item.SendEmail,

                sendFreezeEmail   = a01,
                sendRecoverEmail  = a0,
                sendZhuanzEmail   = a1,
                sendWithdrawEmail = a2,
                sendreChargeEmail = a3,

                sendFreezeMsg   = a4,
                sendThawMsg     = a5,
                sendZhuanzMsg   = a6,
                sendWithdrawMsg = a7,
                sendreChargeMsg = a8,
                BankZhuanz      = a9,
                OnlinePayment   = a10
            };

            return dbm.UpdateSite(sp);
        }
        /// <summary>
        /// Updates the site settings with new values for <c>trader</c>,
        /// <c>tradePwd</c>, <c>outTime</c> and the mail account fields; every other
        /// field is copied from <paramref name="item"/>.
        /// </summary>
        /// <param name="item">Current settings that supply the unchanged fields.</param>
        /// <param name="a1">New value for <c>trader</c>.</param>
        /// <param name="a2">New value for <c>tradePwd</c>.</param>
        /// <param name="a3">New value for <c>outTime</c>.</param>
        /// <param name="a4">New value for <c>emailUserPwd</c>.</param>
        /// <param name="a5">New value for <c>emailUserName</c>.</param>
        /// <param name="a6">New value for <c>emaiSendName</c>.</param>
        /// <param name="a7">New value for <c>SendEmail</c>.</param>
        /// <returns>The result of <c>dbm.UpdateSite</c>.</returns>
        bool DefoultUpadte2(SiteParameter item, bool a1, bool a2, string a3, string a4, string a5, string a6, string a7)
        {
            SiteParameter sp = new SiteParameter
            {
                Id          = item.Id,
                Copyright   = item.Copyright,
                Describe    = item.Describe,
                Keyword     = item.Keyword,
                SiteName    = item.SiteName,
                SiteUrl     = item.SiteUrl,
                OpenSite    = item.OpenSite,
                siteState   = item.siteState,
                userRegiste = item.userRegiste,

                trader   = a1,
                tradePwd = a2,
                outTime  = a3,

                emailUserPwd  = a4,
                emailUserName = a5,
                emaiSendName  = a6,
                SendEmail     = a7,

                sendFreezeEmail   = item.sendFreezeEmail,
                // Carried over like the other notification flags; it was previously
                // left out, unlike in the sibling DefoultUpadte method.
                sendRecoverEmail  = item.sendRecoverEmail,
                sendZhuanzEmail   = item.sendZhuanzEmail,
                sendWithdrawEmail = item.sendWithdrawEmail,
                sendreChargeEmail = item.sendreChargeEmail,
                sendFreezeMsg     = item.sendFreezeMsg,
                sendThawMsg       = item.sendThawMsg,
                sendZhuanzMsg     = item.sendZhuanzMsg,
                sendWithdrawMsg   = item.sendWithdrawMsg,
                sendreChargeMsg   = item.sendreChargeMsg,
                BankZhuanz        = item.BankZhuanz,
                OnlinePayment     = item.OnlinePayment
            };

            return dbm.UpdateSite(sp);
        }
        /// <summary>
        /// Crawls one site: reads the article list, fetches each article's details,
        /// stores articles and their attachments through the data service, records a
        /// monitor entry and logs the totals and the elapsed time.
        /// </summary>
        /// <param name="siteParameter">Site settings supplying the site name and the monitored URL.</param>
        public void Crawl(SiteParameter siteParameter)
        {
            Stopwatch timer = Stopwatch.StartNew();

            ArticleMonitor monitor = new ArticleMonitor();
            monitor.StartTime = DateTime.Now;
            monitor.SiteName  = siteParameter.SiteName;

            // Use the explicit start URL when present, otherwise build one from the pattern.
            monitor.SiteUrl = string.IsNullOrWhiteSpace(siteParameter.StartUrl)
                ? string.Format(siteParameter.UrlPattern, siteParameter.StartNumber, siteParameter.PageStepNumber)
                : siteParameter.StartUrl;

            // Materialize both stages so each is evaluated exactly once.
            IEnumerable <Article> listed   = this.pageReader.GetArticals().ToArray();
            IEnumerable <Article> articles = listed.Select(entry => this.pageParser.GetArticleDetails(entry)).ToArray();

            this.dataService.AddOrUpdateArticles(articles, monitor);

            int totalAttachments = 0;

            foreach (var current in articles)
            {
                var found = this.pageParser.GetAttachments(current);

                if (found != null)
                {
                    totalAttachments += found.Count();
                }

                this.dataService.AddOrUpdateArticleAttachments(found);
            }

            this.dataService.AddOrUpdateArticleMontior(monitor);

            string summary = string.Format("{0} articles crawled, {1} attachments crawled.", articles.Count(), totalAttachments);

            Logging.WriteEntry(this, LogType.Information, summary);
            Logging.WriteEntry(this, LogType.Information, $"{timer.Elapsed} elapsed.");
        }
        /// <summary>
        /// Updates the site settings with new values for the descriptive fields
        /// (<c>Copyright</c> through <c>userRegiste</c>); every other field is copied
        /// from <paramref name="item"/>.
        /// </summary>
        /// <param name="item">Current settings that supply the unchanged fields.</param>
        /// <param name="a1">New value for <c>Copyright</c>.</param>
        /// <param name="a2">New value for <c>Describe</c>.</param>
        /// <param name="a3">New value for <c>Keyword</c>.</param>
        /// <param name="a4">New value for <c>SiteName</c>.</param>
        /// <param name="a5">New value for <c>SiteUrl</c>.</param>
        /// <param name="a6">New value for <c>OpenSite</c>.</param>
        /// <param name="a7">New value for <c>siteState</c>.</param>
        /// <param name="a8">New value for <c>userRegiste</c>.</param>
        /// <returns>The result of <c>dbm.UpdateSite</c>.</returns>
        bool DefoultUpadte(SiteParameter item, string a1, string a2, string a3, string a4, string a5, bool a6, bool a7, bool a8)
        {
            SiteParameter sp = new SiteParameter
            {
                Id          = item.Id,
                Copyright   = a1,
                Describe    = a2,
                Keyword     = a3,
                SiteName    = a4,
                SiteUrl     = a5,
                OpenSite    = a6,
                siteState   = a7,
                userRegiste = a8,

                // Carried over like tradePwd; it was previously left out, unlike in
                // the sibling DefoultUpadte3 method.
                trader            = item.trader,
                tradePwd          = item.tradePwd,
                outTime           = item.outTime,
                emailUserPwd      = item.emailUserPwd,
                emailUserName     = item.emailUserName,
                emaiSendName      = item.emaiSendName,
                SendEmail         = item.SendEmail,
                sendFreezeEmail   = item.sendFreezeEmail,
                sendRecoverEmail  = item.sendRecoverEmail,
                sendZhuanzEmail   = item.sendZhuanzEmail,
                sendWithdrawEmail = item.sendWithdrawEmail,
                sendreChargeEmail = item.sendreChargeEmail,
                sendFreezeMsg     = item.sendFreezeMsg,
                sendThawMsg       = item.sendThawMsg,
                sendZhuanzMsg     = item.sendZhuanzMsg,
                sendWithdrawMsg   = item.sendWithdrawMsg,
                sendreChargeMsg   = item.sendreChargeMsg,
                BankZhuanz        = item.BankZhuanz,
                OnlinePayment     = item.OnlinePayment
            };

            return dbm.UpdateSite(sp);
        }
Exemple #22
0
 /// <summary>
 /// Hands the current database context to the site-parameter service and
 /// updates the given record through it.
 /// </summary>
 /// <param name="p">Record to update.</param>
 /// <returns>The result reported by the service's <c>Update</c> call.</returns>
 public bool UpdateSite(SiteParameter p)
 {
     // The context must be set before the update call.
     sp.SetCtx(db);

     bool updated = sp.Update(p);
     return updated;
 }
Exemple #23
0
        /// <summary>
        /// Runs a trial crawl for the selected site: downloads the list page, extracts
        /// items with the item pattern, downloads and analyses each detail page, and
        /// shows the resulting rows in the data grid while writing progress to the
        /// test log text box.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void TestButton_Click(object sender, EventArgs e)
        {
            if (this.siteComboBox.SelectedValue == null)
            {
                return;
            }

            // SelectedItem is typed as object; return instead of dereferencing null
            // when it is not a SiteParameter.
            if (!(this.siteComboBox.SelectedItem is SiteParameter siteParameter))
            {
                return;
            }

            // string.Format and the Regex constructor both throw on a null pattern,
            // and a blank pattern cannot produce a meaningful test run.
            if (string.IsNullOrWhiteSpace(siteParameter.UrlPattern) || string.IsNullOrWhiteSpace(siteParameter.ItemPattern))
            {
                this.testLogTextBox.Text = "Please input url pattern and item pattern";
                return;
            }

            // NOTE(review): this overwrites StartUrl on the selected site, not just a
            // local value - confirm that is intended.
            siteParameter.StartUrl = string.Format(siteParameter.UrlPattern, siteParameter.StartNumber * (siteParameter.PageStepNumber == null ? 1 : siteParameter.PageStepNumber));
            StringBuilder log = new StringBuilder();

            log.Append("Start crawl...\r\n");
            testLogTextBox.Text = log.ToString();
            log.Append(string.Format("Current list url:{0}\r\n", siteParameter.StartUrl));
            testLogTextBox.Text = log.ToString();

            string listError;
            string listResult = CreateHttpWebRequest(siteParameter.StartUrl, out listError);

            if (!string.IsNullOrWhiteSpace(listError))
            {
                log.Append(string.Format("Crawl list failed,because {0}\r\n", listError));
                testLogTextBox.Text = log.ToString();
                log.Append("End crawler...\r\n");
                testLogTextBox.Text = log.ToString();
                return;
            }

            // Column order matters: rows are filled by index here and in AnalysisDetailHtml.
            DataTable table = new DataTable();

            table.Columns.Add("URL");
            table.Columns.Add("Caption");
            table.Columns.Add("Category");
            table.Columns.Add("IndexCode");
            table.Columns.Add("IssueCode");
            table.Columns.Add("PublishAgency");
            table.Columns.Add("Keyword");
            table.Columns.Add("Publish Date");
            table.Columns.Add("Attachment");
            log.Append("Analysis list html...\r\n");
            testLogTextBox.Text = log.ToString();

            // The pattern is user input; report a malformed expression in the log
            // instead of letting the exception escape the click handler.
            Regex regex;
            try
            {
                regex = new Regex(siteParameter.ItemPattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
            }
            catch (ArgumentException ex)
            {
                log.Append(string.Format("Item pattern is invalid,because {0}\r\n", ex.Message));
                log.Append("End crawler...\r\n");
                testLogTextBox.Text = log.ToString();
                return;
            }

            MatchCollection matches = regex.Matches(listResult);

            log.Append(string.Format("Analysis list total:{0}\r\n", matches.Count));
            if (matches.Count < 1)
            {
                log.Append("End crawler...\r\n");
                testLogTextBox.Text = log.ToString();
                return;
            }

            foreach (Match match in matches)
            {
                DataRow dr = table.NewRow();

                // Resolve the captured link against the list page URL.
                string detailUrl = new Uri(new Uri(siteParameter.StartUrl), match.Groups[siteParameter.UrlPosition].Value).AbsoluteUri;
                log.Append(string.Format("Current detail url:{0}\r\n", detailUrl));
                testLogTextBox.Text = log.ToString();
                dr[0] = detailUrl;
                dr[1] = match.Groups[siteParameter.CaptionPosition].Value;

                // A positive date position means the date is taken from the list item.
                if (datePosition.Value > 0)
                {
                    dr[7] = match.Groups[siteParameter.DatePosition].Value;
                }

                string detailError;
                string detailResult = CreateHttpWebRequest(detailUrl, out detailError);
                if (!string.IsNullOrWhiteSpace(detailError))
                {
                    log.Append(string.Format("Crawl detail failed,because {0}\r\n", detailError));
                    testLogTextBox.Text = log.ToString();
                    continue;
                }

                log.Append("Analysis detail html...\r\n");
                testLogTextBox.Text = log.ToString();

                // Fill the remaining columns from the detail page.
                AnalysisDetailHtml(detailResult, siteParameter, dr);
                table.Rows.Add(dr);
                dataGridView.DataSource = table;
            }

            log.Append("End crawler...\r\n");
            testLogTextBox.Text = log.ToString();
        }
Exemple #24
0
 /// <summary>
 /// Creates an item reader that matches items with the site's item pattern,
 /// compiled as a case-insensitive, multiline regular expression.
 /// </summary>
 /// <param name="siteParameter">Site settings; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="siteParameter"/> is null.</exception>
 public RegexItemReader(SiteParameter siteParameter)
 {
     if (siteParameter is null)
     {
         throw new ArgumentNullException(nameof(siteParameter));
     }

     const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled;

     this.siteParameter = siteParameter;
     this.pattern       = new Regex(siteParameter.ItemPattern, options);
 }
Exemple #25
0
 /// <summary>
 /// Extracts category, index code, issue code, publish agency, keyword, publish
 /// date and attachment links from a detail page and writes them into columns
 /// 2 to 8 of <paramref name="dr"/>. A column is left untouched when its pattern
 /// is blank or does not match.
 /// </summary>
 /// <param name="detailResult">HTML of the detail page.</param>
 /// <param name="siteParameter">Site settings holding the extraction patterns.</param>
 /// <param name="dr">Row to fill.</param>
 public void AnalysisDetailHtml(string detailResult, SiteParameter siteParameter, DataRow dr)
 {
     SetFirstGroupIfMatch(dr, 2, detailResult, siteParameter.CategoryPattern);
     SetFirstGroupIfMatch(dr, 3, detailResult, siteParameter.IndexCodePattern);
     SetFirstGroupIfMatch(dr, 4, detailResult, siteParameter.IssueCodePattern);
     SetFirstGroupIfMatch(dr, 5, detailResult, siteParameter.PublishAgencyPattern);
     SetFirstGroupIfMatch(dr, 6, detailResult, siteParameter.KeywordPattern);

     // The detail-page date is used only when no list-item date position is set.
     if (datePosition.Value < 1)
     {
         SetFirstGroupIfMatch(dr, 7, detailResult, siteParameter.PublishDatePattern);
     }

     if (!string.IsNullOrWhiteSpace(siteParameter.AttachmentPattern))
     {
         Match match = Regex.Match(detailResult, siteParameter.AttachmentPattern, RegexOptions.IgnoreCase);
         if (match.Success)
         {
             // Group 0 is the whole match; every non-blank capture after it is one link.
             var urls = match.Groups.Cast <Group>()
                                    .Skip(1)
                                    .Where(g => !string.IsNullOrWhiteSpace(g.Value))
                                    .Select(g => string.Format("Url:{0}", HttpUtility.HtmlDecode(g.Value)));

             dr[8] = string.Join(",", urls);
         }
     }
 }

 /// <summary>
 /// Writes the first capture group of a case-insensitive match of
 /// <paramref name="pattern"/> into the given column; does nothing when the
 /// pattern is blank or does not match.
 /// </summary>
 private static void SetFirstGroupIfMatch(DataRow dr, int column, string input, string pattern)
 {
     if (string.IsNullOrWhiteSpace(pattern))
     {
         return;
     }

     // One case-insensitive match serves as both the test and the extraction.
     // Previously the test ignored case but the extraction did not, so a
     // case-insensitive hit stored an empty string.
     Match match = Regex.Match(input, pattern, RegexOptions.IgnoreCase);
     if (match.Success)
     {
         dr[column] = match.Groups[1].Value;
     }
 }
Exemple #26
0
 /// <summary>
 /// Creates the parser; both arguments are forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="siteParameter">Site settings passed to the base parser.</param>
 /// <param name="htmlReader">HTML reader passed to the base parser.</param>
 public HenanHrPageParser(SiteParameter siteParameter, IHtmlReader htmlReader)
     : base(siteParameter, htmlReader)
 {
     // No additional initialization.
 }
Exemple #27
0
 /// <summary>
 /// Creates the parser with the site settings and the HTML reader it works with.
 /// </summary>
 /// <param name="siteParameter">Site settings; must not be null.</param>
 /// <param name="htmlReader">HTML reader; must not be null.</param>
 /// <exception cref="ArgumentNullException">Either argument is null.</exception>
 public JsonPageParser(SiteParameter siteParameter, IHtmlReader htmlReader)
 {
     if (siteParameter is null)
     {
         throw new ArgumentNullException(nameof(siteParameter));
     }

     this.SiteParameter = siteParameter;

     if (htmlReader is null)
     {
         throw new ArgumentNullException(nameof(htmlReader));
     }

     this.HtmlReader = htmlReader;
 }