/// <summary>
/// Click handler for the analyze button: deserializes an <c>AnalyzeData[]</c> from the file
/// "RawItem.bry" (resolved against the current working directory) and passes every contained
/// item to <c>Analyzer.Core.Analyzer.AnalyzeItem</c> together with the crawl looked up by the
/// entry's <c>CrawlID</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments; not used.</param>
private void AnalyzeBtn_Click(object sender, EventArgs e)
{
    AnalyzeData[] dataList;

    // Dispose the stream deterministically so the file handle is released even when
    // deserialization throws.
    using (FileStream fs = new FileStream("RawItem.bry", FileMode.Open))
    {
        // SECURITY NOTE(review): BinaryFormatter is unsafe on untrusted input and has been
        // removed from recent .NET releases. It is kept here only because the on-disk format
        // depends on it; make sure "RawItem.bry" is always produced by this tool.
        BinaryFormatter bf = new BinaryFormatter();
        dataList = bf.Deserialize(fs) as AnalyzeData[];
    }

    // The "as" cast yields null when the payload is not an AnalyzeData[]; there is nothing
    // to analyze in that case.
    if (dataList == null)
    {
        return;
    }

    foreach (var data in dataList)
    {
        var crawl = CrawlBusiness.GetByCrawlID(data.CrawlID);

        foreach (var item in data.Items)
        {
            Analyzer.Core.Analyzer.AnalyzeItem(item, null, crawl);
        }
    }
}
/// <summary>
/// Crawls the keyword search result pages <c>StartPage..EndPage</c> for a site, combines the
/// pages that were accepted into a single list response, optionally crawls the item details,
/// and appends the extracted items to <paramref name="resultDataList"/>.
/// </summary>
/// <param name="siteEntity">Site to search; only its <c>SiteID</c> is read, to look up the crawl to use.</param>
/// <param name="keywordQuery">Supplies the keyword and the inclusive page range (<c>StartPage</c>, <c>EndPage</c>).</param>
/// <param name="resultDataList">Receives one <c>AnalyzeData</c> entry when at least one page was accepted; otherwise left untouched.</param>
/// <param name="ProgressPercStart">Progress value corresponding to the start of this search.</param>
/// <param name="ProgressPercEnd">Progress value reported once the last page has been processed.</param>
private void Search(SiteEntity siteEntity, KeywordQuery keywordQuery, List<AnalyzeData> resultDataList, int ProgressPercStart, int ProgressPercEnd)
{
    var firstCrawl = CrawlBusiness.GetTopBySiteID(siteEntity.SiteID, "");
    var crawlID = firstCrawl.CrawlID;

    var keyword = keywordQuery.Keyword;
    var keywordExclude = "";
    var startPage = keywordQuery.StartPage;
    var endPage = keywordQuery.EndPage;

    var crawl = CrawlBusiness.GetByCrawlID(crawlID);
    var site = SiteBusiness.GetBySiteID(crawl.SiteID);

    // Only used inside the loop, so it is at least 1 whenever it serves as a divisor.
    var pageCount = endPage - startPage + 1;

    string lastItemID = null;
    ListResponse result = null;

    for (int currentPage = startPage; currentPage <= endPage; currentPage++)
    {
        CrawlRequest request = CrawlRequest.GetQueryUrl(crawlID, keyword, currentPage, keywordExclude, "", "");

        crawl.KeywordQuery = keyword;
        crawl.KeywordAny = "";
        crawl.KeywordNot = keywordExclude;
        crawl.KeywordSite = "";

        var response = Core.Crawler.SimpleCrawler.CrawlList_Single(request, crawl, site, true, null);
        var currentItems = response.ExtractItems(crawl);

        if (currentItems != null && currentItems.Any())
        {
            var pageLastItemID = currentItems.Last().ItemID;

            // A page is accepted only when its last item differs from the last item of the
            // previously accepted page (presumably to drop a repeated page - confirm).
            if (pageLastItemID != lastItemID)
            {
                lastItemID = pageLastItemID;

                // Existence check is currently disabled:
                //bool stopCrawl;
                //ExistCheck.ExistCheck_List(
                //    response, null,
                //    (Enums.ExistItemStrategy) crawl.ExistItemStrategy,
                //    (Enums.ContentDetailLevel) site.ContentDetailLevel, crawl.IssueID,
                //    crawl.CrawlID, out stopCrawl);

                if (result == null)
                {
                    result = response;
                }
                else
                {
                    result.CombineList(response);
                }
            }
        }

        // Progress is based on the number of pages completed so far, not on the absolute page
        // number; otherwise a non-zero StartPage pushes the value past ProgressPercEnd.
        var pagesDone = currentPage - startPage + 1;
        backgroundWorker1.ReportProgress(ProgressPercStart + (ProgressPercEnd - ProgressPercStart) * pagesDone / pageCount);
    }

    // Get Item
    if (result != null)
    {
        if (DetailChk.Checked)
        {
            // NOTE(review): "Site" is a different member from the local "site" fetched above;
            // confirm which one is intended here.
            // NOTE(review): if this method runs on the background worker's thread, these title
            // updates touch the form from a non-UI thread - confirm.
            this.Text = Site.Name + " 抓取Items";
            Core.Crawler.SimpleCrawler.CrawlItem_Multi(result, crawl, site, msg => { });
            this.Text = @"Palas搜索工具";
        }

        var items = result.ExtractItems(crawl, null);
        AnalyzeData data = new AnalyzeData()
        {
            Items = items,
            CrawlID = crawl.CrawlID
        };
        resultDataList.Add(data);
    }
}