Пример #1
0
        /// <summary>
        /// Click handler: deserializes an <c>AnalyzeData[]</c> from the file "RawItem.bry"
        /// and passes every item of every entry to <c>Analyzer.Core.Analyzer.AnalyzeItem</c>
        /// together with the crawl looked up by the entry's <c>CrawlID</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void AnalyzeBtn_Click(object sender, EventArgs e)
        {
            AnalyzeData[] dataList;

            // SECURITY NOTE(review): BinaryFormatter is unsafe on untrusted input and has been
            // removed from recent .NET versions. Left in place because the file format is
            // shared with whatever writes "RawItem.bry"; only feed it files this tool produced.
            BinaryFormatter bf = new BinaryFormatter();

            // Dispose the stream deterministically so the file handle is released even when
            // deserialization throws.
            using (FileStream fs = new FileStream("RawItem.bry", FileMode.Open))
            {
                dataList = bf.Deserialize(fs) as AnalyzeData[];
            }

            // The "as" cast yields null when the payload is not an AnalyzeData[]; there is
            // nothing to analyze in that case.
            if (dataList == null)
            {
                return;
            }

            foreach (var data in dataList)
            {
                var items = data.Items;
                if (items == null)
                {
                    // Entries may have been saved without any extracted items.
                    continue;
                }

                var crawl = CrawlBusiness.GetByCrawlID(data.CrawlID);
                foreach (var item in items)
                {
                    Analyzer.Core.Analyzer.AnalyzeItem(item, null, crawl);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Crawls the list pages <c>StartPage..EndPage</c> of a keyword query for one site,
        /// merges the page responses, optionally crawls the item details, and appends the
        /// extracted items to <paramref name="resultDataList"/>.
        /// </summary>
        /// <param name="siteEntity">Site to search; only its <c>SiteID</c> is read here.</param>
        /// <param name="keywordQuery">Supplies <c>Keyword</c>, <c>StartPage</c> and <c>EndPage</c>.</param>
        /// <param name="resultDataList">Receives one <c>AnalyzeData</c> entry when at least one page was accepted.</param>
        /// <param name="ProgressPercStart">Progress value that corresponds to "no page processed yet".</param>
        /// <param name="ProgressPercEnd">Progress value reported once the last page has been processed.</param>
        private void Search(SiteEntity siteEntity, KeywordQuery keywordQuery, List <AnalyzeData> resultDataList, int ProgressPercStart, int ProgressPercEnd)
        {
            var          firstCrawl     = CrawlBusiness.GetTopBySiteID(siteEntity.SiteID, "");
            var          crawlID        = firstCrawl.CrawlID;
            string       lastItemID     = null;
            var          keyword        = keywordQuery.Keyword;
            var          keywordExclude = "";
            var          startPage      = keywordQuery.StartPage;
            var          endPage        = keywordQuery.EndPage;
            var          crawl          = CrawlBusiness.GetByCrawlID(crawlID);
            var          site           = SiteBusiness.GetBySiteID(crawl.SiteID);
            ListResponse result         = null;

            // Number of pages in the requested range; used to scale the progress value.
            int totalPages = endPage - startPage + 1;

            for (int currentPage = startPage; currentPage <= endPage; currentPage++)
            {
                CrawlRequest request = CrawlRequest.GetQueryUrl(crawlID, keyword, currentPage, keywordExclude, "", "");
                crawl.KeywordQuery = keyword;
                crawl.KeywordAny   = "";
                crawl.KeywordNot   = keywordExclude;
                crawl.KeywordSite  = "";
                var response     = Core.Crawler.SimpleCrawler.CrawlList_Single(request, crawl, site, true, null);
                var currentItems = response.ExtractItems(crawl);

                // Accept the page only when it has items and its last item differs from the
                // last item of the previously accepted page (i.e. it is not a repeat).
                if (currentItems != null && currentItems.Any() && currentItems.Last().ItemID != lastItemID)
                {
                    lastItemID = currentItems.Last().ItemID;

                    // The ExistCheck.ExistCheck_List call that used to run here is disabled.
                    if (result == null)
                    {
                        result = response;
                    }
                    else
                    {
                        result.CombineList(response);
                    }
                }

                // Progress is based on how many pages of the range are done, not on the
                // absolute page number; otherwise any startPage other than 0 overshoots
                // ProgressPercEnd.
                int pagesDone = currentPage - startPage + 1;
                backgroundWorker1.ReportProgress(ProgressPercStart + (ProgressPercEnd - ProgressPercStart) * pagesDone / totalPages);
            }

            // Nothing was accepted from any page: leave resultDataList untouched.
            if (result == null)
            {
                return;
            }

            if (DetailChk.Checked)
            {
                // NOTE(review): "Site" (capital S) is not the local "site" declared above;
                // confirm which member this resolves to and whether "site" was intended.
                // NOTE(review): if this method runs on the BackgroundWorker thread, these
                // window-title writes touch the UI from a non-UI thread — confirm.
                this.Text = Site.Name + " 抓取Items";
                Core.Crawler.SimpleCrawler.CrawlItem_Multi(
                    result, crawl, site, msg =>
                {
                });
                this.Text = @"Palas搜索工具";
            }

            var         items = result.ExtractItems(crawl, null);
            AnalyzeData data  = new AnalyzeData()
            {
                Items   = items,
                CrawlID = crawl.CrawlID
            };
            resultDataList.Add(data);
        }