Beispiel #1
0
        /// <summary>
        /// Console entry point of the crawler. Runs two phases in sequence:
        /// first it fetches every link returned by <c>SelectByStatus</c> (in batches of 100)
        /// and passes the links extracted from each page to <c>AddUrls</c>; then it fetches
        /// every link returned by <c>SelectByIsUseDownload</c> and stores the extracted article.
        /// Finally it waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            ConsoleInitial();
            LinkBLL    bllLink    = new LinkBLL();
            ArticleBLL bllArticle = new ArticleBLL();

            // ---- Phase 1: crawl pending links and collect the links they contain ----
            linkList = bllLink.SelectByStatus(100, Link.StatusAttribute.IsUse);

            if (linkList.Count <= 0)
            {
                // Nothing pending yet: seed the crawl with BaseUrl at depth 0.
                Link seed = new Link();
                seed.Url   = BaseUrl;
                seed.Depth = 0;
                bllLink.AddUrl(BaseUrl, 0);
                linkList.Add(seed);
            }

            while (linkList.Count > 0)
            {
                // Walk the current batch from the last entry to the first.
                for (int i = linkList.Count - 1; i >= 0; i--)
                {
                    Link link = linkList[i];

                    // Record the load: flag the link as not-in-use so the next query skips it.
                    bllLink.UpdateStatusByNotUse(link.Url);

                    HttpHelper.HttpItem httpItem = new HttpHelper.HttpItem
                    {
                        URL      = link.Url,
                        Method   = "GET",
                        Encoding = Encoding.UTF8
                    };
                    string html = HttpHelper.GetHtml(httpItem);

                    CrawlerHelper crawlerHelper = new CrawlerHelper(html);
                    string[]      links         = crawlerHelper.GetLinks;

                    // Links found on this page are one level deeper than the page itself.
                    AddUrls(links, link.Depth + 1, BaseUrl);

                    Console.WriteLine("需加载{0},已加载{1}", bllLink.SelectCountByStatusIsUse(), bllLink.SelectCountByStatusNotUse());
                }

                // Batch exhausted: fetch the next batch; an empty result ends the phase.
                linkList = bllLink.SelectByStatus(100, Link.StatusAttribute.IsUse);
            }

            // ---- Phase 2: download the articles behind the collected links ----
            linkList = bllLink.SelectByIsUseDownload(100, ArticleSuffix);

            while (linkList.Count > 0)
            {
                for (int i = linkList.Count - 1; i >= 0; i--)
                {
                    Link link = linkList[i];
                    Console.WriteLine("----{0}=={1}-----", i, link.Id);

                    HttpHelper.HttpItem httpItem = new HttpHelper.HttpItem
                    {
                        URL      = link.Url,
                        Method   = "GET",
                        Encoding = Encoding.UTF8
                    };
                    string html = HttpHelper.GetHtml(httpItem);

                    CrawlerHelper crawlerHelper = new CrawlerHelper(html);
                    Article       article       = crawlerHelper.GetArticle;

                    // Treat null the same as empty so a missing title/content is never inserted.
                    if (!string.IsNullOrEmpty(article.Title) && !string.IsNullOrEmpty(article.Content))
                    {
                        article.ArticleUrl = link.Url;
                        int artInsertId = bllArticle.InsertByLinkId(article, link.Id);
                        if (artInsertId > 0)
                        {
                            Console.WriteLine("添加成功-----{0}", article.Title);
                        }
                        // NOTE(review): when the insert reports failure (id <= 0) nothing is updated
                        // here; if SelectByIsUseDownload keeps returning this link it will be retried
                        // indefinitely — confirm against InsertByLinkId.
                    }
                    else
                    {
                        // No usable article on this page: update the link's download flag by id.
                        bllLink.UpdateIsDownloadById(link.Id);
                    }
                }

                // Batch exhausted: fetch the next batch; an empty result ends the phase.
                linkList = bllLink.SelectByIsUseDownload(100, ArticleSuffix);
            }

            // Show the prompt first, then wait once for the key press.
            Console.WriteLine("按任意键退出...");
            Console.ReadKey();
        }