Beispiel #1
0
        /// <summary>
        /// Handles one crawled page: loads its HTML through <c>HtmlParse.LoadFromHtml</c>,
        /// determines the site type from the response URI and hands both to <c>Parse</c>.
        /// </summary>
        /// <param name="crawler">The crawler that produced the page. Not used by this method.</param>
        /// <param name="propertyBag">Per-page data; supplies the HTML content and <c>ResponseUri</c>.</param>
        /// <remarks>
        /// A <see cref="NullReferenceException"/> raised while loading or parsing the page is
        /// treated as "this page cannot be processed": it is reported on standard error and
        /// the page is skipped, so a single bad page does not abort the caller.
        /// </remarks>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            // The result of GetResponse() was never used here. The call is retained only in
            // case it has side effects.
            // NOTE(review): confirm it is side-effect free and remove this line.
            propertyBag.GetResponse();

            try
            {
                HtmlDocument htmlDoc  = HtmlParse.LoadFromHtml(propertyBag);
                var          siteType = HtmlParse.RecogSite(propertyBag.ResponseUri);

                // The value returned by Parse is currently not consumed; the previous
                // null check on it had no effect because nothing followed it.
                Parse(htmlDoc, siteType);
            }
            catch (NullReferenceException ex)
            {
                // Best effort: skip the page, but leave a trace instead of failing silently.
                Console.Error.WriteLine("Process: page skipped after NullReferenceException: " + ex);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Crawls a fixed start page with a <c>Crawler</c> configured for the recognized
        /// site type, then prints "End" and waits for a key press.
        /// </summary>
        /// <param name="args">Not used by this method.</param>
        void Run(string[] args)
        {
            // Other start pages that were used previously:
            //   http://www.fzztb.gov.cn/index_629.htm
            //   http://caigou.jdzol.com/html/list_1433.html
            //   http://www.gzzfcg.gov.cn/products.asp?BigClassID=34&SmallClassID=1
            Uri startPage = new Uri("http://www.ncszfcg.gov.cn/more.cfm?sid=100002011&c_code=791");

            // The site type selects which include/exclude filters are applied below.
            var kind = HtmlParse.RecogSite(startPage);

            Crawler crawler = new Crawler(startPage, new HtmlDocumentProcessor(), new CrawlProcessor());
            crawler.MaximumCrawlDepth  = 5;
            crawler.MaximumThreadCount = 5;
            crawler.IncludeFilter      = IncludeFilter(kind);
            crawler.ExcludeFilter      = ExcludeFilter(kind);

            crawler.Crawl();

            // Signal completion and wait for a key press before returning.
            Console.Write("End");
            Console.ReadKey();
        }