コード例 #1
0
        /// <summary>
        /// Runs a web crawl starting at the configured seed URL, extracts every page
        /// collected in <c>LinksToIndex</c>, and passes the extracted pages to the indexer.
        /// </summary>
        /// <remarks>
        /// Resets <c>BaseUrl</c>, <c>BaseSchema</c>, <c>LinksProcessed</c> and
        /// <c>LinksToIndex</c> on every call. If <c>HandleStatusCallBack()</c> returns
        /// <c>true</c> after a page has been processed, the method returns immediately:
        /// the indexer is not run and only <c>SourceId</c> is populated on the result.
        /// </remarks>
        /// <returns>
        /// A <see cref="CrawlerResults"/> holding the crawled pages, the counts reported
        /// by the indexer, and the elapsed time of the run.
        /// </returns>
        public CrawlerResults Run()
        {
            var results = new CrawlerResults
            {
                SourceId = _crawlSettings.SourceId
            };

            // A Stopwatch is monotonic, so the reported duration cannot be skewed (or go
            // negative) when the wall clock changes mid-crawl (DST switch, NTP adjustment).
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            LoggerInfo("Starting Web Crawl");

            BaseUrl    = UrlParser.GetHostName(_crawlSettings.SeedUrl);
            BaseSchema = UrlParser.GetSchema(_crawlSettings.SeedUrl);

            var seedPageResp = HttpClient.GetRequest(_crawlSettings.SeedUrl);

            LoggerInfo(string.Format("Crawling {0}", _crawlSettings.SeedUrl));

            // Start every run with empty link collections.
            LinksProcessed = new List<string>();
            LinksToIndex   = new List<string>();

            // Links found on the seed page are handed to ProcessLinks at depth 1.
            var links = GetLinks(seedPageResp);
            var depth = 1;

            ProcessLinks(links, depth);

            var searchableContent = new List<IWebCrawlPage>();

            foreach (var link in LinksToIndex)
            {
                LoggerDebug(string.Format("Extracting {0}", link));

                var page = ProcessPage(link);

                // ProcessPage may yield null; such links are skipped.
                if (page != null)
                {
                    searchableContent.Add(page);
                }

                // NOTE(review): presumably a "stop requested" signal from the caller - confirm.
                // The early return skips indexing and leaves the counters and duration unset.
                if (HandleStatusCallBack())
                {
                    return results;
                }

                // Fixed one-second pause between pages (presumably to throttle requests).
                System.Threading.Thread.Sleep(1000);
            }

            LoggerInfo("Running Indexer");

            var indexResults = _Indexer.RunUpdate(searchableContent, null, null);

            results.CrawlPages = searchableContent;
            results.CrawledCnt = searchableContent.Count;
            // NOTE(review): IndexedCnt and TotalCnt are both taken from indexResults.TotalCnt,
            // as in the original code - confirm this is intended.
            results.IndexedCnt = indexResults.TotalCnt;
            results.TotalCnt   = indexResults.TotalCnt;
            results.ErrorCnt   = indexResults.ErrorCnt;
            results.Duration   = stopwatch.Elapsed;

            LoggerInfo("Web Crawler finished.");

            return results;
        }