Exemple #1
0
        /// <summary>
        /// Extracts the links of <paramref name="crawledPage"/> via the hyperlink parser and
        /// adds one page per link to the scheduler.
        /// </summary>
        /// <param name="crawledPage">
        /// The page whose links are scheduled. Its <c>Uri</c> becomes the parent of each
        /// scheduled page and its <c>CrawlDepth</c> + 1 becomes their depth.
        /// </param>
        /// <remarks>
        /// A link that fails while being prepared or scheduled is skipped without being reported;
        /// the remaining links are still processed.
        /// </remarks>
        protected virtual void SchedulePageLinks(CrawledPage crawledPage)
        {
            IEnumerable<Uri> crawledPageLinks = _hyperLinkParser.GetLinks(crawledPage);

            // Guard: a parser that yields no link collection would otherwise make the
            // foreach below throw a NullReferenceException outside the per-link try block.
            if (crawledPageLinks == null)
            {
                return;
            }

            foreach (Uri uri in crawledPageLinks)
            {
                // Added due to a bug in the Uri class related to this
                // (http://stackoverflow.com/questions/2814951/system-uriformatexception-invalid-uri-the-hostname-could-not-be-parsed)
                try
                {
                    // NOTE(review): a CrawledPage is instantiated although the variable is typed
                    // PageToCrawl -- confirm whether "new PageToCrawl(uri)" was intended.
                    PageToCrawl page = new CrawledPage(uri);
                    page.ParentUri = crawledPage.Uri;
                    page.CrawlDepth = crawledPage.CrawlDepth + 1;
                    page.IsInternal = _isInternalDecisionMaker(uri, _crawlContext.RootUri);
                    page.IsRoot = false;
                    _scheduler.Add(page);
                }
                catch
                {
                    // Deliberate best-effort: one malformed link must not abort scheduling
                    // of the others, so the failure is swallowed and the link is skipped.
                }
            }
        }
Exemple #2
0
 /// <summary>
 /// Runs the hyperlink parser over <paramref name="crawledPage"/> and stores the result
 /// in the page's <c>ParsedLinks</c> property.
 /// </summary>
 /// <param name="crawledPage">The page to parse; its <c>ParsedLinks</c> is overwritten.</param>
 protected virtual void ParsePageLinks(CrawledPage crawledPage)
 {
     var discoveredLinks = _hyperLinkParser.GetLinks(crawledPage);
     crawledPage.ParsedLinks = discoveredLinks;
 }