Esempio n. 1
0
        /// <summary>
        /// Runs <c>ProcessLink</c> over every parsed link of a crawled page and
        /// publishes the outcome through <c>LinksToCrawl</c> and <c>LinksToByPass</c>.
        /// </summary>
        /// <param name="page">
        /// The crawled page to process. Its <c>ParsedLinks</c> may be <c>null</c> or empty,
        /// in which case both result lists are reset to empty lists. Its <c>PageBag</c>
        /// supplies the <c>SessionId</c> and <c>CrawlerId</c> passed to <c>ProcessLink</c>.
        /// </param>
        /// <remarks>
        /// <c>MapOfLinksToCrawl</c> is used only as scratch state for the duration of the
        /// call: it is created before the loop, copied into <c>LinksToCrawl</c> afterwards,
        /// then cleared and set to <c>null</c>.
        /// NOTE(review): presumably <c>ProcessLink</c> is what fills
        /// <c>MapOfLinksToCrawl</c> and <c>LinksToByPass</c> - confirm against its definition.
        /// </remarks>
        public void ProcessLinks(Abot.Poco.CrawledPage page)
        {
            // Any() stops at the first element, whereas Count() would walk the whole
            // sequence just to find out whether it is empty.
            if (page.ParsedLinks == null || !page.ParsedLinks.Any())
            {
                _logger.DebugFormat("CrawledPage contained 0 parsed links");
                LinksToCrawl  = new List <LinkToCrawl>();
                LinksToByPass = new List <CrawledLink>();
                return;
            }

            LinksToByPass     = new List <CrawledLink>();
            MapOfLinksToCrawl = new Dictionary <string, LinkToCrawl>();

            using (var factory = _provider.GetInstanceOf <IModelFactory>())
            {
                // Read once up front so the same identifiers are handed to every ProcessLink call.
                var sessionId = page.PageBag.SessionId;
                var crawlerId = page.PageBag.CrawlerId;

                foreach (var targetUri in page.ParsedLinks)
                {
                    ProcessLink(page, factory, targetUri, sessionId, crawlerId);
                }

                // ToList() copies the values, so clearing the map afterwards does not
                // affect the published LinksToCrawl list.
                LinksToCrawl = MapOfLinksToCrawl.Values.ToList();
                MapOfLinksToCrawl.Clear();
                MapOfLinksToCrawl = null;

                // Guarded so the joined URL strings are only built when debug logging is on.
                if (_logger.IsDebugEnabled)
                {
                    _logger.DebugFormat("TargetUrls of new LinksToCrawl: {0}",
                                        String.Join("; ", LinksToCrawl.Select(o => o.TargetUrl)));
                    _logger.DebugFormat("TargetUrls of new LinksToByPass: {0}",
                                        String.Join("; ", LinksToByPass.Select(o => o.TargetUrl)));
                }
            }
        }