コード例 #1
0
 /// <summary>
 /// Reports whether <paramref name="l"/> is already present in <c>LinksPool</c>.
 /// </summary>
 /// <param name="l">Link to look up.</param>
 /// <returns>
 /// <c>true</c> when some pool element reports equality with <paramref name="l"/> via <c>Equals</c>;
 /// <c>false</c> when the pool is null or empty, when nothing matches, or when the lookup throws.
 /// </returns>
 protected override bool IsLinkExist(string l)
 {
     var poolHasItems = !ReferenceEquals(LinksPool, null) && !LinksPool.Count.Equals(0);
     if (!poolHasItems)
     {
         return false;
     }

     try
     {
         // Any(predicate) stops at the first element whose Equals(l) is true.
         return LinksPool.Any(candidate => candidate.Equals(l));
     }
     catch (Exception)
     {
         // Best effort: any failure during the scan is reported as "not found".
         return false;
     }
 }
コード例 #2
0
 /// <summary>
 /// Loads the page at <paramref name="l"/>, adds it to <c>LinksPool</c> when the page matches the
 /// site's product-page selector, and then registers every acceptable anchor link found on the page
 /// in both <c>LinksPool</c> and <c>TurnableLinksList</c>.
 /// </summary>
 /// <param name="l">Address of the page to process; it must pass <c>ValidationService.Validate</c>.</param>
 /// <remarks>
 /// Nothing is thrown for a rejected address: the validation message, when there is one, is logged as a
 /// warning. An <see cref="AggregateException"/> raised while loading or processing the page is logged
 /// as an error; other exception types raised outside the product-page check still propagate.
 /// NOTE(review): the method name suggests it runs on several threads, yet the shared collections are
 /// modified here without visible synchronization - confirm they are thread-safe.
 /// </remarks>
 protected override void LinksScrapperThread(string l)
 {
     _l.info("Processor thread");

     if (!ValidationService.Validate(l))
     {
         // Read the message once so the null check and the logged value cannot disagree.
         var exceptMessage = ValidationService.GetExceptMessage();
         if (!ReferenceEquals(exceptMessage, null))
         {
             _l.warn(exceptMessage);
         }
         return;
     }

     var browser = new ScrapingBrowser();
     try
     {
         var htmlNode = browser.NavigateToPage(new Uri(l)).Html;
         if (ReferenceEquals(htmlNode, null))
         {
             _l.warn($"Nothing to scrap from url {l}");
             return;
         }

         var links = htmlNode.CssSelect("a");
         if (ReferenceEquals(links, null))
         {
             _l.warn($"Any links on the page {l}");
             return;
         }

         try
         {
             var productNameNode = htmlNode.CssSelect(requestScrappingSite.SiteProductPageIndicationSelector);
             if (ReferenceEquals(productNameNode, null))
             {
                 _l.warn("Node selection error: not a product name");
             }
             else
             {
                 // Take(1) instead of First(): a page without a product node is the common case and
                 // must not be signalled by throwing and swallowing InvalidOperationException.
                 var firstMatch = productNameNode.Take(1).ToList();
                 if (firstMatch.Count > 0)
                 {
                     if (!ReferenceEquals(firstMatch[0], null))
                     {
                         // NOTE(review): added without an IsLinkExist check, unlike the loop below -
                         // confirm duplicates in LinksPool are acceptable here.
                         var productLink = PrepareLink(l);
                         _l.info($"Link {productLink} is a valid link, adding it to collection!");
                         LinksPool.Add(productLink);
                     }
                     else
                     {
                         _l.warn("Node selection error: not a product name");
                     }
                 }
                 // An empty match means "not a product page": skipped silently, as before.
             }
         }
         catch (Exception e)
         {
             // Product detection stays best effort (link harvesting below still runs), but real
             // failures such as a broken selector are no longer discarded without a trace.
             _l.warn(String.Concat("Product page check failed: ", e.Message, " -> ", l));
         }

         foreach (var link in links)
         {
             var linkValue = link.GetAttributeValue("href", "").Trim();
             if (!ValidationService.Validate(linkValue))
             {
                 continue;
             }

             var candidate = PrepareLink(linkValue);
             if (!IsLinkExist(candidate) && IsNotExcluded(candidate))
             {
                 LinksPool.Add(candidate);
                 TurnableLinksList.Add(candidate);
             }
         }
     }
     catch (AggregateException e)
     {
         _l.error(String.Concat(e.Message, " -> ", l));
     }
 }