/// <summary>
/// Reports whether <c>LinksPool</c> already holds an entry equal to the given link.
/// </summary>
/// <param name="l">Link to look up; compared through each entry's <c>Equals</c>.</param>
/// <returns>
/// <c>true</c> when an entry for which <c>Equals(l)</c> holds is found;
/// <c>false</c> when the pool is null or empty, when nothing matches,
/// or when the lookup itself throws.
/// </returns>
protected override bool IsLinkExist(string l)
{
    // Nothing can match when there is no pool or it has no entries.
    var poolHasEntries = !ReferenceEquals(LinksPool, null) && !LinksPool.Count.Equals(0);
    if (!poolHasEntries)
    {
        return false;
    }

    try
    {
        var match = LinksPool.FirstOrDefault(entry => entry.Equals(l));
        return !ReferenceEquals(match, null);
    }
    catch (Exception)
    {
        // Best-effort lookup: any failure while scanning the pool
        // (for example a null entry) is reported as "not found".
        return false;
    }
}
/// <summary>
/// Processes one URL: loads the page, records the URL itself in <c>LinksPool</c>
/// when the page matches the site's product-page indicator selector, and then
/// queues every new, non-excluded anchor link found on the page.
/// </summary>
/// <param name="l">URL of the page to process.</param>
/// <remarks>
/// Steps, in order:
/// 1. The URL is checked with <c>ValidationService.Validate</c>. On failure the
///    validation message (when not null) is logged as a warning and nothing else happens.
/// 2. The page is loaded through a new <c>ScrapingBrowser</c>; an
///    <see cref="AggregateException"/> raised anywhere in the processing is logged
///    as an error together with the URL.
/// 3. If the indicator selector yields a first node, the prepared form of
///    <paramref name="l"/> is added to <c>LinksPool</c>. Any exception during this
///    step is swallowed silently.
/// 4. Each anchor's <c>href</c> is trimmed, validated and prepared; links that are
///    not yet in the pool and are not excluded are added to both <c>LinksPool</c>
///    and <c>TurnableLinksList</c>.
/// </remarks>
protected override void LinksScrapperThread(string l)
{
    _l.info("Processor thread");

    if (!ValidationService.Validate(l))
    {
        // A null message is treated as "nothing to report" (the original author
        // had an "already processed" warning here, but it is disabled).
        if (!ReferenceEquals(ValidationService.GetExceptMessage(), null))
        {
            _l.warn(ValidationService.GetExceptMessage());
        }

        return;
    }

    ScrapingBrowser browser = new ScrapingBrowser();
    try
    {
        var pageHtml = browser.NavigateToPage(new Uri(l)).Html;
        if (ReferenceEquals(pageHtml, null))
        {
            _l.warn($"Nothing to scrap from url {l}");
            return;
        }

        // Anchors are selected with a CSS selector; an XPath variant
        // ("//body//a/@href") was tried earlier and abandoned.
        var anchors = pageHtml.CssSelect("a");
        if (ReferenceEquals(anchors, null))
        {
            _l.warn($"Any links on the page {l}");
            return;
        }

        try
        {
            var indicatorNodes = pageHtml.CssSelect(requestScrappingSite.SiteProductPageIndicationSelector);
            if (ReferenceEquals(indicatorNodes, null) || ReferenceEquals(indicatorNodes.First(), null))
            {
                _l.warn("Node selection error: not a product name");
            }
            else
            {
                var preparedLink = PrepareLink(l);
                _l.info($"Link {preparedLink} is a valid link, adding it to collection!");
                LinksPool.Add(preparedLink);
            }
        }
        catch (Exception)
        {
            // Intentionally silent: a failure here (e.g. First() finding no element)
            // just means the page is not treated as a product page.
        }

        foreach (var anchor in anchors)
        {
            var href = anchor.GetAttributeValue("href", "").Trim();
            if (!ValidationService.Validate(href))
            {
                // Not a link this scraper is interested in; skipped without logging.
                continue;
            }

            var candidate = PrepareLink(href);
            if (IsLinkExist(candidate) || !IsNotExcluded(candidate))
            {
                // Already known or explicitly excluded; skipped without logging.
                continue;
            }

            LinksPool.Add(candidate);
            TurnableLinksList.Add(candidate);
        }
    }
    catch (AggregateException e)
    {
        _l.error(String.Concat(e.Message, " -> ", l));
    }
}