/// <summary>
/// Reports whether <paramref name="l"/> already appears in <c>IndexedLinks</c>.
/// </summary>
/// <param name="l">The link to look up.</param>
/// <returns>
/// <c>true</c> when some element of <c>IndexedLinks</c> reports equality with
/// <paramref name="l"/> via <c>Equals</c>; <c>false</c> when the collection is
/// null or empty, when nothing matches, or when the lookup throws.
/// </returns>
protected override bool IsLinkIndexed(string l)
{
    bool nothingIndexed = ReferenceEquals(IndexedLinks, null) || IndexedLinks.Count == 0;
    if (nothingIndexed)
    {
        return false;
    }

    try
    {
        // Stops at the first element that equals the requested link.
        return IndexedLinks.Any(candidate => candidate.Equals(l));
    }
    catch (Exception)
    {
        // Any failure during the lookup is reported as "not indexed".
        return false;
    }
}
/// <summary>
/// Works through <c>TurnableLinksList</c> until it is empty: every queued link is
/// downloaded, checked against the product-page indication selector, scraped as a
/// product when the selector yields a non-blank name, handed to
/// <c>LinksScrapperThread</c>, and finally removed from the queue. When no links
/// remain, the <c>InstanceShuttedDown</c> status is raised.
/// </summary>
/// <remarks>
/// Passes are repeated with a loop rather than by self-recursion, so the call
/// stack does not grow with the number of passes. Failures for a single link are
/// logged and the link is still removed from the queue.
/// </remarks>
private void LinksProcessor()
{
    var random = new Random();

    do
    {
        if (TurnableLinksList.Count > 0)
        {
            // NOTE(review): this is the same collection instance as TurnableLinksList
            // (assuming a reference type), not a snapshot. The loop bound is re-read on
            // every iteration, so items appended during the pass are visited too.
            var currentTurnableList = TurnableLinksList;
            var processedLinks = new List<string>();

            // The site's min/max interval settings are used twice: as the number of
            // requests allowed before pausing, and (times 1000) as the pause in ms.
            int operationsLimit = random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin, requestScrappingSite.SiteBaseRequestsIntervalMax);
            int operationsDone = 0;

            for (var i = 0; i < currentTurnableList.Count; i++)
            {
                string link = currentTurnableList.ElementAt(i);
                _l.info($"Current link: {link}");
                var browser = new ScrapingBrowser();

                try
                {
                    if (operationsDone >= operationsLimit)
                    {
                        // Pause, then draw a fresh request budget for the next burst.
                        Thread.Sleep(random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin * 1000, requestScrappingSite.SiteBaseRequestsIntervalMax * 1000));
                        operationsLimit = random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin, requestScrappingSite.SiteBaseRequestsIntervalMax);
                        operationsDone = 0;
                    }

                    operationsDone++;

                    var htmlNode = browser.NavigateToPage(new Uri(link)).Html;
                    if (!ReferenceEquals(htmlNode, null))
                    {
                        try
                        {
                            var indicationSelector = requestScrappingSite.SiteProductPageIndicationSelector;
                            if (!string.IsNullOrEmpty(indicationSelector))
                            {
                                var productNameNode = SelectNode(indicationSelector, htmlNode);
                                if (!ReferenceEquals(productNameNode, null))
                                {
                                    var productName = productNameNode.InnerText;

                                    // Compare by value: reference equality against String.Empty
                                    // only works by accident of how Trim() returns its result.
                                    if (!string.IsNullOrWhiteSpace(productName))
                                    {
                                        IndexedLinks.Add(link);
                                        _l.info($"Adding {link} to products list collection");
                                        ScrapProductFromUrl(link, htmlNode, new WebScrapperBaseProxyEntity { });
                                    }
                                }
                            }

                            LinksScrapperThread(link);
                        }
                        catch (Exception e)
                        {
                            _l.error($"Product name node selector error : {e.Message}");
                        }
                    }
                }
                catch (Exception e)
                {
                    _l.error($"Internal scrapping browser error: {e.Message}");
                }

                // Queue the link for removal whether or not processing succeeded.
                processedLinks.Add(link);
            }

            foreach (var item in processedLinks)
            {
                TurnableLinksList.Remove(item);
            }
        }

        _l.info($"Task {Thread.CurrentThread.Name} : {TurnableLinksList.Count} left in scrapping");
    }
    while (TurnableLinksList.Count > 0);

    InvokeOnInstanceStatusUpdating(WebScrapperBaseStatuses.InstanceShuttedDown);
    _l.info($"Task {Thread.CurrentThread.Name} finished!");
}