Example #1
0
 /// <summary>
 /// Reports whether the given link is already present in <c>IndexedLinks</c>.
 /// </summary>
 /// <param name="l">Link to look up.</param>
 /// <returns>
 /// <c>true</c> when a non-null entry of <c>IndexedLinks</c> equals <paramref name="l"/>;
 /// <c>false</c> when the collection is null or empty, when no entry matches,
 /// or when the lookup itself throws.
 /// </returns>
 protected override bool IsLinkIndexed(string l)
 {
     if (ReferenceEquals(IndexedLinks, null) || IndexedLinks.Count == 0)
     {
         return false;
     }

     try
     {
         // Null entries are skipped explicitly: calling Equals on one would throw, and the
         // catch below would then answer "not indexed" without looking at the remaining entries.
         return IndexedLinks.Any(e => !ReferenceEquals(e, null) && e.Equals(l));
     }
     catch (Exception)
     {
         // Best effort: any failure while scanning the collection is reported as "not indexed".
         // NOTE(review): presumably this guards against the collection changing during the scan - confirm.
         return false;
     }
 }
Example #2
0
        /// <summary>
        /// Works through <c>TurnableLinksList</c> until it is empty, then reports
        /// <c>WebScrapperBaseStatuses.InstanceShuttedDown</c>.
        /// </summary>
        /// <remarks>
        /// For every pending link the page is downloaded with a fresh <c>ScrapingBrowser</c>. When the
        /// site's product-page indication selector is configured and selects a node with non-blank text,
        /// the link is added to <c>IndexedLinks</c> and passed to <c>ScrapProductFromUrl</c>. Every
        /// successfully downloaded page is then passed to <c>LinksScrapperThread</c>. A link is removed
        /// from the pending list after its attempt whether or not the attempt succeeded; failures are
        /// only logged.
        ///
        /// Requests are throttled: after a random number of requests the thread sleeps for a random
        /// interval. Both values are drawn from <c>SiteBaseRequestsIntervalMin</c> /
        /// <c>SiteBaseRequestsIntervalMax</c> (the pause uses them multiplied by 1000, as milliseconds).
        /// </remarks>
        private void LinksProcessor()
        {
            // One generator for the whole run instead of a new one per pass.
            var random = new Random();

            // Passes are repeated in a loop rather than by calling LinksProcessor() again:
            // the list can be non-empty after a pass, and one stack frame per pass can overflow the stack.
            while (true)
            {
                if (TurnableLinksList.Count > 0)
                {
                    var pendingLinks   = TurnableLinksList;
                    var processedLinks = new List<string>();

                    // Throttle state is reset at the start of every pass.
                    int operationsLimit = random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin, requestScrappingSite.SiteBaseRequestsIntervalMax);
                    int operationsDone  = 0;

                    // Count() is re-evaluated on each iteration, so entries appended to the list while
                    // the pass is running are visited in the same pass.
                    // NOTE(review): presumably LinksScrapperThread is what appends them - confirm.
                    for (var i = 0; i < pendingLinks.Count(); i++)
                    {
                        string link = pendingLinks.ElementAt(i);
                        _l.info($"Current link: {link}");
                        ScrapingBrowser browser = new ScrapingBrowser();
                        try
                        {
                            if (operationsDone >= operationsLimit)
                            {
                                // Burst exhausted: pause, then draw the size of the next burst.
                                Thread.Sleep(random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin * 1000, requestScrappingSite.SiteBaseRequestsIntervalMax * 1000));
                                operationsLimit = random.Next(requestScrappingSite.SiteBaseRequestsIntervalMin, requestScrappingSite.SiteBaseRequestsIntervalMax);
                                operationsDone  = 0;
                            }
                            operationsDone++;

                            var htmlNode = browser.NavigateToPage(new Uri(link)).Html;
                            if (htmlNode != null)
                            {
                                try
                                {
                                    var productSelector = requestScrappingSite.SiteProductPageIndicationSelector;
                                    if (!string.IsNullOrEmpty(productSelector))
                                    {
                                        var productNameNode = SelectNode(productSelector, htmlNode);

                                        // Compare the text by content; a reference comparison against
                                        // String.Empty only tests object identity.
                                        if (productNameNode != null && !string.IsNullOrWhiteSpace(productNameNode.InnerText))
                                        {
                                            IndexedLinks.Add(link);
                                            _l.info($"Adding {link} to products list collection");
                                            ScrapProductFromUrl(link, htmlNode, new WebScrapperBaseProxyEntity());
                                        }
                                    }
                                    LinksScrapperThread(link);
                                }
                                catch (Exception e)
                                {
                                    _l.error($"Product name node selector error : {e.Message}");
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            _l.error($"Internal scrapping browser error: {e.Message}");
                        }

                        // Recorded even after a failure so a broken link is not retried forever.
                        processedLinks.Add(link);
                    }

                    // Removal is deferred until the pass is over so positions stay stable while iterating.
                    foreach (var processedLink in processedLinks)
                    {
                        TurnableLinksList.Remove(processedLink);
                    }
                }

                _l.info($"Task {Thread.CurrentThread.Name} : {TurnableLinksList.Count} left in scrapping");
                if (TurnableLinksList.Count <= 0)
                {
                    break;
                }
            }

            InvokeOnInstanceStatusUpdating(WebScrapperBaseStatuses.InstanceShuttedDown);
            _l.info($"Task {Thread.CurrentThread.Name} finished!");
        }