Example #1
0
        /// <summary>
        /// Walks every parent container matched by <c>input.ParentContainer.XPath</c>, then every
        /// product container matched by <c>input.Container.XPath</c> inside it, fills the
        /// non-input-attribute mappings of <paramref name="input"/> from each product node and
        /// collects one formatted output per product whose first such mapping has a non-empty value.
        /// </summary>
        /// <param name="htmlDoc">Parsed HTML document to read from.</param>
        /// <param name="input">
        /// Scrape definition (XPaths, website info and product-detail mappings). Its product
        /// details are cleared and overwritten for every product container visited.
        /// </param>
        /// <param name="pageNumber">Page number forwarded to <c>input.Format</c>.</param>
        /// <returns>
        /// The outputs collected so far. The list is empty when no parent container matches, and
        /// may be partial when an exception interrupts processing (the exception is logged, not rethrown).
        /// </returns>
        private List<WebScrapeOutput> GetContent(HtmlDocument htmlDoc, WebScrapeInput input, int pageNumber)
        {
            List<WebScrapeOutput> result = new List<WebScrapeOutput>();

            try
            {
                // The document root is handed to GetHTMLContent alongside each product node.
                var bodyContainer    = htmlDoc.DocumentNode.SelectSingleNode("//html");
                var parentContainers = htmlDoc.DocumentNode.SelectNodes(input.ParentContainer.XPath);

                if (parentContainers == null)
                {
                    Logger.Write($"ERROR: Parent Container not found for WebsiteName: {input.Website.Name}; URL: {input.Website.URL} in ProcessRequest -- RestClientScraper -> DataGrabber.");
                    return result;
                }

                // loop through all parents
                foreach (HtmlNode parentContainer in parentContainers)
                {
                    var productContainers = parentContainer.SelectNodes(input.Container.XPath);

                    if (productContainers == null)
                    {
                        Logger.Write($"ERROR: Product Container not found for WebsiteName: {input.Website.Name} in ProcessRequest -- RestClientScraper -> DataGrabber.");
                        continue; // a parent without products must not stop the remaining parents
                    }

                    // loop through all products
                    foreach (HtmlNode container in productContainers)
                    {
                        // clear input product details
                        input.ClearProductDetails();

                        // Evaluate the filter once per product, after the clear, so the fill loop
                        // and the emptiness check below look at exactly the same mappings.
                        var scrapedMappings = input.ProductDetails.Where(v => !v.IsInputAttribute).ToList();

                        // get all product details -- which are not input attributes
                        foreach (ElementMapping mapping in scrapedMappings)
                        {
                            mapping.Value = GetHTMLContent(container, bodyContainer, mapping, input.Website.URL);
                        }

                        // set result if value is not empty; the null-conditional guards against
                        // there being no non-input mapping at all (previously a NullReferenceException
                        // that aborted the whole page via the outer catch).
                        if (!string.IsNullOrEmpty(scrapedMappings.FirstOrDefault()?.Value))
                        {
                            result.Add(input.Format(pageNumber, input.Website.UniqueID, input.Website.MappingID)); // format to output
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: log and fall through so the caller still gets what was collected.
                Logger.Write("Exception in GetContent -- RestClientScraper -> DataGrabber. Message: " + ex.Message);
            }

            return result;
        }