/// <summary>
/// Visits every category page and writes each anchor href that contains "in-stoc" to the console.
/// </summary>
/// <param name="categoryLinks">Addresses of the category pages to inspect.</param>
/// <returns>The <c>categoryInStockLinks</c> collection that is declared outside this method.</returns>
public static IEnumerable<string> GetInStockCategoryLinks(IEnumerable<string> categoryLinks)
{
    try
    {
        foreach (var link in categoryLinks)
        {
            var page = StoreConnection.ConnectToStoreAddress(link);
            var anchors = page.DocumentNode.SelectNodes("//a[@href]");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            // Skip such a page instead of letting a NullReferenceException abort the remaining pages.
            if (anchors == null)
            {
                continue;
            }

            foreach (var anchor in anchors)
            {
                var hrefLink = anchor.Attributes["href"].Value;

                // A value containing "in-stoc" can never be blank, so no separate whitespace check is needed.
                if (hrefLink.Contains("in-stoc"))
                {
                    Console.WriteLine(hrefLink);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort: any failure is reported on the console and the method still returns.
        Console.WriteLine("GetInStockCategoryLinks: " + ex.Message);
    }

    // NOTE(review): matched links are only printed; nothing in this method adds them to
    // categoryInStockLinks - confirm whether they are supposed to be collected here.
    return categoryInStockLinks;
}
/// <summary>
/// Loads a category page and selects the option nodes that sit directly under a select node
/// whose class is 'js-trigger-catalog-toolbar-apply-filters'.
/// </summary>
/// <param name="categoryAddress">Address of the category page to load.</param>
/// <returns>The result of the XPath query, exactly as <c>SelectNodes</c> returns it.</returns>
public static HtmlNodeCollection StoreCategoryHtmlNodeCollection(string categoryAddress)
{
    const string optionsXPath = "//select[@class='js-trigger-catalog-toolbar-apply-filters']/option";

    return StoreConnection.ConnectToStoreAddress(categoryAddress)
        .DocumentNode
        .SelectNodes(optionsXPath);
}
/// <summary>
/// Loads each category page, selects its <c>div</c> nodes with class 'Product' and hands every
/// node, together with the result of <c>FindPrices</c>, to <c>AddProducts</c>.
/// </summary>
/// <param name="categoryPages">Addresses of the category pages to scrape.</param>
private void ScrapeAllProductsInCategoryPage(IEnumerable<string> categoryPages)
{
    // The counter is not reset between pages, so it keeps increasing across the whole run.
    var id = 0;

    foreach (var categoryPage in categoryPages)
    {
        var page = StoreConnection.ConnectToStoreAddress(categoryPage);
        var productNodes = page.DocumentNode.SelectNodes("//div[@class='Product']");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        // A page without products is skipped rather than crashing the whole scrape.
        if (productNodes == null)
        {
            continue;
        }

        foreach (var productNode in productNodes)
        {
            var prices = FindPrices(productNode);
            AddProducts(id, productNode, prices);
            id++;
        }
    }
}
/// <summary>
/// Loads the site map page and gathers the href values found on it that pass the category filter.
/// </summary>
/// <param name="siteMap">Address of the site map page.</param>
/// <returns>The result of <c>GetDistinctCategories</c> applied to the accumulated link list.</returns>
public IEnumerable<string> GetAllCategories(string siteMap)
{
    var siteMapPage = StoreConnection.ConnectToStoreAddress(siteMap);
    var hrefNodes = GetAllHrefNodesFromLink(siteMapPage);
    AddNonCategoryLinksToDictionary(_incorrectCategoryLinksDictionary);

    foreach (var hrefNode in hrefNodes)
    {
        // A missing href attribute is read as an empty string.
        var siteLink = hrefNode.GetAttributeValue("href", string.Empty);

        // Links registered as incorrect category links are left out.
        if (_incorrectCategoryLinksDictionary.ContainsKey(siteLink))
        {
            continue;
        }

        // Only links that contain "http" are kept.
        if (!siteLink.Contains("http"))
        {
            continue;
        }

        _correctcategoryLinksList.Add(siteLink);
    }

    return GetDistinctCategories(_correctcategoryLinksList);
}
/// <summary>
/// Connects to a hard-coded address and writes the <c>ParsedText</c> of the result to the console.
/// </summary>
public void ReturnTest()
{
    Console.WriteLine(
        StoreConnection.ConnectToStoreAddress("https://www.thisiswhyimbroke.com/").ParsedText);
}