/// <summary>
/// Creates a collection from a link title and the link's target.
/// </summary>
/// <param name="title">Title of the collection; stored unchanged.</param>
/// <param name="url">
/// Absolute or site-relative link to the collection. Everything from the first
/// '?' onward is dropped, and a link that does not start with "http" is
/// prefixed with "http://allrecipes.com".
/// </param>
/// <param name="parentCollection">Collection this one belongs to; stored unchanged (may be null).</param>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
public Collection(string title, string url, Collection parentCollection)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    this.Title = title;

    // IndexOf returns -1 when the link has no query string; calling
    // Substring(0, -1) would throw, so only trim when a '?' is present.
    int queryStart = url.IndexOf('?');
    string withoutQuery = queryStart >= 0 ? url.Substring(0, queryStart) : url;

    // Test the prefix instead of Contains so that a relative link which merely
    // mentions "http" somewhere (e.g. inside a query value) is still resolved
    // against the site root.
    if (url.StartsWith("http", StringComparison.OrdinalIgnoreCase))
    {
        this.Url = withoutQuery;
    }
    else
    {
        this.Url = "http://allrecipes.com" + withoutQuery;
    }

    // Starts out empty; nothing in this constructor populates it.
    ChildCollections = new List<Collection>();
    this.ParentCollection = parentCollection;
}
/// <summary>
/// Loads the page at <paramref name="url"/>, collects the sub-collection links
/// found on it, and calls <c>getRecipes</c> once for every result page of the
/// listing.
/// </summary>
/// <param name="url">Address of the collection page to load.</param>
/// <param name="parentCollection">
/// Collection passed to every created child <c>Collection</c> and forwarded to
/// <c>getRecipes</c>.
/// </param>
/// <returns>
/// The collections linked from the page. Their own child collections are not
/// crawled here; this method does not recurse.
/// </returns>
private static List<Collection> getCollections(string url, Collection parentCollection)
{
    var webGet = new HtmlWeb();
    var document = webGet.Load(url);

    List<Collection> childCollections = readChildCollectionLinks(document.DocumentNode, parentCollection);

    long recipeCount = readRecipeCount(document.DocumentNode);
    int numPages = countResultPages(recipeCount);

    // Use '&' when the address already has a query string so the page
    // parameter does not produce a second '?'.
    string pageParameter = (url.IndexOf('?') >= 0 ? "&" : "?") + "Page=";
    for (int pageNum = 1; pageNum <= numPages; pageNum++)
    {
        getRecipes(url + pageParameter + pageNum, parentCollection);
    }

    return childCollections;
}

/// <summary>
/// Builds a <c>Collection</c> for every anchor with id "hlSubNavItem" found
/// beneath a "ul" element with id "subNavGroupContainer".
/// </summary>
private static List<Collection> readChildCollectionLinks(HtmlNode root, Collection parentCollection)
{
    var found = new List<Collection>();

    foreach (HtmlNode listNode in root.Descendants())
    {
        if (listNode.Name != "ul" || !nodeHasId(listNode, "subNavGroupContainer"))
        {
            continue;
        }

        // NOTE(review): every descendant of the list is searched for anchors in
        // turn, so an anchor nested under more than one wrapper element would be
        // added once per wrapper. Kept as-is because the page markup is not
        // visible from here - confirm whether duplicates can occur.
        foreach (HtmlNode recipeGroup in listNode.Descendants())
        {
            // The header container only carries the group title, which is not used.
            if (nodeHasId(recipeGroup, "subNavHeaderContainer"))
            {
                continue;
            }

            foreach (HtmlNode link in recipeGroup.Descendants())
            {
                if (link.Name == "a" && nodeHasId(link, "hlSubNavItem"))
                {
                    found.Add(new Collection(link.InnerText, link.Attributes["href"].Value, parentCollection));
                }
            }
        }
    }

    return found;
}

/// <summary>
/// Reads the recipe total from the "searchResultsCount" paragraphs of the page.
/// Returns 0 when no usable number is present.
/// </summary>
private static long readRecipeCount(HtmlNode root)
{
    long recipeCount = 0;

    foreach (HtmlNode countNode in root.Descendants())
    {
        if (countNode.Name != "p")
        {
            continue;
        }

        var cssClass = countNode.Attributes["class"];
        if (cssClass == null || !cssClass.Value.Contains("searchResultsCount"))
        {
            continue;
        }

        // Paragraphs also tagged "staff-picks" are skipped.
        if (cssClass.Value.Contains("staff-picks"))
        {
            continue;
        }

        foreach (HtmlNode child in countNode.ChildNodes)
        {
            if (child.Name != "span")
            {
                continue;
            }

            // Spans whose id mentions "Collections" are skipped.
            var spanId = child.Attributes["id"];
            if (spanId != null && spanId.Value.Contains("Collections"))
            {
                continue;
            }

            // Thousands separators are stripped before parsing. A span that does
            // not hold a number is ignored instead of aborting the whole crawl.
            // When several spans qualify, the last numeric one wins.
            long parsed;
            if (long.TryParse(
                    child.InnerText.Replace(",", ""),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsed))
            {
                recipeCount = parsed;
            }
        }
    }

    return recipeCount;
}

/// <summary>
/// Converts a recipe total into a number of result pages (always at least 1).
/// </summary>
private static int countResultPages(long recipeCount)
{
    // Presumably the first result page lists 15 recipes and each later page 20;
    // these are the values the paging arithmetic has always used - verify
    // against the site if the layout changes.
    const int firstPageSize = 15;
    const int laterPageSize = 20;

    if (recipeCount <= firstPageSize)
    {
        return 1;
    }

    long remaining = recipeCount - firstPageSize;

    // Integer ceiling of remaining / laterPageSize.
    return 1 + (int)((remaining + laterPageSize - 1) / laterPageSize);
}

/// <summary>
/// True when <paramref name="node"/> has an "id" attribute equal to <paramref name="id"/>.
/// </summary>
private static bool nodeHasId(HtmlNode node, string id)
{
    var idAttribute = node.Attributes["id"];
    return idAttribute != null && idAttribute.Value == id;
}
/// <summary>
/// Loads the listing page at <paramref name="url"/> and attaches
/// <paramref name="parentCollection"/> to every recipe linked from it.
/// </summary>
/// <param name="url">Address of the listing page to load.</param>
/// <param name="parentCollection">Collection added to each recipe's <c>Collections</c>.</param>
/// <remarks>
/// Recipes are keyed by their link target in <c>recipesByUrl</c>. A recipe that
/// is already present is reused; otherwise a new one is created, passed to
/// <c>getRecipeContents</c>, and then stored.
/// </remarks>
private static void getRecipes(string url, Collection parentCollection)
{
    HtmlWeb loader = new HtmlWeb();
    var page = loader.Load(url);

    // Anchors with class "title" inside any div with id "divGridItemWrapper",
    // flattened into one lazily evaluated sequence in document order.
    var recipeLinks = page.DocumentNode.Descendants()
        .Where(grid => grid.Name == "div"
                       && grid.Attributes["id"] != null
                       && grid.Attributes["id"].Value == "divGridItemWrapper")
        .SelectMany(grid => grid.Descendants()
            .Where(anchor => anchor.Name == "a"
                             && anchor.Attributes["class"] != null
                             && anchor.Attributes["class"].Value == "title"));

    foreach (HtmlNode link in recipeLinks)
    {
        String recipeUrl = link.Attributes["href"].Value;

        Recipe recipe;
        if (!recipesByUrl.TryGetValue(recipeUrl, out recipe))
        {
            // First sighting: fill in the contents before registering the
            // recipe, so it is only stored once getRecipeContents has returned.
            recipe = new Recipe(link.InnerText, recipeUrl);
            getRecipeContents(recipe);
            recipesByUrl.Add(recipeUrl, recipe);
        }

        recipe.Collections.Add(parentCollection);
    }
}