/// <summary>
/// Copies the text of the first two <c>h2</c> elements of the page into the
/// provider's <c>name</c> and <c>owner</c> fields, in that order.
/// </summary>
/// <param name="provider">Provider record to populate.</param>
/// <param name="doc">Parsed listing page.</param>
/// <remarks>
/// A field is left untouched when the corresponding <c>h2</c> is absent, so a page
/// with fewer than two headings no longer aborts the scrape with an
/// out-of-range exception.
/// </remarks>
private static void AddNameOwnerInformation(DaycareProvider provider, HtmlDocument doc)
{
    // Only the first two headings are ever read, so do not materialize more than that.
    var headers = doc.DocumentNode.Descendants("h2").Take(2).ToList();

    if (headers.Count > 0)
    {
        provider.name = headers[0].InnerText;
    }

    if (headers.Count > 1)
    {
        provider.owner = headers[1].InnerText;
    }
}
/// <summary>
/// Populates the provider's <c>email</c>, <c>phone</c> and <c>website</c> fields from the
/// nodes that follow the first <c>h4</c> whose text contains "Contact Information".
/// </summary>
/// <param name="provider">Provider record to populate.</param>
/// <param name="doc">Parsed listing page.</param>
/// <remarks>
/// The scan walks the heading's following siblings and stops at the next <c>h4</c> or at
/// the end of the sibling list. Nothing is changed when the heading is missing. The email
/// and website values are read from the node immediately after their label; when that
/// node (or its <c>href</c> attribute) is missing, the field is left untouched rather
/// than throwing a null-reference exception.
/// </remarks>
private static void AddContactInformation(DaycareProvider provider, HtmlDocument doc)
{
    var header = doc.DocumentNode
        .Descendants("h4")
        .FirstOrDefault(d => d.InnerText.Contains("Contact Information"));
    if (header == null)
    {
        return;
    }

    var sibling = header.NextSibling;
    while (sibling != null && sibling.Name != "h4")
    {
        var text = sibling.InnerText.Trim();
        var next = sibling.NextSibling;

        if (text.StartsWith("Email address:"))
        {
            // The address itself lives in the node after the label.
            if (next != null)
            {
                provider.email = next.InnerText;
            }
        }
        else if (text.StartsWith("Phone:"))
        {
            // The number is in the same node as the label.
            provider.phone = sibling.InnerText.Replace("Phone:", "").Trim();
        }
        else if (text.StartsWith("Web Site Link:"))
        {
            // The link target is the href of the node after the label.
            var href = next != null ? next.Attributes["href"] : null;
            if (href != null)
            {
                provider.website = href.Value;
            }
        }

        sibling = next;
    }
}
/// <summary>
/// Populates the provider's <c>daysOfTheWeek</c> and <c>hours</c> fields from the nodes
/// that follow the first <c>h4</c> whose text contains "Days/Hours".
/// </summary>
/// <param name="provider">Provider record to populate.</param>
/// <param name="doc">Parsed listing page.</param>
/// <remarks>
/// Siblings are scanned until the next <c>h4</c> or the end of the sibling list.
/// Nothing is changed when the heading is missing.
/// </remarks>
private static void AddOperationInformation(DaycareProvider provider, HtmlDocument doc)
{
    var sectionHeading = doc.DocumentNode
        .Descendants("h4")
        .FirstOrDefault(h => h.InnerText.Contains("Days/Hours"));
    if (sectionHeading == null)
    {
        return;
    }

    for (var node = sectionHeading.NextSibling; node != null && node.Name != "h4"; node = node.NextSibling)
    {
        var rawText = node.InnerText;
        var trimmedText = rawText.Trim();

        if (trimmedText.StartsWith("Days of the Week:"))
        {
            // Strip the label and keep whatever remains in the same node.
            provider.daysOfTheWeek = rawText.Replace("Days of the Week:", "").Trim();
        }

        if (trimmedText.StartsWith("Hours:"))
        {
            provider.hours = rawText.Replace("Hours:", "").Trim();
        }
    }
}
/// <summary>
/// Populates the provider's <c>location</c> and <c>address</c> fields from the first
/// <c>h4</c> whose text contains "Location:" and the nodes that follow it.
/// </summary>
/// <param name="provider">Provider record to populate.</param>
/// <param name="doc">Parsed listing page.</param>
/// <remarks>
/// <c>location</c> is the heading text after its first space (or the whole text when it
/// contains no space). <c>address</c> is built by appending the text of every following
/// sibling, up to the next <c>h4</c>, that is not an <c>a</c> element and is not
/// whitespace-only; newlines are removed and pieces are separated by ", ".
/// Nothing is changed when the heading is missing.
/// </remarks>
private static void AddLocationInformation(DaycareProvider provider, HtmlDocument doc)
{
    var locationHeading = doc.DocumentNode
        .Descendants("h4")
        .FirstOrDefault(h => h.InnerText.Contains("Location:"));
    if (locationHeading == null)
    {
        return;
    }

    // Split at the first space only; the last piece is everything after the label.
    var headingParts = locationHeading.InnerText.Split(new[] { ' ' }, 2);
    provider.location = headingParts[headingParts.Length - 1];

    for (var node = locationHeading.NextSibling; node != null && node.Name != "h4"; node = node.NextSibling)
    {
        if (node.Name == "a" || node.InnerText.Trim().Length == 0)
        {
            continue;
        }

        provider.address += node.InnerText.Replace("\n", "") + ", ";
    }

    if (provider.address == null)
    {
        return;
    }

    // Drop the trailing ", " left behind by the last appended piece.
    provider.address = provider.address.Trim().TrimEnd(',');
}
/// <summary>
/// Loads the listing page at <paramref name="url"/> and builds a
/// <c>DaycareProvider</c> from it.
/// </summary>
/// <param name="url">Address of the listing page; also stored as the provider's <c>listingUrl</c>.</param>
/// <returns>
/// A new provider populated, in order, with name/owner, location, contact and
/// operation information extracted from the page.
/// </returns>
private static DaycareProvider ScrapeDaycareProvider(string url)
{
    var page = new HtmlWeb().Load(url);
    var result = new DaycareProvider { listingUrl = url };

    AddNameOwnerInformation(result, page);
    AddLocationInformation(result, page);
    AddContactInformation(result, page);
    AddOperationInformation(result, page);

    return result;
}