// ===========================================================================
// = Private Methods
// ===========================================================================

/// <summary>
/// Downloads the beer list page at the hard-coded fynefest.com URL and lazily
/// yields one <see cref="Beer"/> for every beer paragraph found on it.
/// </summary>
/// <remarks>
/// Starting at the first <c>h1</c>, the method walks forward through sibling nodes
/// and only looks at nodes named <c>p</c>:
/// <list type="bullet">
///   <item>A paragraph with a <c>strong</c> child sets the current brewery name
///   (the text before the first en dash, if one is present).</item>
///   <item>Any other paragraph is treated as a beer record whose first line must
///   look like <c>Name – 4.5% (C)</c> or <c>Name – 4.5% (K)</c>. The last line of
///   the paragraph is stored as <c>StyleName</c>; for a single-line paragraph that
///   is the beer line itself.</item>
/// </list>
/// The cask/keg flag is only used as part of the generated <c>Id</c>.
/// </remarks>
/// <returns>A deferred sequence of beers; the page is fetched on first enumeration.</returns>
/// <exception cref="ApplicationException">
/// A non-brewery paragraph's first line does not match the expected beer format.
/// </exception>
private static IEnumerable<Beer> LoadFromWeb()
{
    // Iterator methods cannot use await, so the fetch is awaited synchronously.
    var data = ScrapeHelper.FetchParseAsync("http://www.fynefest.com/?page_id=5", transform: TransformHtml).Result;

    // Built once up front instead of once per beer paragraph.
    var beerLinePattern = new Regex(@"^(?<beerName>[^–]+?)\s*–?\s*(?<abv>[0-9]+\.?[0-9]*)%?\s*\((?<caskOrKeg>[KC])\)\s*$");

    var next = data.QuerySelector("h1");
    string breweryName = "";

    while (next != null)
    {
        // Only paragraphs carry brewery or beer information.
        if (!next.Name.Equals("p"))
        {
            next = next.NextSibling;
            continue;
        }

        // A paragraph containing <strong> is a brewery heading, not a beer.
        if (next.Elements().Any(child => child.Name.Equals("strong", StringComparison.OrdinalIgnoreCase)))
        {
            breweryName = WebUtility.HtmlDecode(next.Element("strong").InnerText).Trim();
            if (breweryName.Contains("–"))
            {
                // Keep only the part before the en dash.
                breweryName = breweryName.Split('–').First().Trim();
            }

            next = next.NextSibling;
            continue;
        }

        var record = WebUtility.HtmlDecode(next.InnerText);
        var lines = record.Split(new[] { '\n' });
        var firstLine = lines.First();

        var match = beerLinePattern.Match(firstLine);
        if (!match.Success)
        {
            throw new ApplicationException("Invalid beer: " + firstLine);
        }

        var beerName = match.Groups["beerName"].Value.Trim();

        // The page always writes ABV with a '.' decimal separator, so parse with the
        // invariant culture; the current culture could read "4.5" as 45 on
        // comma-decimal locales.
        var abv = decimal.Parse(
            match.Groups["abv"].Value.Trim(),
            System.Globalization.CultureInfo.InvariantCulture);

        var isCask = match.Groups["caskOrKeg"].Value.Trim() == "C";
        var description = lines.Last().Trim();

        var beer = new Beer
        {
            BreweryName = breweryName,
            BeerName = beerName,
            ABV = abv,
            Id = $"{breweryName}$$${beerName}$$${abv}$$${isCask}",
            StyleName = description
        };

        next = next.NextSibling;
        yield return beer;
    }
}
/// <summary>
/// Scrapes the brewery index at friendsandfamily.beer, follows each brewery link
/// and collects the beers listed on every brewery page.
/// </summary>
/// <remarks>
/// Progress is written to the console. Each beer paragraph (<c>ul &gt; li &gt; p</c>)
/// is expected in one of two shapes:
/// <list type="bullet">
///   <item><c>&lt;strong&gt;beer name&lt;/strong&gt; description ...</c></item>
///   <item><c>beer name, description ...</c></item>
/// </list>
/// Paragraphs with no child nodes are skipped. The method waits one second after
/// each brewery page before requesting the next one.
/// </remarks>
/// <returns>All beers found across every brewery page, in page order.</returns>
public async Task<IList<Beer>> Fetch()
{
    var beers = new List<Beer>();

    Console.WriteLine("Scraping Cloudwater FFB...");
    Console.WriteLine();

    var breweriesPage = await ScrapeHelper.FetchParseAsync("https://www.friendsandfamily.beer/family");

    // Materialise once: the result is counted and then iterated, and the selector
    // query would otherwise be evaluated twice.
    var breweryNodes = breweriesPage.QuerySelectorAll("h2 > a").ToList();

    Console.WriteLine($"Found {breweryNodes.Count} breweries...");
    Console.WriteLine();

    var textInfo = CultureInfo.CurrentCulture.TextInfo;

    foreach (var breweryNode in breweryNodes)
    {
        var href = breweryNode.Attributes["href"]?.Value;
        if (string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        // Decode entities BEFORE changing case: title-casing "&amp;" produces
        // "&Amp;", which is no longer a recognised entity and would be left
        // undecoded in the final brewery name.
        var decodedName = (HtmlEntity.DeEntitize(breweryNode.InnerText) ?? string.Empty).Trim();
        var breweryName = textInfo.ToTitleCase(textInfo.ToLower(decodedName));

        Console.WriteLine(breweryName);
        Console.WriteLine("------------------------------");

        var beersPage = await ScrapeHelper.FetchParseAsync(href);
        var beerNodes = beersPage.QuerySelectorAll("ul > li > p").ToList();

        if (beerNodes.Count == 0)
        {
            Console.WriteLine("No beers found (yet)");
        }

        foreach (var beerNode in beerNodes)
        {
            var children = beerNode.ChildNodes;
            if (children == null || children.Count == 0)
            {
                // Empty paragraph: nothing to parse, and indexing would throw.
                continue;
            }

            var first = children[0];
            string beerName;
            string description;

            if (string.Equals(first.Name, "strong", StringComparison.OrdinalIgnoreCase))
            {
                // Style: <strong>beer name</strong> description <strong>(v)</strong>
                beerName = first.InnerText?.Trim();

                // The description node is optional; do not index past the end.
                description = children.Count > 1 ? children[1].InnerText?.Trim() : null;
            }
            else
            {
                // Style: beer name, description <strong>(v)</strong>
                var parts = (first.InnerText ?? string.Empty).Trim().Split(',');
                beerName = parts[0].Trim();
                description = string.Join(',', parts.Skip(1));
            }

            description = description?.TrimStart(' ', ',');

            Console.WriteLine($"{beerName} ----- {description}");

            beers.Add(new Beer
            {
                // Already entity-decoded above; decoding again could mangle literal text.
                BreweryName = breweryName,
                BeerName = beerName != null ? HtmlEntity.DeEntitize(beerName) : null,
                Description = description != null ? HtmlEntity.DeEntitize(description) : null
            });
        }

        Console.WriteLine();

        // Pause between brewery pages so the site is not hit in a tight loop.
        await Task.Delay(1000);
    }

    Console.WriteLine($"Found {beers.Count} beers.");

    return beers;
}