/// <summary>
/// Builds a <see cref="Brewery"/> from a parsed brewery page and, when a region
/// matches, registers the new brewery in that region's <c>Breweries</c> collection.
/// </summary>
/// <param name="doc">
/// Parsed brewery page. The logo and foundation-year lookups tolerate a null document;
/// NOTE(review): <c>getBreweryAttributeFromDocument</c> is defined outside this block,
/// so whether it tolerates a null document must be confirmed there.
/// </param>
/// <param name="regions">
/// Candidate regions. The first one whose <c>Abbreviation</c> is an ordinal prefix of the
/// scraped region text is assigned to the brewery. The list reference is never reassigned;
/// the <c>ref</c> modifier is kept only for caller compatibility.
/// </param>
/// <returns>
/// The populated brewery. <c>Region</c> is null when no region matches,
/// <c>YearOfFoundation</c> is 0 when no year could be parsed and <c>ImageUrl</c> is null
/// when the page has no logo image.
/// </returns>
private static Brewery GetBreweryFromDocument(HtmlDocument doc, ref List<Region> regions)
{
    string name = getBreweryAttributeFromDocument(doc, "Jméno pivovaru");
    string type = getBreweryAttributeFromDocument(doc, "Typ pivovaru");
    string address = getBreweryAttributeFromDocument(doc, "Adresa pivovaru");

    // NOTE(review): the trailing index presumably selects a further part of the
    // address attribute (1 = city, 2 = region) — confirm against the helper.
    string rawCity = getBreweryAttributeFromDocument(doc, "Adresa pivovaru", 1);
    string regionName = getBreweryAttributeFromDocument(doc, "Adresa pivovaru", 2);
    string webSite = getBreweryAttributeFromDocument(doc, "Domovské stránky");

    // Drop everything up to and including the last "nbsp;" on a line.
    // A missing city stays null instead of making Regex.Replace throw.
    string city = rawCity == null ? null : Regex.Replace(rawCity, "(.)*(nbsp;)", "");

    // SelectSingleNode returns the first match or null, which is what the rest of
    // this file already relies on when reading single elements.
    string imageRelativeUrl = doc?.DocumentNode
        ?.SelectSingleNode("//img[@class='logotyp']")
        ?.GetAttributeValue("src", null);
    string foundationYearString = doc?.DocumentNode
        ?.SelectSingleNode("//span[@title='Založení pivovaru']")
        ?.InnerText;

    // The first run of digits in the foundation text is taken as the year.
    int yearOfFoundation = 0;
    if (foundationYearString != null)
    {
        int.TryParse(Regex.Match(foundationYearString, @"\d+").Value, out yearOfFoundation);
    }

    // Abbreviations are codes, not linguistic text, so the prefix test is ordinal and
    // does not depend on the machine's culture. A missing region text simply yields
    // no region instead of a NullReferenceException.
    Region breweryRegion = null;
    if (regionName != null)
    {
        breweryRegion = regions.FirstOrDefault(
            r => r.Abbreviation != null && regionName.StartsWith(r.Abbreviation, StringComparison.Ordinal));
    }

    var brewery = new Brewery()
    {
        Name = name,
        Type = type,
        YearOfFoundation = yearOfFoundation,
        City = city,
        Address = address,
        Region = breweryRegion,
        WebSiteUrl = webSite,
        ImageUrl = imageRelativeUrl == null ? null : "http://ceskepivo-ceskezlato.cz/" + imageRelativeUrl
    };

    // Keep both sides of the region <-> brewery relation in sync.
    if (breweryRegion != null)
    {
        breweryRegion.Breweries.Add(brewery);
    }

    return brewery;
}
/// <summary>
/// Parses every <c>div.beer-box</c> element of a brewery page into a <see cref="Beer"/>
/// and returns the beers that are still being brewed.
/// </summary>
/// <param name="doc">Parsed brewery page; a null document or a page without beer boxes yields an empty list.</param>
/// <param name="brewery">Brewery passed through to <c>GetBeerFromNode</c> for each parsed beer.</param>
/// <returns>
/// The parsed beers, excluding those whose description contains
/// "Vaření piva ukončeno.". Never null.
/// </returns>
private static List<Beer> GetBeersFromDocument(HtmlDocument doc, Brewery brewery)
{
    var beersForBrewery = new List<Beer>();

    var beerNodes = doc?.DocumentNode?.SelectNodes("//div[@class='beer-box']")?.ToList();
    if (beerNodes == null)
    {
        return beersForBrewery;
    }

    for (var i = 0; i < beerNodes.Count; i++)
    {
        Console.WriteLine($" Parsing Beer {i + 1}/{beerNodes.Count}");
        var beer = GetBeerFromNode(beerNodes[i], brewery);

        // A beer without a description cannot carry the discontinued marker, so it
        // is kept rather than crashing on a null Description.
        bool isDiscontinued = beer.Description != null
            && beer.Description.Contains("Vaření piva ukončeno.");
        if (isDiscontinued)
        {
            Console.WriteLine(" --Beer is no longer being brewed. Skipping...");
            continue;
        }

        beersForBrewery.Add(beer);
    }

    return beersForBrewery;
}
/// <summary>
/// Builds a <see cref="Beer"/> from one beer-box node: reads its title, description,
/// category text, EPM / alcohol figures and image, and derives the beer's tags.
/// </summary>
/// <remarks>
/// Tags come from two sources: the space-separated words of the category text (words
/// shorter than four characters and the word "pivo" are ignored, a trailing 'ý' is
/// rewritten to 'é'), and fixed keyword checks against the name and description.
/// Every tag is looked up in <c>TagSet</c> by name and added there when missing.
/// Elements missing from the markup leave the corresponding property null and simply
/// contribute no tags.
/// </remarks>
/// <param name="node">The beer-box element to read from; must not be null.</param>
/// <param name="brewery">Brewery assigned to the resulting beer.</param>
/// <returns>The populated beer; <c>Epm</c> and <c>AlcoholContentPercentage</c> are 0 when they cannot be parsed.</returns>
private static Beer GetBeerFromNode(HtmlNode node, Brewery brewery)
{
    string name = node.SelectSingleNode(".//div[@class='beer-title']//h4")?.InnerText;
    string description = node.SelectSingleNode(".//div[@class='beer-text']")?.InnerText;
    string tags = node.SelectSingleNode(".//div[@class='beer-spec']//span")?.InnerText;
    string epmAndAlcohol = node.SelectNodes(".//div[@class='beer-spec']/strong")?.ToList()?.Last().InnerText;

    // The lookups above return null for absent markup; keyword matching runs against
    // empty text in that case so a sparse beer box does not abort the whole scrape.
    string nameText = name ?? string.Empty;
    string descriptionText = description ?? string.Empty;
    string tagsText = tags ?? string.Empty;

    var tagsList = new List<Tag>();

    // Splitting on spaces and discarding empty entries is equivalent to collapsing
    // runs of spaces first and then filtering out empty pieces.
    foreach (var rawTag in tagsText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // All checks use the trimmed word, so stray whitespace around a word cannot
        // defeat the filters or leak into the stored tag name.
        string tagName = rawTag.Trim();
        if (tagName.Length < 4 || tagName == "pivo")
        {
            continue;
        }

        // Rewrite a trailing 'ý' to 'é' so both spellings map to the same tag name.
        if (tagName[tagName.Length - 1] == 'ý')
        {
            tagName = tagName.Substring(0, tagName.Length - 1) + "é";
        }

        tagsList.Add(GetOrCreateTag(tagName));
    }

    // Keyword-derived tags, checked in a fixed order.
    if (nameText.Contains("IPA") || nameText.Contains("India Pale Ale"))
    {
        tagsList.Add(GetOrCreateTag("IPA"));
    }
    if (nameText.Contains("APA"))
    {
        tagsList.Add(GetOrCreateTag("APA"));
    }
    if (nameText.Contains("hořk"))
    {
        tagsList.Add(GetOrCreateTag("hořké"));
    }
    if (nameText.Contains("10") || nameText.Contains("Desít"))
    {
        tagsList.Add(GetOrCreateTag("10tka"));
    }
    if (nameText.Contains("12") || nameText.Contains("Dvan"))
    {
        tagsList.Add(GetOrCreateTag("12tka"));
    }
    if (descriptionText.Contains("nejsiln"))
    {
        tagsList.Add(GetOrCreateTag("silné"));
    }
    if (nameText.Contains("jantar"))
    {
        tagsList.Add(GetOrCreateTag("jantar"));
    }

    // The spec text is split on '/': the first part carries the EPM figure and, only
    // when there are exactly two parts, the second carries the alcohol content.
    double epm = 0.0;
    double alcoholContent = 0.0;
    if (epmAndAlcohol != null)
    {
        var parts = epmAndAlcohol.Split('/');
        epm = ParseFirstNumber(parts[0]);
        if (parts.Length == 2)
        {
            alcoholContent = ParseFirstNumber(parts[1]);
        }
    }

    string imageRelativeUrl = node.SelectSingleNode(".//div[@class='beer-view']//img")?.GetAttributeValue("src", null);

    var beer = new Beer()
    {
        Name = name,
        Description = description,
        Epm = epm,
        AlcoholContentPercentage = alcoholContent,
        Category = tags,
        Brewery = brewery,
        ImageUrl = imageRelativeUrl == null ? null : "http://ceskepivo-ceskezlato.cz/" + imageRelativeUrl,
        Tags = tagsList,
        IsPopular = name != null && PopularBeerNames.Contains(name)
    };

    return beer;
}

/// <summary>
/// Returns the tag named <paramref name="tagName"/> from <c>TagSet</c>, creating and
/// registering it first when no such tag exists yet.
/// </summary>
private static Tag GetOrCreateTag(string tagName)
{
    var tag = TagSet.FirstOrDefault(t => t.Name == tagName);
    if (tag == null)
    {
        tag = new Tag { Name = tagName };
        TagSet.Add(tag);
    }

    return tag;
}

/// <summary>
/// Parses the first dot-decimal number found in <paramref name="text"/>; returns 0 when
/// there is none.
/// </summary>
private static double ParseFirstNumber(string text)
{
    // The pattern only ever yields digits with an optional '.', so the value is parsed
    // with the invariant culture; a comma-decimal machine culture would reject "4.5".
    double value;
    double.TryParse(
        Regex.Match(text, @"([0-9]*[.])?[0-9]+").Value,
        System.Globalization.NumberStyles.Float,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
    return value;
}