/// <summary>
/// Parses the HTML of an album page into an <see cref="Album"/>.
/// </summary>
/// <remarks>
/// Besides reading the supplied markup, this method downloads further pages through
/// <c>RequestHelper.GetPageData</c>: one per artist link in the title heading, one per
/// linked track, and the "all versions" page when the versions section links to it.
/// Optional sections (artist links, profile links, version rows, tracklist) that are
/// absent from the page are treated as empty instead of aborting the whole parse.
/// </remarks>
/// <param name="htmlData">Raw HTML of the album page.</param>
/// <returns>
/// The parsed album, or <c>null</c> when <paramref name="htmlData"/> is null or empty,
/// when the album id or title heading cannot be read, or when any unexpected error
/// occurs (the error message is written to the console).
/// </returns>
public static Album ParseAlbum(string htmlData)
{
    if (string.IsNullOrEmpty(htmlData))
    {
        return null;
    }

    try
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(htmlData);
        var root = htmlDoc.DocumentNode;

        // External album id: the last path segment of the og:url meta tag.
        var urlMeta = root.SelectSingleNode("//head//meta[@property='og:url']");
        string externalAlbumUrl = urlMeta == null ? "" : urlMeta.GetAttributeValue("content", "");
        int externalAlbumId;
        if (!int.TryParse(Path.GetFileName(externalAlbumUrl), out externalAlbumId))
        {
            Console.WriteLine("Parse album: could not parse album id from '" + externalAlbumUrl + "'");
            return null;
        }

        // Album name: taken from the last child of the title heading.
        var titleNode = root.SelectSingleNode("//div[@class='profile']//h1");
        if (titleNode == null || titleNode.LastChild == null)
        {
            Console.WriteLine("Parse album: title heading not found");
            return null;
        }

        var lastChild = titleNode.LastChild;
        var lastGrandchild = lastChild.LastChild;
        string albumName;
        if (lastGrandchild == null || lastGrandchild.LastChild == null)
        {
            // The last child is a leaf, or wraps only leaf content: use its text.
            albumName = lastChild.InnerText.Trim();
        }
        else
        {
            // The name is nested one level deeper.
            albumName = lastGrandchild.InnerText.Trim();
        }

        // Artists: every link in the title heading whose href mentions "artist".
        var artists = new List<Artist>();
        var artistLinks = root.SelectNodes("//div[@class='profile']//h1//a");
        if (artistLinks != null)
        {
            foreach (var artistLink in artistLinks)
            {
                string artistHref = artistLink.GetAttributeValue("href", "");
                if (!artistHref.Contains("artist"))
                {
                    continue;
                }

                var artistData = RequestHelper.GetPageData("https://www.discogs.com" + artistHref);
                var artist = ParseArtist(artistData);
                if (artist != null)
                {
                    artists.Add(artist);
                }
            }
        }

        if (artists.Count == 0)
        {
            artists.Add(new Artist { Name = "Unknown", ExternalArtistId = -1 });
        }

        // Release year.
        var releasedNode = root.SelectSingleNode("//div[@class='profile']//a[contains(@href, 'year')]");
        string releasedStr = releasedNode == null ? "" : releasedNode.InnerText;
        int released = ParseYearFromDate(releasedStr);

        // Format: the text of the div that follows the first div containing "Format:".
        string formatStr = "";
        var profileDivs = root.SelectNodes("//div[@class='profile']//div");
        if (profileDivs != null)
        {
            for (int i = 0; i < profileDivs.Count - 1; i++)
            {
                if (profileDivs[i].InnerHtml.Contains("Format:"))
                {
                    formatStr = profileDivs[i + 1].InnerText.Trim();
                    break;
                }
            }
        }

        // Genres and styles: profile links whose href mentions "genre" / "style".
        List<Genre> albumGenres = new List<Genre>();
        List<Style> albumStyles = new List<Style>();
        var profileLinks = root.SelectNodes("//div[@class='profile']//a");
        if (profileLinks != null)
        {
            foreach (var profileLink in profileLinks)
            {
                string href = profileLink.GetAttributeValue("href", "");

                // The two checks are independent: one link may match both.
                if (href.Contains("genre"))
                {
                    albumGenres.Add(new Genre
                    {
                        GenreExternalName = Path.GetFileName(href),
                        GenreName = profileLink.InnerText.Trim()
                    });
                }

                if (href.Contains("style"))
                {
                    albumStyles.Add(new Style
                    {
                        StyleExternalName = Path.GetFileName(href),
                        StyleName = profileLink.InnerText.Trim()
                    });
                }
            }
        }

        // Country.
        var countryNode = root.SelectSingleNode("//div[@class='profile']//a[contains(@href, 'country')]");
        string country = countryNode == null ? "" : countryNode.InnerText.Trim();

        // Number of versions: defaults to 1 when the page has no versions section.
        int numVersions = 1;
        var versionsSection = root.SelectSingleNode("//div[contains(@class, 'm_versions')]");
        if (versionsSection != null)
        {
            var viewAllLink = root.SelectSingleNode("//div[contains(@class, 'm_versions')]//h3//a");
            if (viewAllLink == null)
            {
                // No "view all" link: count the version rows shown on this page.
                var versionRows = root.SelectNodes("//div[contains(@class, 'm_versions')]//tr[contains(@class, 'card r_tr')]");
                if (versionRows != null)
                {
                    numVersions = versionRows.Count;
                }
            }
            else
            {
                string allVersionsLink = "https://www.discogs.com" + viewAllLink.GetAttributeValue("href", "");
                var versionsData = RequestHelper.GetPageData(allVersionsLink);
                numVersions = ParseNumVersions(versionsData);
            }
        }

        // The name counts as Cyrillic only when it contains no non-Cyrillic character.
        bool isCyrilic = !Regex.IsMatch(albumName, @"\P{IsCyrillic}");
        if (isCyrilic)
        {
            albumName = CyrilicToLatin(albumName);
        }

        Album album = new Album
        {
            Name = albumName,
            Country = country,
            Format = formatStr,
            ExternalAlbumId = externalAlbumId,
            NumVersions = numVersions,
            Artist = artists,
            Released = released,
            Tracklist = new List<Song>(),
            IsCyrilic = isCyrilic,
            Genres = albumGenres,
            Styles = albumStyles
        };

        // Tracklist: only tracks that link to their own page are fetched and parsed.
        var trackRows = root.SelectNodes("//div[@class='section tracklist']//table[@class='playlist']//tr[contains(@class, ' tracklist_track track')]");
        if (trackRows != null)
        {
            foreach (var trackRow in trackRows)
            {
                var trackAnchor = trackRow.SelectSingleNode("td[@class='track tracklist_track_title ']//a");
                string trackHref = trackAnchor == null ? "" : trackAnchor.GetAttributeValue("href", "");
                if (trackHref == "")
                {
                    continue;
                }

                var trackData = RequestHelper.GetPageData("https://www.discogs.com" + trackHref);
                Song song = ParseSong(trackData);
                if (song != null)
                {
                    album.Tracklist.Add(song);
                }
            }
        }

        return album;
    }
    catch (Exception ex)
    {
        // Best-effort parser: any unexpected failure is reported and yields null.
        Console.WriteLine("Parse album: " + ex.Message);
        return null;
    }
}