Beispiel #1
0
        /// <summary>
        /// Builds an <see cref="Album"/> from the HTML of a Discogs album page.
        /// </summary>
        /// <remarks>
        /// Besides reading the supplied markup, this method requests further pages through
        /// <c>RequestHelper.GetPageData</c>: one per artist link in the title, one per linked
        /// track, and the "all versions" page when the versions section links to it.
        /// Optional parts of the page (artist links, release year, format, genres, styles,
        /// country, versions, tracklist) may be absent; the corresponding fields then keep
        /// their defaults instead of aborting the whole parse.
        /// </remarks>
        /// <param name="htmlData">Raw HTML of the album page.</param>
        /// <returns>
        /// The parsed album, or <c>null</c> when <paramref name="htmlData"/> is null or empty,
        /// when the page has no numeric album id or no profile title, or when any exception
        /// is raised while parsing (the message is written to the console).
        /// </returns>
        public static Album ParseAlbum(string htmlData)
        {
            if (string.IsNullOrEmpty(htmlData))
            {
                return null;
            }

            // Host prepended to the site-relative hrefs found in the page.
            const string siteRoot = "https://www.discogs.com";

            try
            {
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(htmlData);
                var root = htmlDoc.DocumentNode;

                /* ExternalAlbumId: last path segment of the og:url meta tag */
                var ogUrlNode          = root.SelectSingleNode("//head//meta[@property='og:url']");
                var externalAlbumIdStr = ogUrlNode == null ? "" : ogUrlNode.GetAttributeValue("content", "");
                int externalAlbumId;
                if (!Int32.TryParse(Path.GetFileName(externalAlbumIdStr), out externalAlbumId))
                {
                    Console.WriteLine("Parse album: no numeric album id in og:url '" + externalAlbumIdStr + "'");
                    return null;
                }

                /* Album name */
                var titleNode = root.SelectSingleNode("//div[@class='profile']//h1");
                if (titleNode == null)
                {
                    Console.WriteLine("Parse album: profile title (h1) not found");
                    return null;
                }

                // The name is taken from the last child of the title, or from that child's
                // own last child when it is itself a container. Leaf nodes (e.g. text) have
                // no LastChild, so every level is null-checked.
                var    nameNode   = titleNode.LastChild;
                var    nestedNode = nameNode == null ? null : nameNode.LastChild;
                string albumName;
                if (nameNode == null)
                {
                    albumName = titleNode.InnerText.Trim();
                }
                else if (nestedNode == null || nestedNode.LastChild == null)
                {
                    albumName = nameNode.InnerText.Trim();
                }
                else
                {
                    albumName = nestedNode.InnerText.Trim();
                }

                /* Artist */
                var artists     = new List<Artist>();
                var artistLinks = root.SelectNodes("//div[@class='profile']//h1//a");
                if (artistLinks != null) // SelectNodes yields null, not an empty list, on no match
                {
                    foreach (var artistLink in artistLinks)
                    {
                        var artistHref = artistLink.GetAttributeValue("href", "");
                        if (!artistHref.Contains("artist"))
                        {
                            continue;
                        }

                        var artistData = RequestHelper.GetPageData(siteRoot + artistHref);
                        var artist     = ParseArtist(artistData);
                        if (artist != null)
                        {
                            artists.Add(artist);
                        }
                    }
                }
                if (artists.Count == 0)
                {
                    // Placeholder so the album always has at least one artist entry.
                    artists.Add(new Artist { Name = "Unknown", ExternalArtistId = -1 });
                }

                /* Released */
                var yearLink    = root.SelectSingleNode("//div[@class='profile']//a[contains(@href, 'year')]");
                var releasedStr = yearLink == null ? "" : yearLink.InnerText;
                int released    = ParseYearFromDate(releasedStr);

                /* Format: text of the div that follows the one containing "Format:" */
                var profileDivs = root.SelectNodes("//div[@class='profile']//div");
                var formatStr   = "";
                if (profileDivs != null)
                {
                    // Stop one short of the end so profileDivs[i + 1] is always valid.
                    for (int i = 0; i < profileDivs.Count - 1; i++)
                    {
                        if (profileDivs[i].InnerHtml.Contains("Format:"))
                        {
                            formatStr = profileDivs[i + 1].InnerText.Trim();
                            break;
                        }
                    }
                }

                /* Genre, Style */
                var albumGenres  = new List<Genre>();
                var albumStyles  = new List<Style>();
                var profileLinks = root.SelectNodes("//div[@class='profile']//a");
                if (profileLinks != null)
                {
                    foreach (var profileLink in profileLinks)
                    {
                        var linkHref = profileLink.GetAttributeValue("href", "");

                        // The two checks are independent: a single href may match both.
                        if (linkHref.Contains("genre"))
                        {
                            albumGenres.Add(new Genre
                            {
                                GenreExternalName = Path.GetFileName(linkHref),
                                GenreName         = profileLink.InnerText.Trim()
                            });
                        }
                        if (linkHref.Contains("style"))
                        {
                            albumStyles.Add(new Style
                            {
                                StyleExternalName = Path.GetFileName(linkHref),
                                StyleName         = profileLink.InnerText.Trim()
                            });
                        }
                    }
                }

                /* Country */
                var countryLink = root.SelectSingleNode("//div[@class='profile']//a[contains(@href, 'country')]");
                var country     = countryLink == null ? "" : countryLink.InnerText.Trim();

                /* numVersions: defaults to 1 when the page lists no versions */
                int numVersions = 1;
                if (root.SelectSingleNode("//div[contains(@class, 'm_versions')]") != null)
                {
                    var viewAllLink = root.SelectSingleNode("//div[contains(@class, 'm_versions')]//h3//a");
                    if (viewAllLink == null)
                    {
                        // No link to a full list: count the rows shown on this page.
                        var versionRows = root.SelectNodes("//div[contains(@class, 'm_versions')]//tr[contains(@class, 'card r_tr')]");
                        if (versionRows != null)
                        {
                            numVersions = versionRows.Count;
                        }
                    }
                    else
                    {
                        var allVersionsLink = siteRoot + viewAllLink.GetAttributeValue("href", "");
                        var versionsData    = RequestHelper.GetPageData(allVersionsLink);
                        numVersions = ParseNumVersions(versionsData);
                    }
                }

                /* Cyrilic or latin */
                // True only when the name contains no character outside the Cyrillic block,
                // so a name with a space, digit or punctuation is not flagged, while an
                // empty name is.
                bool isCyrilic = !Regex.IsMatch(albumName, @"\P{IsCyrillic}");
                if (isCyrilic)
                {
                    albumName = CyrilicToLatin(albumName);
                }

                Album album = new Album
                {
                    Name            = albumName,
                    Country         = country,
                    Format          = formatStr,
                    ExternalAlbumId = externalAlbumId,
                    NumVersions     = numVersions,
                    Artist          = artists,
                    Released        = released,
                    Tracklist       = new List<Song>(),
                    IsCyrilic       = isCyrilic,
                    Genres          = albumGenres,
                    Styles          = albumStyles
                };

                /* Tracklist */
                var trackRows = root.SelectNodes("//div[@class='section tracklist']//table[@class='playlist']//tr[contains(@class, ' tracklist_track track')]");
                if (trackRows != null)
                {
                    foreach (var trackRow in trackRows)
                    {
                        var trackLink = trackRow.SelectSingleNode("td[@class='track tracklist_track_title ']//a");
                        var trackHref = trackLink == null ? "" : trackLink.GetAttributeValue("href", "");

                        // Tracks without a link to their own page are skipped.
                        if (trackHref == "")
                        {
                            continue;
                        }

                        var  trackData = RequestHelper.GetPageData(siteRoot + trackHref);
                        Song song      = ParseSong(trackData);
                        if (song != null)
                        {
                            album.Tracklist.Add(song);
                        }
                    }
                }

                return album;
            }
            catch (Exception ex)
            {
                // Best effort: any failure while parsing or fetching yields no album.
                Console.WriteLine("Parse album: " + ex.Message);
                return null;
            }
        }