/// <summary>
        /// Load an Allmusic artist details page and scrape the artist information from it.
        /// </summary>
        /// <remarks>
        /// Besides the page at <paramref name="strUrl"/>, up to two further pages are downloaded:
        /// the biography tab and the discography tab, when the main page links to them.
        /// The scraped values are collected in an <c>ArtistInfo</c> and handed to <c>Set</c>.
        /// Nothing is caught here, so any exception raised while loading a page propagates to the caller.
        /// </remarks>
        /// <param name="strUrl">URL of artist details page</param>
        /// <returns>
        /// Always true once the page has been processed; there is no code path that returns false,
        /// and missing page elements simply leave the corresponding fields empty.
        /// </returns>
        public bool Parse(string strUrl)
        {
            // NOTE(review): hrefs are appended verbatim, which assumes they are site-relative
            // and do not start with a slash - confirm against the live markup.
            const string siteRoot = "http://www.allmusic.com/";

            var mainPage = new HtmlWeb().Load(strUrl);

            // moods
            // string.Join followed by TrimEnd yields exactly what the previous
            // "append ', ' to every item, then trim" approach produced, without the
            // repeated string re-allocation.
            var moods     = string.Empty;
            var moodNodes = mainPage.DocumentNode.SelectNodes(@"//section[@class=""moods""]/ul/*");

            if (moodNodes != null)
            {
                moods = string.Join(", ", moodNodes.Select(mood => AllmusicSiteScraper.CleanInnerText(mood)))
                              .TrimEnd(',', ' ');
            }

            // artist name (attribute was previously misspelled "@clas", so this node never matched)
            var artistName = AllmusicSiteScraper.CleanInnerText(mainPage.DocumentNode.SelectSingleNode(@"//h2[@class=""artist-name""]"));

            // artist image URL
            var artistImg = AllmusicSiteScraper.CleanAttribute(mainPage.DocumentNode.SelectSingleNode(@"//div[@class=""artist-image""]/img"), "src");

            // years active
            var yearsActive = AllmusicSiteScraper.CleanInnerText(mainPage.DocumentNode.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""active-dates""]/div"));

            // genre
            var genres     = string.Empty;
            var genreNodes = mainPage.DocumentNode.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a");

            if (genreNodes != null)
            {
                genres = string.Join(", ", genreNodes.Select(genre => AllmusicSiteScraper.CleanInnerText(genre)))
                               .TrimEnd(',', ' ');
            }

            // born / formed
            var born = AllmusicSiteScraper.CleanInnerText(mainPage.DocumentNode.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""birth""]/div"));

            // styles
            var styles     = string.Empty;
            var styleNodes = mainPage.DocumentNode.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""styles""]/div/a");

            if (styleNodes != null)
            {
                styles = string.Join(", ", styleNodes.Select(style => AllmusicSiteScraper.CleanInnerText(style)))
                               .TrimEnd(',', ' ');
            }

            // bio
            var bio     = string.Empty;
            var bioHref = AllmusicSiteScraper.CleanAttribute(mainPage.DocumentNode.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab biography""]/a"), "href");

            // Test the raw href: once the site root is prepended the string is never empty,
            // so a missing biography tab would otherwise trigger a download of the site root.
            if (!string.IsNullOrEmpty(bioHref))
            {
                var bioPage = new HtmlWeb().Load(siteRoot + bioHref);
                bio = AllmusicSiteScraper.CleanInnerText(bioPage.DocumentNode.SelectSingleNode(@"//section[@class=""biography""]/div[@class=""text""]"));
            }

            // albums
            var albumList     = string.Empty;
            var albumPageHref = AllmusicSiteScraper.CleanAttribute(mainPage.DocumentNode.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab discography""]/a"), "href");

            // Same reasoning as for the biography: only follow the link when one was found.
            if (!string.IsNullOrEmpty(albumPageHref))
            {
                var albumPage = new HtmlWeb().Load(siteRoot + albumPageHref);
                var albums    = albumPage.DocumentNode.SelectNodes(@"//section[@class=""discography""]/table/tbody/tr");
                if (albums != null)
                {
                    // One line per discography row: "<year> - <title> (<label>)"
                    foreach (var album in albums)
                    {
                        var year  = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""year""]"));
                        var title = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""title""]/a"));
                        var label = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""label""]"));

                        albumList += year + " - " + title + " (" + label + ")" + Environment.NewLine;
                    }
                }
            }

            // Fields that this scraper does not extract are set to empty strings.
            var artistInfo = new ArtistInfo
            {
                AMGBio       = bio,
                Albums       = albumList,
                Artist       = artistName,
                Born         = born,
                Compilations = string.Empty,
                Genres       = genres,
                Image        = artistImg,
                Instruments  = string.Empty,
                Misc         = string.Empty,
                Singles      = string.Empty,
                Styles       = styles,
                Tones        = moods,
                YearsActive  = yearsActive
            };

            Set(artistInfo);

            _bLoaded = true;
            return _bLoaded;
        }
Exemple #2
0
        /// <summary>
        /// Take URL of an album details page and scrape details
        /// </summary>
        /// <remarks>
        /// The scraped values are collected in an <c>AlbumInfo</c> and handed to <c>Set</c>.
        /// The track listing is encoded as one string of "number@title@seconds|" entries.
        /// Nothing is caught here, so any exception raised while loading the page propagates to the caller.
        /// </remarks>
        /// <param name="strUrl">URL of album details page</param>
        /// <returns>
        /// Always true once the page has been processed; there is no code path that returns false,
        /// and missing page elements leave the corresponding fields empty or zero.
        /// </returns>
        public bool Parse(string strUrl)
        {
            var albumPage = new HtmlWeb().Load(strUrl);

            // artist
            var strAlbumArtist = AllmusicSiteScraper.CleanInnerText(albumPage.DocumentNode.SelectSingleNode(@"//h3[@class=""album-artist""]/span/a"));

            // album
            var strAlbum = AllmusicSiteScraper.CleanInnerText(albumPage.DocumentNode.SelectSingleNode(@"//h2[@class=""album-title""]"));

            // Image URL
            var imgURL =
                AllmusicSiteScraper.CleanAttribute(
                    albumPage.DocumentNode.SelectSingleNode(@"//div[@class=""album-cover""]/div[@class=""album-contain""]/img"),
                    "src");

            // Rating (stays 0 when the text is not a number)
            var iRating     = 0;
            var ratingMatch = AllmusicSiteScraper.CleanInnerText(albumPage.DocumentNode.SelectSingleNode(@"//div[starts-with(@class,""allmusic-rating rating-allmusic"")]"));

            int.TryParse(ratingMatch, NumberStyles.Integer, CultureInfo.InvariantCulture, out iRating);

            // year
            var iYear    = 0;
            var yearText = AllmusicSiteScraper.CleanInnerText(albumPage.DocumentNode.SelectSingleNode(@"//div[@class=""release-date""]/span")) ?? string.Empty;

            // The greedy prefix makes the group capture the LAST run of four digits.
            // Capturing (instead of replacing) means text following the year no longer
            // defeats the integer parse. Without a match the raw text is parsed, as before.
            var yearMatch  = Regex.Match(yearText, @".*(\d{4})");
            var yearDigits = yearMatch.Success ? yearMatch.Groups[1].Value : yearText;
            int.TryParse(yearDigits, NumberStyles.Integer, CultureInfo.InvariantCulture, out iYear);

            // review
            var strReview = AllmusicSiteScraper.CleanInnerText(albumPage.DocumentNode.SelectSingleNode(@"//div[@itemprop=""reviewBody""]"));

            // build up track listing into one string
            var strTracks  = string.Empty;
            var trackNodes = albumPage.DocumentNode.SelectNodes(@"//tr[@itemprop=""track""]");

            if (trackNodes != null)
            {
                foreach (var track in trackNodes)
                {
                    var trackNo = AllmusicSiteScraper.CleanInnerText(track.SelectSingleNode(@"td[@class=""tracknum""]"));
                    var title   =
                        AllmusicSiteScraper.CleanInnerText(
                            track.SelectSingleNode(@"td[@class=""title-composer""]/div[@class=""title""]/a"));
                    var strDuration = AllmusicSiteScraper.CleanInnerText(track.SelectSingleNode(@"td[@class=""time""]"));
                    var iDuration   = ParseTrackDurationSeconds(strDuration);

                    strTracks += trackNo + "@" + title + "@" + iDuration.ToString(CultureInfo.InvariantCulture) + "|";
                }
            }

            // genres
            var strGenres  = string.Empty;
            var genreNodes = albumPage.DocumentNode.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a");

            if (genreNodes != null)
            {
                strGenres = genreNodes.Aggregate(strGenres, (current, genre) => current + (AllmusicSiteScraper.CleanInnerText(genre) + ", "));
                strGenres = strGenres.TrimEnd(new[] { ',', ' ' }); // remove trailing ", "
            }

            // build up themes into one string (stored in the Styles field of the result)
            var strThemes  = string.Empty;
            var themeNodes = albumPage.DocumentNode.SelectNodes(@"//section[@class=""themes""]/div/span[@class=""theme""]/a");

            if (themeNodes != null)
            {
                strThemes = themeNodes.Aggregate(strThemes, (current, theme) => current + (AllmusicSiteScraper.CleanInnerText(theme) + ", "));
                strThemes = strThemes.TrimEnd(new[] { ',', ' ' }); // remove trailing ", "
            }

            // build up moods into one string (stored in the Tones field of the result)
            var strMoods  = string.Empty;
            var moodNodes = albumPage.DocumentNode.SelectNodes(@"//section[@class=""moods""]/div/span[@class=""mood""]/a");

            if (moodNodes != null)
            {
                strMoods = moodNodes.Aggregate(strMoods, (current, mood) => current + (AllmusicSiteScraper.CleanInnerText(mood) + ", "));
                strMoods = strMoods.TrimEnd(new[] { ',', ' ' }); // remove trailing ", "
            }

            // The scraped album artist is used for both Artist and AlbumArtist.
            var album = new AlbumInfo
            {
                Album       = strAlbum,
                Artist      = strAlbumArtist,
                Genre       = strGenres,
                Tones       = strMoods,
                Styles      = strThemes,
                Review      = strReview,
                Image       = imgURL,
                Rating      = iRating,
                Tracks      = strTracks,
                AlbumArtist = strAlbumArtist,
                Year        = iYear
            };

            Set(album);

            Loaded = true;
            return true;
        }

        /// <summary>
        /// Convert a colon-separated track time ("m:ss" or "h:mm:ss") into whole seconds.
        /// </summary>
        /// <param name="strDuration">Track time text as scraped from the page</param>
        /// <returns>
        /// Total seconds; 0 when the text is null, empty or contains no colon.
        /// A field that is not a number counts as 0.
        /// </returns>
        private static int ParseTrackDurationSeconds(string strDuration)
        {
            if (string.IsNullOrEmpty(strDuration) || strDuration.IndexOf(":", StringComparison.Ordinal) < 0)
            {
                return 0;
            }

            // Fold the fields left to right so every additional leading field is worth
            // sixty times the next one; this handles "m:ss" and "h:mm:ss" alike.
            var iSeconds = 0;
            foreach (var strField in strDuration.Split(':'))
            {
                int iField;
                int.TryParse(strField, NumberStyles.Integer, CultureInfo.InvariantCulture, out iField);
                iSeconds = (iSeconds * 60) + iField;
            }

            return iSeconds;
        }