/// <summary>
/// Parse the artist details page returned from the Allmusic scraper and store
/// the scraped values on this instance.
/// </summary>
/// <remarks>
/// Loads the page at <paramref name="strUrl"/>, then follows the biography and
/// discography tab links found on that page (when present) to collect the
/// biography text and the album list. The collected values are handed to
/// <c>Set</c> as an <c>ArtistInfo</c>.
/// </remarks>
/// <param name="strUrl">URL of artist details page</param>
/// <returns>
/// <c>true</c> whenever the method returns normally. The method does not itself
/// detect an unsuccessful scrape; page elements that are not found are simply
/// not reflected in the stored details.
/// </returns>
public bool Parse(string strUrl)
{
    // Tab links on the overview page are prefixed with the site root before loading.
    const string siteRoot = "http://www.allmusic.com/";

    var mainPage = new HtmlWeb().Load(strUrl);
    var root = mainPage.DocumentNode;

    // moods
    var moods = JoinNodeText(root.SelectNodes(@"//section[@class=""moods""]/ul/*"));

    // artist name (attribute test must be @class; a misspelt attribute name never matches)
    var artistName = AllmusicSiteScraper.CleanInnerText(
        root.SelectSingleNode(@"//h2[@class=""artist-name""]"));

    // artist image URL
    var artistImg = AllmusicSiteScraper.CleanAttribute(
        root.SelectSingleNode(@"//div[@class=""artist-image""]/img"), "src");

    // years active
    var yearsActive = AllmusicSiteScraper.CleanInnerText(
        root.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""active-dates""]/div"));

    // genre
    var genres = JoinNodeText(
        root.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a"));

    // born / formed
    var born = AllmusicSiteScraper.CleanInnerText(
        root.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""birth""]/div"));

    // styles
    var styles = JoinNodeText(
        root.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""styles""]/div/a"));

    // bio
    // The href is tested BEFORE the site root is prepended: once prefixed, the URL
    // can never be empty, and a missing link would cause a pointless load of the
    // site root.
    var bio = string.Empty;
    var bioHref = AllmusicSiteScraper.CleanAttribute(
        root.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab biography""]/a"), "href");
    if (!string.IsNullOrEmpty(bioHref))
    {
        var bioPage = new HtmlWeb().Load(siteRoot + bioHref);
        bio = AllmusicSiteScraper.CleanInnerText(
            bioPage.DocumentNode.SelectSingleNode(@"//section[@class=""biography""]/div[@class=""text""]"));
    }

    // albums: one "year - title (label)" line per discography table row
    var albumList = string.Empty;
    var albumHref = AllmusicSiteScraper.CleanAttribute(
        root.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab discography""]/a"), "href");
    if (!string.IsNullOrEmpty(albumHref))
    {
        var albumPage = new HtmlWeb().Load(siteRoot + albumHref);
        var albums = albumPage.DocumentNode.SelectNodes(@"//section[@class=""discography""]/table/tbody/tr");
        if (albums != null)
        {
            var albumLines = albums.Select(album =>
            {
                var year = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""year""]"));
                var title = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""title""]/a"));
                var label = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""label""]"));
                return year + " - " + title + " (" + label + ")" + Environment.NewLine;
            });

            // Single concatenation instead of repeated += in a loop.
            albumList = string.Concat(albumLines.ToArray());
        }
    }

    var artistInfo = new ArtistInfo
    {
        AMGBio = bio,
        Albums = albumList,
        Artist = artistName,
        Born = born,
        Compilations = string.Empty,
        Genres = genres,
        Image = artistImg,
        Instruments = string.Empty,
        Misc = string.Empty,
        Singles = string.Empty,
        Styles = styles,
        Tones = moods,
        YearsActive = yearsActive
    };
    Set(artistInfo);

    _bLoaded = true;
    return _bLoaded;
}

/// <summary>
/// Joins the cleaned inner text of every node in <paramref name="nodes"/> with ", ".
/// </summary>
/// <param name="nodes">Result of an XPath node selection; may be <c>null</c> when nothing matched.</param>
/// <returns>The comma separated text, or an empty string when <paramref name="nodes"/> is <c>null</c>.</returns>
private static string JoinNodeText(HtmlNodeCollection nodes)
{
    if (nodes == null)
    {
        return string.Empty;
    }

    var parts = nodes.Select(node => AllmusicSiteScraper.CleanInnerText(node)).ToArray();

    // The trailing trim keeps the result identical to appending ", " after every
    // entry and trimming afterwards, including when the last entries are empty.
    return string.Join(", ", parts).TrimEnd(',', ' ');
}
/// <summary>
/// Scrape an album details page and store the scraped values on this instance.
/// </summary>
/// <remarks>
/// Reads artist, title, cover image URL, rating, release year, review, track
/// listing, genres, themes and moods from the page, passes them to <c>Set</c>
/// as an <c>AlbumInfo</c> and then sets <c>Loaded</c>.
/// </remarks>
/// <param name="strUrl">URL of album details page</param>
/// <returns>Always <c>true</c> when the method returns normally.</returns>
public bool Parse(string strUrl)
{
    var doc = new HtmlWeb().Load(strUrl).DocumentNode;

    // Turns a node selection into "a, b, c"; an empty string when nothing was selected.
    Func<HtmlNodeCollection, string> joinText = nodes =>
        nodes == null
            ? string.Empty
            : string.Join(", ", nodes.Select(n => AllmusicSiteScraper.CleanInnerText(n)).ToArray())
                    .TrimEnd(',', ' ');

    // album artist
    var artistName = AllmusicSiteScraper.CleanInnerText(
        doc.SelectSingleNode(@"//h3[@class=""album-artist""]/span/a"));

    // album title
    var albumTitle = AllmusicSiteScraper.CleanInnerText(
        doc.SelectSingleNode(@"//h2[@class=""album-title""]"));

    // cover image URL
    var coverUrl = AllmusicSiteScraper.CleanAttribute(
        doc.SelectSingleNode(@"//div[@class=""album-cover""]/div[@class=""album-contain""]/img"), "src");

    // rating: stays 0 when the text is not an integer
    int rating;
    var ratingText = AllmusicSiteScraper.CleanInnerText(
        doc.SelectSingleNode(@"//div[starts-with(@class,""allmusic-rating rating-allmusic"")]"));
    int.TryParse(ratingText, out rating);

    // year: the regex collapses everything up to and including the last 4-digit run to those 4 digits
    int releaseYear;
    var releaseText = AllmusicSiteScraper.CleanInnerText(
        doc.SelectSingleNode(@"//div[@class=""release-date""]/span"));
    releaseText = Regex.Replace(releaseText, @".*(\d{4})", @"$1");
    int.TryParse(releaseText, out releaseYear);

    // review
    var reviewText = AllmusicSiteScraper.CleanInnerText(
        doc.SelectSingleNode(@"//div[@itemprop=""reviewBody""]"));

    // track listing: one "number@title@seconds|" entry per track row
    var trackList = string.Empty;
    var trackRows = doc.SelectNodes(@"//tr[@itemprop=""track""]");
    if (trackRows != null)
    {
        var entries = trackRows.Select(row =>
        {
            var number = AllmusicSiteScraper.CleanInnerText(row.SelectSingleNode(@"td[@class=""tracknum""]"));
            var name = AllmusicSiteScraper.CleanInnerText(
                row.SelectSingleNode(@"td[@class=""title-composer""]/div[@class=""title""]/a"));
            var time = AllmusicSiteScraper.CleanInnerText(row.SelectSingleNode(@"td[@class=""time""]"));

            // "m:ss" -> seconds; split on the first colon only, 0 when there is no colon
            var seconds = 0;
            var pieces = time.Split(new[] { ':' }, 2);
            if (pieces.Length == 2)
            {
                int minutesPart, secondsPart;
                Int32.TryParse(pieces[0], out minutesPart);
                Int32.TryParse(pieces[1], out secondsPart);
                seconds = (minutesPart * 60) + secondsPart;
            }

            return number + "@" + name + "@" + seconds.ToString(CultureInfo.InvariantCulture) + "|";
        });
        trackList = string.Concat(entries.ToArray());
    }

    // genres
    var genreText = joinText(
        doc.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a"));

    // themes (stored in the Styles field of the album details below)
    var themeText = joinText(
        doc.SelectNodes(@"//section[@class=""themes""]/div/span[@class=""theme""]/a"));

    // moods (stored in the Tones field of the album details below)
    var moodText = joinText(
        doc.SelectNodes(@"//section[@class=""moods""]/div/span[@class=""mood""]/a"));

    var details = new AlbumInfo
    {
        Album = albumTitle,
        Artist = artistName,
        Genre = genreText,
        Tones = moodText,
        Styles = themeText,
        Review = reviewText,
        Image = coverUrl,
        Rating = rating,
        Tracks = trackList,
        AlbumArtist = artistName,
        Year = releaseYear
    };
    Set(details);

    Loaded = true;
    return true;
}