/// <summary>
/// Parses the HTML of an album details page and fills this instance's album fields:
/// cover URL, review, artist, title, rating, release year, genre, styles, moods and songs.
/// </summary>
/// <param name="html">HTML of the album page; <c>null</c> is treated as an empty page.</param>
/// <returns>
/// Always <c>true</c>. Artist, release date, genre, moods, styles and title fall back to
/// localized string 416 when nothing could be extracted.
/// </returns>
public bool Parse(string html)
{
  if (html == null)
  {
    html = string.Empty;
  }

  _songs.Clear();
  HTMLUtil util = new HTMLUtil();

  // Extract Cover URL
  string pattern = @"<!--Begin.*?Album.*?Photo-->\s*?.*?<img.*?src=\""(.*?)\""";
  if (FindPattern(pattern, html))
  {
    _strImageURL = _match.Groups[1].Value;
  }

  // Extract Review (lives on a separate page linked from the "Review" tab)
  pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Review.*?</a>";
  if (FindPattern(pattern, html))
  {
    try
    {
      string reviewPage = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
      pattern = @"<p.*?class=""author"">.*\s*?.*?<p.*?class=""text"">(.*?)</p>";
      if (FindPattern(pattern, reviewPage))
      {
        string data = _match.Groups[1].Value;
        util.RemoveTags(ref data);
        util.ConvertHTMLToAnsi(data, out data);
        _strReview = data.Trim();
      }
    }
    catch (Exception)
    {
      // Best effort: a missing review must not abort parsing of the remaining fields.
    }
  }

  // Extract Artist
  pattern = @"<h3.*?artist</h3>\s*?.*?<a.*"">(.*)</a>";
  if (FindPattern(pattern, html))
  {
    _artist = _match.Groups[1].Value;
    util.RemoveTags(ref _artist);
  }

  // Extract Album
  pattern = @"<h3.*?album</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strTitle = _match.Groups[1].Value;
    util.RemoveTags(ref _strTitle);
  }

  // Extract Rating
  pattern = @"<h3.*?rating</h3>\s*?.*?src=""(.*?)""";
  if (FindPattern(pattern, html))
  {
    string strRating = _match.Groups[1].Value;
    util.RemoveTags(ref strRating);
    // The rating is read from the single character at offset 26 of the image URL.
    // NOTE(review): presumably the digit embedded in the rating image file name - confirm.
    // Guard the length so a shorter URL no longer throws ArgumentOutOfRangeException.
    int rating;
    if (strRating.Length > 26 && Int32.TryParse(strRating.Substring(26, 1), out rating))
    {
      _iRating = rating;
    }
  }

  // Release Date
  pattern = @"<h3.*?release.*?date</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strDateOfRelease = _match.Groups[1].Value;
    util.RemoveTags(ref _strDateOfRelease);

    // extract the year out of something like "1998 (release)" or "12 feb 2003";
    // a 19xx year is looked for first, then a 20xx year in whatever remains.
    string year = FindFourDigitYear(_strDateOfRelease, "19");
    if (year != null)
    {
      _strDateOfRelease = year;
    }
    year = FindFourDigitYear(_strDateOfRelease, "20");
    if (year != null)
    {
      _strDateOfRelease = year;
    }
  }

  // Extract Genre
  string listText = ExtractAlbumListItems(html, "<h3>genre</h3>", "</div>", util);
  if (listText != null)
  {
    _strGenre = listText;
  }

  // Extract Styles
  listText = ExtractAlbumListItems(html, "<h3>style</h3>", "</div>", util);
  if (listText != null)
  {
    _strStyles = listText;
  }

  // Extract Moods
  listText = ExtractAlbumListItems(html, "<h3>moods</h3>", "</div>", util);
  if (listText != null)
  {
    _strTones = listText;
  }

  // Extract Songs
  int begIndex = html.IndexOf("<!-- tracks table -->", StringComparison.OrdinalIgnoreCase);
  int endIndex = begIndex == -1
                   ? -1
                   : html.IndexOf("<!-- end tracks table -->", begIndex + 2, StringComparison.OrdinalIgnoreCase);
  if (begIndex != -1 && endIndex != -1)
  {
    string contentInfo = html.Substring(begIndex, endIndex - begIndex);
    pattern = @"<tr.*class=""visible"".*?\s*?<td.*</td>\s*?.*<td.*</td>\s*?.*<td.*?>(?<track>.*)</td>" +
              @"\s*?.*<td.*</td>\s*?.*<td.*?>(?<title>.*)</td>\s*?.*?<td.*?>\s*?.*</td>\s*?.*?<td.*?>(?<duration>.*)</td>";
    if (FindPattern(pattern, contentInfo))
    {
      while (_match.Success)
      {
        // Tracknumber (0 when it is not numeric)
        int iTrack;
        Int32.TryParse(_match.Groups["track"].Value, out iTrack);

        // Song Title
        string strTitle = _match.Groups["title"].Value;
        util.RemoveTags(ref strTitle);
        util.ConvertHTMLToAnsi(strTitle, out strTitle);

        // Duration given as "m:ss", stored in seconds
        int iDuration = 0;
        string strDuration = _match.Groups["duration"].Value;
        int iPos = strDuration.IndexOf(":", StringComparison.Ordinal);
        if (iPos >= 0)
        {
          int iMin = 0, iSec = 0;
          // Seconds are only parsed when the minutes parsed, matching the previous behaviour.
          if (Int32.TryParse(strDuration.Substring(0, iPos), out iMin))
          {
            Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
          }
          iDuration = iMin * 60 + iSec;
        }

        // Create new song object
        MusicSong newSong = new MusicSong();
        newSong.Track = iTrack;
        newSong.SongName = strTitle;
        newSong.Duration = iDuration;
        _songs.Add(newSong);

        _match = _match.NextMatch();
      }
    }
  }

  // Set to "Not available" if no value from web
  if (string.IsNullOrEmpty(_artist))
  {
    _artist = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strDateOfRelease))
  {
    _strDateOfRelease = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strGenre))
  {
    _strGenre = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTones))
  {
    _strTones = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strStyles))
  {
    _strStyles = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTitle))
  {
    _strTitle = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTitle2))
  {
    _strTitle2 = _strTitle;
  }

  Loaded = true;
  return (true);
}

/// <summary>
/// Returns the first four-character run in <paramref name="text"/> that starts with
/// <paramref name="centuryPrefix"/> and is followed by two digits, or <c>null</c> if there is none.
/// </summary>
private static string FindFourDigitYear(string text, string centuryPrefix)
{
  if (string.IsNullOrEmpty(text))
  {
    return null;
  }

  int pos = text.IndexOf(centuryPrefix, StringComparison.Ordinal);
  while (pos > -1)
  {
    // Strictly greater: index pos + 3 must exist before it is read.
    if (text.Length > pos + 3 && Char.IsDigit(text[pos + 2]) && Char.IsDigit(text[pos + 3]))
    {
      return text.Substring(pos, 4);
    }
    pos = text.IndexOf(centuryPrefix, pos + 1, StringComparison.Ordinal);
  }
  return null;
}

/// <summary>
/// Collects the <c>&lt;li&gt;</c> entries found between two markers into a comma separated string.
/// Markers are matched case-insensitively. Returns <c>null</c> when the section or its items are missing.
/// Overwrites the <c>_match</c> field through <c>FindPattern</c>.
/// </summary>
private string ExtractAlbumListItems(string html, string startMarker, string endMarker, HTMLUtil util)
{
  int begIndex = html.IndexOf(startMarker, StringComparison.OrdinalIgnoreCase);
  if (begIndex == -1)
  {
    return null;
  }
  int endIndex = html.IndexOf(endMarker, begIndex + 2, StringComparison.OrdinalIgnoreCase);
  if (endIndex == -1)
  {
    return null;
  }

  string contentInfo = html.Substring(begIndex, endIndex - begIndex);
  if (!FindPattern(@"(<li>(.*?)</li>)", contentInfo))
  {
    return null;
  }

  string data = "";
  while (_match.Success)
  {
    data += string.Format("{0}, ", _match.Groups[2].Value);
    _match = _match.NextMatch();
  }
  util.RemoveTags(ref data);
  string result;
  util.ConvertHTMLToAnsi(data, out result);
  return result.Trim(new[] { ' ', ',' });
}
/// <summary>
/// Parse the Detail Page returned from the Allmusic Scraper and fill the artist fields of this
/// instance (name, born, years active, genres, styles, moods, instruments, picture URL,
/// biography and the discography lists).
/// </summary>
/// <param name="strHTML">HTML of the artist details page</param>
/// <returns>
/// <c>false</c> when the page is null/empty or the artist name cannot be found; otherwise <c>true</c>.
/// </returns>
public bool Parse(string strHTML)
{
  if (string.IsNullOrEmpty(strHTML))
  {
    return (false);
  }

  HTMLUtil util = new HTMLUtil();

  // Get the Artist Name
  string pattern = @"<h1.*class=""title"">(.*)</h1>";
  if (!FindPattern(pattern, strHTML))
  {
    return (false);
  }
  _strArtistName = _match.Groups[1].Value;

  // Born
  pattern = @"<h3>.*Born.*</h3>\s*?<p>(.*)</p>";
  if (FindPattern(pattern, strHTML))
  {
    string strValue = _match.Groups[1].Value;
    util.RemoveTags(ref strValue);
    util.ConvertHTMLToAnsi(strValue, out _strBorn);
    _strBorn = _strBorn.Trim();
  }

  // Years Active
  // NOTE: values are appended to the existing field content; the field is not reset here.
  pattern = @"(<span.*?class=""active"">(.*?)</span>)";
  if (FindPattern(pattern, strHTML))
  {
    while (_match.Success)
    {
      _strYearsActive += string.Format("{0}s, ", _match.Groups[2].Value);
      _match = _match.NextMatch();
    }
    _strYearsActive = _strYearsActive.Trim(new[] { ' ', ',' });
  }

  // Genre
  pattern = @"<div.*?id=""genre-style"">\s*?.*?\s*?<h3>.*?Genres.*?</h3>\s*?.*?(<p>(.*?)</p>)";
  if (FindPattern(pattern, strHTML))
  {
    string data = "";
    while (_match.Success)
    {
      data += string.Format("{0}, ", _match.Groups[2].Value);
      _match = _match.NextMatch();
    }
    util.RemoveTags(ref data);
    util.ConvertHTMLToAnsi(data, out _strGenres);
    _strGenres = _strGenres.Trim(new[] { ' ', ',' });
  }

  // Style
  string listText = ExtractArtistListItems(strHTML, "<h3>styles</h3>", "<!--end genre/styles-->", util);
  if (listText != null)
  {
    _strStyles = listText;
  }

  // Mood
  listText = ExtractArtistListItems(strHTML, "<h3>moods</h3>", "</div>", util);
  if (listText != null)
  {
    _strTones = listText;
  }

  // Instruments
  // The <li> pattern is now always used; previously this section ran whichever pattern
  // the preceding sections happened to leave behind.
  listText = ExtractArtistListItems(strHTML, "<h3>instruments</h3>", "</div>", util);
  if (listText != null)
  {
    _strInstruments = listText;
  }

  // picture URL
  pattern = @"<div.*?class=""image"">\s*?.*<img.*id=""artist_image"".*?src=\""(.*?)\""";
  if (FindPattern(pattern, strHTML))
  {
    _strArtistPictureURL = _match.Groups[1].Value;
  }

  // parse AMG BIOGRAPHY
  pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Biography.*?</a>";
  if (FindPattern(pattern, strHTML))
  {
    try
    {
      string bioPage = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
      // The markers only confirm that a biography section exists;
      // the text pattern itself is applied to the whole page.
      int begIndex = bioPage.IndexOf("<!--Begin Biography -->", StringComparison.Ordinal);
      int endIndex = begIndex == -1 ? -1 : bioPage.IndexOf("</div>", begIndex + 2, StringComparison.Ordinal);
      if (begIndex != -1 && endIndex != -1)
      {
        pattern = @"<p.*?class=""text"">(.*?)</p>";
        if (FindPattern(pattern, bioPage))
        {
          string data = _match.Groups[1].Value;
          util.RemoveTags(ref data);
          util.ConvertHTMLToAnsi(data, out data);
          _strAMGBiography = data.Trim();
        }
      }
    }
    catch (Exception)
    {
      // Best effort: a missing biography must not abort the rest of the parse.
    }
  }

  // discography (albums) and its sub pages
  pattern = @"<td.*class=""tab_off""><a.*?href=""(.*?)"">.*Discography.*</a>";
  if (FindPattern(pattern, strHTML))
  {
    // Capture the link first: the helper below overwrites _match.
    string discographyUrl = _match.Groups[1].Value;

    foreach (string[] row in ScrapeDiscographyPage(discographyUrl, util))
    {
      _discographyAlbum.Add(row);
    }
    // Compilations
    foreach (string[] row in ScrapeDiscographyPage(discographyUrl + "/compilations", util))
    {
      _discographyCompilations.Add(row);
    }
    // Singles
    foreach (string[] row in ScrapeDiscographyPage(discographyUrl + "/singles-eps", util))
    {
      _discographySingles.Add(row);
    }
    // DVD Videos - stored together with "Other" in the misc list, as before
    foreach (string[] row in ScrapeDiscographyPage(discographyUrl + "/dvds-videos", util))
    {
      _discographyMisc.Add(row);
    }
    // Other
    foreach (string[] row in ScrapeDiscographyPage(discographyUrl + "/other", util))
    {
      _discographyMisc.Add(row);
    }
  }

  _bLoaded = true;
  return (_bLoaded);
}

/// <summary>
/// Collects the <c>&lt;li&gt;</c> entries found between two markers into a comma separated string.
/// Markers are matched case-insensitively. Returns <c>null</c> when the section or its items are missing.
/// Overwrites the <c>_match</c> field through <c>FindPattern</c>.
/// </summary>
private string ExtractArtistListItems(string html, string startMarker, string endMarker, HTMLUtil util)
{
  int begIndex = html.IndexOf(startMarker, StringComparison.OrdinalIgnoreCase);
  if (begIndex == -1)
  {
    return null;
  }
  int endIndex = html.IndexOf(endMarker, begIndex + 2, StringComparison.OrdinalIgnoreCase);
  if (endIndex == -1)
  {
    return null;
  }

  string contentInfo = html.Substring(begIndex, endIndex - begIndex);
  if (!FindPattern(@"(<li>(.*?)</li>)", contentInfo))
  {
    return null;
  }

  string data = "";
  while (_match.Success)
  {
    data += string.Format("{0}, ", _match.Groups[2].Value);
    _match = _match.NextMatch();
  }
  util.RemoveTags(ref data);
  string result;
  util.ConvertHTMLToAnsi(data, out result);
  return result.Trim(new[] { ' ', ',' });
}

/// <summary>
/// Downloads one discography page and returns its rows as { year, album title, label }.
/// Download or parse failures are swallowed; rows collected up to that point are still returned.
/// Overwrites the <c>_match</c> field through <c>FindPattern</c>.
/// </summary>
private System.Collections.Generic.List<string[]> ScrapeDiscographyPage(string url, HTMLUtil util)
{
  System.Collections.Generic.List<string[]> rows = new System.Collections.Generic.List<string[]>();
  try
  {
    string contentinfo = AllmusicSiteScraper.GetHTTP(url);
    string pattern = @"sorted.*? cell"">(?<year>.*?)</td>\s*?.*?</td>\s*.*?<a.*?"">(?<album>.*?)" +
                     @"</a>.*?</td>\s*.*?</td>\s*.*?"">(?<label>.*?)</td>";
    if (FindPattern(pattern, contentinfo))
    {
      while (_match.Success)
      {
        string year = _match.Groups["year"].Value;
        string albumTitle = _match.Groups["album"].Value;
        string label = _match.Groups["label"].Value;

        util.RemoveTags(ref year);
        util.ConvertHTMLToAnsi(year, out year);
        util.RemoveTags(ref albumTitle);
        util.ConvertHTMLToAnsi(albumTitle, out albumTitle);
        util.RemoveTags(ref label);
        util.ConvertHTMLToAnsi(label, out label);

        try
        {
          rows.Add(new[] { year.Trim(), albumTitle.Trim(), label.Trim() });
        }
        catch
        {
          // Skip a malformed row but keep scanning the remaining ones.
        }
        _match = _match.NextMatch();
      }
    }
  }
  catch (Exception)
  {
    // Best effort: an unavailable sub page simply contributes no (further) rows.
  }
  return rows;
}
/// <summary>
/// Shows the album info dialog for the given album. Cached details from the music database are
/// used when available; otherwise the artist and album are looked up with the allmusic scraper
/// (asking the user to choose when the artist search is ambiguous) and the result is stored.
/// </summary>
/// <param name="parentWindowID">Window the album info dialog is shown modal to.</param>
/// <param name="artistName">Artist to look up.</param>
/// <param name="albumName">Album to look up.</param>
/// <param name="strPath">Path of the album; only passed on when the lookup is repeated.</param>
/// <param name="tag">Music tag handed to the album info dialog.</param>
public void ShowAlbumInfo(int parentWindowID, string artistName, string albumName, string strPath, MusicTag tag)
{
  Log.Debug("Searching for album: {0} - {1}", albumName, artistName);

  var dlgProgress = (GUIDialogProgress)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_PROGRESS);
  var pDlgOK = (GUIDialogOK)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_OK);

  var errorEncountered = true;
  var album = new AlbumInfo();
  var albumInfo = new MusicAlbumInfo();
  if (m_database.GetAlbumInfo(albumName, artistName, ref album))
  {
    // we already have album info in database so just use that
    albumInfo.Set(album);
    errorEncountered = false;
  }
  else
  {
    // lookup details.  start with artist
    if (null != pDlgOK && !Win32API.IsConnectedToInternet())
    {
      pDlgOK.SetHeading(703);
      pDlgOK.SetLine(1, 703);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
      return;
    }

    // show dialog box indicating we're searching the album
    if (dlgProgress != null)
    {
      dlgProgress.Reset();
      dlgProgress.SetHeading(326);
      dlgProgress.SetLine(1, albumName);
      dlgProgress.SetLine(2, artistName);
      dlgProgress.SetPercentage(0);
      dlgProgress.StartModal(GetID);
      dlgProgress.Progress();
      dlgProgress.ShowProgressBar(true);
    }

    var scraper = new AllmusicSiteScraper();
    List<AllMusicArtistMatch> artists;
    var selectedMatch = new AllMusicArtistMatch();
    if (scraper.GetArtists(artistName, out artists))
    {
      if (null != dlgProgress)
      {
        dlgProgress.SetPercentage(20);
        dlgProgress.Progress();
      }

      if (artists.Count == 1)
      {
        // only have single match so no need to ask user
        Log.Debug("Single Artist Match Found");
        selectedMatch = artists[0];
      }
      else
      {
        // need to get user to choose which one to use
        Log.Debug("Muliple Artist Match Found ({0}) prompting user", artists.Count);
        var pDlg = (GUIDialogSelect2)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_SELECT2);
        if (null != pDlg)
        {
          pDlg.Reset();
          pDlg.SetHeading(GUILocalizeStrings.Get(1303));
          foreach (var i in artists.Select(artistMatch => new GUIListItem
                                                            {
                                                              Label = artistMatch.Artist + " - " + artistMatch.Genre,
                                                              Label2 = artistMatch.YearsActive,
                                                              Path = artistMatch.ArtistUrl,
                                                              IconImage = artistMatch.ImageUrl
                                                            }))
          {
            pDlg.Add(i);
          }
          pDlg.DoModal(GetID);

          // and wait till user selects one
          var iSelectedMatch = pDlg.SelectedLabel;
          if (iSelectedMatch < 0 || iSelectedMatch >= artists.Count)
          {
            // User cancelled (or the index is unusable): close the progress dialog
            // that was started above so it is not left open.
            if (null != dlgProgress)
            {
              dlgProgress.Close();
            }
            return;
          }
          selectedMatch = artists[iSelectedMatch];
        }

        // the selection dialog replaced the progress dialog, so bring it back
        if (null != dlgProgress)
        {
          dlgProgress.Reset();
          dlgProgress.SetHeading(326);
          dlgProgress.SetLine(1, albumName);
          dlgProgress.SetLine(2, artistName);
          dlgProgress.SetPercentage(40);
          dlgProgress.StartModal(GetID);
          dlgProgress.ShowProgressBar(true);
          dlgProgress.Progress();
        }
      }

      string strAlbumHtml;
      if (scraper.GetAlbumHtml(albumName, selectedMatch.ArtistUrl, out strAlbumHtml))
      {
        if (null != dlgProgress)
        {
          dlgProgress.SetPercentage(60);
          dlgProgress.Progress();
        }
        if (albumInfo.Parse(strAlbumHtml))
        {
          if (null != dlgProgress)
          {
            dlgProgress.SetPercentage(80);
            dlgProgress.Progress();
          }
          m_database.AddAlbumInfo(albumInfo.Get());
          errorEncountered = false;
        }
      }
    }
  }

  if (null != dlgProgress)
  {
    dlgProgress.SetPercentage(100);
    dlgProgress.Progress();
    dlgProgress.Close();
  }

  if (!errorEncountered)
  {
    var pDlgAlbumInfo = (GUIMusicInfo)GUIWindowManager.GetWindow((int)Window.WINDOW_MUSIC_INFO);
    if (null != pDlgAlbumInfo)
    {
      pDlgAlbumInfo.Album = albumInfo;
      pDlgAlbumInfo.Tag = tag;

      pDlgAlbumInfo.DoModal(parentWindowID);
      if (pDlgAlbumInfo.NeedsRefresh)
      {
        // drop the stored copy and run the whole lookup again
        m_database.DeleteAlbumInfo(albumName, artistName);
        ShowAlbumInfo(parentWindowID, artistName, albumName, strPath, tag);
        return;
      }
    }
  }
  else
  {
    Log.Debug("No Album Found");

    if (null != pDlgOK)
    {
      pDlgOK.SetHeading(187);
      pDlgOK.SetLine(1, 187);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
    }
  }
}
/// <summary>
/// Parse the Detail Page returned from the Allmusic Scraper
/// </summary>
/// <remarks>
/// Loads the artist page, plus the biography and discography pages when the artist page
/// links to them, and stores the collected values through <c>Set</c>.
/// Compilations, instruments, misc and singles are left empty.
/// </remarks>
/// <param name="strUrl">URL of artist details page</param>
/// <returns>True if scrape was successful (the method currently always returns true)</returns>
public bool Parse(string strUrl)
{
  var mainPage = new HtmlWeb().Load(strUrl);
  var root = mainPage.DocumentNode;

  // moods
  var moods = string.Empty;
  var moodNodes = root.SelectNodes(@"//section[@class=""moods""]/ul/*");
  if (moodNodes != null)
  {
    moods = string.Join(", ", moodNodes.Select(mood => AllmusicSiteScraper.CleanInnerText(mood)).ToArray())
                  .TrimEnd(new[] { ',', ' ' });
  }

  // artist name (the attribute test previously read "@clas" and could never match)
  var artistName = AllmusicSiteScraper.CleanInnerText(root.SelectSingleNode(@"//h2[@class=""artist-name""]"));

  // artist image URL
  var artistImg = AllmusicSiteScraper.CleanAttribute(root.SelectSingleNode(@"//div[@class=""artist-image""]/img"), "src");

  // years active
  var yearsActive = AllmusicSiteScraper.CleanInnerText(
    root.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""active-dates""]/div"));

  // genre
  var genres = string.Empty;
  var genreNodes = root.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a");
  if (genreNodes != null)
  {
    genres = string.Join(", ", genreNodes.Select(genre => AllmusicSiteScraper.CleanInnerText(genre)).ToArray())
                   .TrimEnd(new[] { ',', ' ' });
  }

  // born / formed
  var born = AllmusicSiteScraper.CleanInnerText(
    root.SelectSingleNode(@"//section[@class=""basic-info""]/div[@class=""birth""]/div"));

  // styles
  var styles = string.Empty;
  var styleNodes = root.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""styles""]/div/a");
  if (styleNodes != null)
  {
    styles = string.Join(", ", styleNodes.Select(style => AllmusicSiteScraper.CleanInnerText(style)).ToArray())
                   .TrimEnd(new[] { ',', ' ' });
  }

  // bio - test the link itself; testing the full URL is always true once the base is prepended
  var bio = string.Empty;
  var bioLink = AllmusicSiteScraper.CleanAttribute(
    root.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab biography""]/a"), "href");
  if (!string.IsNullOrEmpty(bioLink))
  {
    var bioPage = new HtmlWeb().Load("http://www.allmusic.com/" + bioLink);
    bio = AllmusicSiteScraper.CleanInnerText(
      bioPage.DocumentNode.SelectSingleNode(@"//section[@class=""biography""]/div[@class=""text""]"));
  }

  // albums - one "year - title (label)" line per discography row
  var albumList = new System.Text.StringBuilder();
  var albumLink = AllmusicSiteScraper.CleanAttribute(
    root.SelectSingleNode(@"//ul[@class=""tabs overview""]/li[@class=""tab discography""]/a"), "href");
  if (!string.IsNullOrEmpty(albumLink))
  {
    var albumPage = new HtmlWeb().Load("http://www.allmusic.com/" + albumLink);
    var albums = albumPage.DocumentNode.SelectNodes(@"//section[@class=""discography""]/table/tbody/tr");
    if (albums != null)
    {
      foreach (var album in albums)
      {
        var year = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""year""]"));
        var title = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""title""]/a"));
        var label = AllmusicSiteScraper.CleanInnerText(album.SelectSingleNode(@"td[@class=""label""]"));

        albumList.Append(year).Append(" - ").Append(title)
                 .Append(" (").Append(label).Append(")").Append(Environment.NewLine);
      }
    }
  }

  var artistInfo = new ArtistInfo
                     {
                       AMGBio = bio,
                       Albums = albumList.ToString(),
                       Artist = artistName,
                       Born = born,
                       Compilations = string.Empty,
                       Genres = genres,
                       Image = artistImg,
                       Instruments = string.Empty,
                       Misc = string.Empty,
                       Singles = string.Empty,
                       Styles = styles,
                       Tones = moods,
                       YearsActive = yearsActive
                     };
  Set(artistInfo);

  _bLoaded = true;
  return (_bLoaded);
}
/// <summary>
/// Shows the artist info dialog for the given artist. Cached details from the music database
/// are used when available; otherwise the artist is looked up with the allmusic scraper
/// (asking the user to choose when the search is ambiguous) and the result is stored.
/// </summary>
/// <param name="artistName">Artist to look up.</param>
/// <param name="albumName">Album name; only passed on when the lookup is repeated.</param>
protected virtual void ShowArtistInfo(string artistName, string albumName)
{
  Log.Debug("Looking up Artist: {0}", artistName);

  var dlgProgress = (GUIDialogProgress)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_PROGRESS);
  var pDlgOK = (GUIDialogOK)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_OK);

  var errorEncountered = true;
  var artist = new ArtistInfo();
  var artistInfo = new MusicArtistInfo();
  if (m_database.GetArtistInfo(artistName, ref artist))
  {
    // we already have artist info in database so just use that
    artistInfo.Set(artist);
    errorEncountered = false;
  }
  else
  {
    // lookup artist details
    if (null != pDlgOK && !Win32API.IsConnectedToInternet())
    {
      pDlgOK.SetHeading(703);
      pDlgOK.SetLine(1, 703);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
      return;
    }

    // show dialog box indicating we're searching the artist
    if (dlgProgress != null)
    {
      dlgProgress.Reset();
      dlgProgress.SetHeading(320);
      dlgProgress.SetLine(1, artistName);
      dlgProgress.SetLine(2, string.Empty);
      dlgProgress.SetPercentage(0);
      dlgProgress.StartModal(GetID);
      dlgProgress.Progress();
      dlgProgress.ShowProgressBar(true);
    }

    var scraper = new AllmusicSiteScraper();
    List<AllMusicArtistMatch> artists;
    if (scraper.GetArtists(artistName, out artists))
    {
      var selectedMatch = new AllMusicArtistMatch();
      if (artists.Count == 1)
      {
        // only have single match so no need to ask user.
        // Success is only recorded once the details page has actually been parsed.
        Log.Debug("Single Artist Match Found");
        selectedMatch = artists[0];
      }
      else
      {
        // need to get user to choose which one to use
        Log.Debug("Muliple Artist Match Found ({0}) prompting user", artists.Count);
        var pDlg = (GUIDialogSelect2)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_SELECT2);
        if (null != pDlg)
        {
          pDlg.Reset();
          pDlg.SetHeading(GUILocalizeStrings.Get(1303));
          foreach (var i in artists.Select(artistMatch => new GUIListItem
                                                            {
                                                              Label = artistMatch.Artist + " - " + artistMatch.Genre,
                                                              Label2 = artistMatch.YearsActive,
                                                              Path = artistMatch.ArtistUrl,
                                                              IconImage = artistMatch.ImageUrl
                                                            }))
          {
            pDlg.Add(i);
          }
          pDlg.DoModal(GetID);

          // and wait till user selects one
          var iSelectedMatch = pDlg.SelectedLabel;
          if (iSelectedMatch < 0 || iSelectedMatch >= artists.Count)
          {
            // User cancelled (or the index is unusable): close the progress dialog
            // that was started above so it is not left open.
            if (null != dlgProgress)
            {
              dlgProgress.Close();
            }
            return;
          }
          selectedMatch = artists[iSelectedMatch];
        }

        // the selection dialog replaced the progress dialog, so bring it back
        if (null != dlgProgress)
        {
          dlgProgress.Reset();
          dlgProgress.SetHeading(320);
          dlgProgress.SetLine(1, artistName);
          dlgProgress.SetLine(2, string.Empty);
          dlgProgress.SetPercentage(40);
          dlgProgress.StartModal(GetID);
          dlgProgress.ShowProgressBar(true);
          dlgProgress.Progress();
        }
      }

      if (null != dlgProgress)
      {
        dlgProgress.SetPercentage(60);
        dlgProgress.Progress();
      }

      // Without a URL (no match could be selected) there is nothing to scrape.
      if (!string.IsNullOrEmpty(selectedMatch.ArtistUrl) && artistInfo.Parse(selectedMatch.ArtistUrl))
      {
        if (null != dlgProgress)
        {
          dlgProgress.SetPercentage(80);
          dlgProgress.Progress();
        }
        // set values to actual artist to ensure they match track data
        // rather than values that might be returned from allmusic.com
        artistInfo.Artist = artistName;
        m_database.AddArtistInfo(artistInfo.Get());
        errorEncountered = false;
      }
    }
  }

  if (null != dlgProgress)
  {
    dlgProgress.SetPercentage(100);
    dlgProgress.Progress();
    dlgProgress.Close();
  }

  if (!errorEncountered)
  {
    var pDlgArtistInfo = (GUIMusicArtistInfo)GUIWindowManager.GetWindow((int)Window.WINDOW_ARTIST_INFO);
    if (null != pDlgArtistInfo)
    {
      pDlgArtistInfo.Artist = artistInfo;
      pDlgArtistInfo.DoModal(GetID);

      if (pDlgArtistInfo.NeedsRefresh)
      {
        // drop the stored copy and run the whole lookup again
        m_database.DeleteArtistInfo(artistInfo.Artist);
        ShowArtistInfo(artistName, albumName);
      }
    }
  }
  else
  {
    Log.Debug("Unable to get artist details");

    if (null != pDlgOK)
    {
      pDlgOK.SetHeading(702);
      pDlgOK.SetLine(1, 702);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
    }
  }
}
/// <summary>
/// Take URL of an album details page and scrape details
/// </summary>
/// <remarks>
/// Tracks are serialised into one string as "number@title@seconds|" per track.
/// The page's themes are stored in the <c>Styles</c> field and its moods in <c>Tones</c>.
/// </remarks>
/// <param name="strUrl">URL of album details page</param>
/// <returns>True if scrape was successful (the method currently always returns true)</returns>
public bool Parse(string strUrl)
{
  var albumPage = new HtmlWeb().Load(strUrl);
  var root = albumPage.DocumentNode;

  // artist
  var strAlbumArtist = AllmusicSiteScraper.CleanInnerText(root.SelectSingleNode(@"//h3[@class=""album-artist""]/span/a"));

  // album
  var strAlbum = AllmusicSiteScraper.CleanInnerText(root.SelectSingleNode(@"//h2[@class=""album-title""]"));

  // Image URL
  var imgURL = AllmusicSiteScraper.CleanAttribute(
    root.SelectSingleNode(@"//div[@class=""album-cover""]/div[@class=""album-contain""]/img"), "src");

  // Rating (stays 0 when the text is not a number)
  var iRating = 0;
  var ratingText = AllmusicSiteScraper.CleanInnerText(
    root.SelectSingleNode(@"//div[starts-with(@class,""allmusic-rating rating-allmusic"")]"));
  int.TryParse(ratingText, out iRating);

  // year: take the last run of four digits so surrounding text ("12 feb 2003", "2003 (reissue)")
  // no longer defeats the parse; text without such a run is parsed as a whole, as before.
  var iYear = 0;
  var yearText = AllmusicSiteScraper.CleanInnerText(root.SelectSingleNode(@"//div[@class=""release-date""]/span"));
  var yearDigits = Regex.Match(yearText ?? string.Empty, @"\d{4}", RegexOptions.RightToLeft);
  int.TryParse(yearDigits.Success ? yearDigits.Value : yearText, out iYear);

  // review
  var strReview = AllmusicSiteScraper.CleanInnerText(root.SelectSingleNode(@"//div[@itemprop=""reviewBody""]"));

  // build up track listing into one string
  // NOTE(review): a title containing '@' or '|' would break this format - confirm how consumers split it.
  var tracks = new System.Text.StringBuilder();
  var trackNodes = root.SelectNodes(@"//tr[@itemprop=""track""]");
  if (trackNodes != null)
  {
    foreach (var track in trackNodes)
    {
      var trackNo = AllmusicSiteScraper.CleanInnerText(track.SelectSingleNode(@"td[@class=""tracknum""]"));
      var title = AllmusicSiteScraper.CleanInnerText(
        track.SelectSingleNode(@"td[@class=""title-composer""]/div[@class=""title""]/a"));
      var strDuration = AllmusicSiteScraper.CleanInnerText(track.SelectSingleNode(@"td[@class=""time""]")) ?? string.Empty;

      // duration is given as "m:ss" and stored in seconds
      var iDuration = 0;
      var iPos = strDuration.IndexOf(":", StringComparison.Ordinal);
      if (iPos >= 0)
      {
        int iMin = 0, iSec = 0;
        Int32.TryParse(strDuration.Substring(0, iPos), out iMin);
        Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
        iDuration = (iMin * 60) + iSec;
      }

      tracks.Append(trackNo).Append("@").Append(title).Append("@")
            .Append(iDuration.ToString(CultureInfo.InvariantCulture)).Append("|");
    }
  }

  // genres
  var strGenres = string.Empty;
  var genreNodes = root.SelectNodes(@"//section[@class=""basic-info""]/div[@class=""genre""]/div/a");
  if (genreNodes != null)
  {
    strGenres = string.Join(", ", genreNodes.Select(genre => AllmusicSiteScraper.CleanInnerText(genre)).ToArray())
                      .TrimEnd(new[] { ',', ' ' });
  }

  // build up themes into one string (stored as the album's styles)
  var strThemes = string.Empty;
  var themeNodes = root.SelectNodes(@"//section[@class=""themes""]/div/span[@class=""theme""]/a");
  if (themeNodes != null)
  {
    strThemes = string.Join(", ", themeNodes.Select(theme => AllmusicSiteScraper.CleanInnerText(theme)).ToArray())
                      .TrimEnd(new[] { ',', ' ' });
  }

  // build up moods into one string
  var strMoods = string.Empty;
  var moodNodes = root.SelectNodes(@"//section[@class=""moods""]/div/span[@class=""mood""]/a");
  if (moodNodes != null)
  {
    strMoods = string.Join(", ", moodNodes.Select(mood => AllmusicSiteScraper.CleanInnerText(mood)).ToArray())
                     .TrimEnd(new[] { ',', ' ' });
  }

  var album = new AlbumInfo
                {
                  Album = strAlbum,
                  Artist = strAlbumArtist,
                  Genre = strGenres,
                  Tones = strMoods,
                  Styles = strThemes,
                  Review = strReview,
                  Image = imgURL,
                  Rating = iRating,
                  Tracks = tracks.ToString(),
                  AlbumArtist = strAlbumArtist,
                  Year = iYear
                };
  Set(album);

  Loaded = true;
  return (true);
}
/// <summary>
/// Shows the album info dialog for the given album. The info is taken from the music
/// database when available; otherwise it is looked up via <c>AllmusicSiteScraper</c>,
/// stored in the database and then shown.
/// </summary>
/// <param name="parentWindowID">ID of the window on top of which all dialogs are opened.</param>
/// <param name="isFolder">
/// When true (and <c>_createMissingFolderThumbs</c> is set) the folder thumb of
/// <paramref name="strPath"/> is replaced by the cached large album cover, if one exists.
/// </param>
/// <param name="artistName">Artist used for the lookup; also stored as the album's artist.</param>
/// <param name="albumName">Album used for the lookup; also stored as the album's title.</param>
/// <param name="strPath">Album path; stored as AlbumPath and used to locate the folder thumb.</param>
/// <param name="tag">Music tag; its Year narrows the lookup and the tag is handed to the info dialog.</param>
public void ShowAlbumInfo(int parentWindowID, bool isFolder, string artistName, string albumName, string strPath, MusicTag tag)
{
  GUIDialogOK pDlgOK = (GUIDialogOK)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_OK);
  GUIDialogProgress dlgProgress = (GUIDialogProgress)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_PROGRESS);

  bool bDisplayErr = false;
  AlbumInfo album = new AlbumInfo();
  MusicAlbumInfo albumInfo = new MusicAlbumInfo();

  if (m_database.GetAlbumInfo(albumName, artistName, ref album))
  {
    // already stored in the database, no lookup needed
    albumInfo.Set(album);
  }
  else
  {
    // a lookup is required; bail out with a message when there is no internet connection
    if (null != pDlgOK && !Win32API.IsConnectedToInternet())
    {
      pDlgOK.SetHeading(703);
      pDlgOK.SetLine(1, 703);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(parentWindowID);
      return;
    }

    // show dialog box indicating we're searching the album
    if (dlgProgress != null)
    {
      dlgProgress.Reset();
      dlgProgress.SetHeading(185);
      dlgProgress.SetLine(1, albumName);
      dlgProgress.SetLine(2, artistName);
      dlgProgress.SetLine(3, tag.Year.ToString());
      dlgProgress.SetPercentage(0);
      dlgProgress.StartModal(parentWindowID);
      dlgProgress.ShowProgressBar(true);
      dlgProgress.Progress();
    }

    // find album info
    AllmusicSiteScraper scraper = new AllmusicSiteScraper();
    if (scraper.FindAlbumInfo(albumName, artistName, tag.Year))
    {
      if (dlgProgress != null)
      {
        dlgProgress.SetPercentage(30);
        dlgProgress.Progress();
        dlgProgress.Close();
      }

      // Did we find multiple albums?
      int iSelectedAlbum = 0;
      if (scraper.IsMultiple())
      {
        string szText = GUILocalizeStrings.Get(181);
        GUIDialogSelect pDlg = (GUIDialogSelect)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_SELECT);
        if (null != pDlg)
        {
          pDlg.Reset();
          pDlg.SetHeading(szText);
          foreach (MusicAlbumInfo foundAlbum in scraper.GetAlbumsFound())
          {
            pDlg.Add(string.Format("{0} - {1}", foundAlbum.Title, foundAlbum.Artist));
          }
          pDlg.DoModal(parentWindowID);

          // and wait till user selects one
          iSelectedAlbum = pDlg.SelectedLabel;
          if (iSelectedAlbum < 0)
          {
            // selection cancelled; the progress dialog has already been closed above
            return;
          }
        }

        // ok, now show dialog we're downloading the album info
        MusicAlbumInfo selectedAlbum = scraper.GetAlbumsFound()[iSelectedAlbum];
        if (null != dlgProgress)
        {
          dlgProgress.Reset();
          dlgProgress.SetHeading(185);
          dlgProgress.SetLine(1, selectedAlbum.Title2);
          dlgProgress.SetLine(2, selectedAlbum.Artist);
          dlgProgress.StartModal(parentWindowID);
          dlgProgress.ShowProgressBar(true);
          dlgProgress.SetPercentage(40);
          dlgProgress.Progress();
        }

        if (!scraper.FindInfoByIndex(iSelectedAlbum))
        {
          if (null != dlgProgress)
          {
            dlgProgress.Close();
          }
          return;
        }
      }

      if (null != dlgProgress)
      {
        dlgProgress.SetPercentage(60);
        dlgProgress.Progress();
      }

      // Now we have either a single hit or a selected album
      // Parse it
      if (albumInfo.Parse(scraper.GetHtmlContent()))
      {
        if (null != dlgProgress)
        {
          dlgProgress.SetPercentage(80);
          dlgProgress.Progress();
        }

        // set album title and artist from musicinfotag, not the one we got from allmusic.com
        albumInfo.Title = albumName;
        albumInfo.Artist = artistName;

        // set path, needed to store album in database
        albumInfo.AlbumPath = strPath;

        album = new AlbumInfo();
        album.Album = albumInfo.Title;
        album.Artist = albumInfo.Artist;
        album.Genre = albumInfo.Genre;
        album.Tones = albumInfo.Tones;
        album.Styles = albumInfo.Styles;
        album.Review = albumInfo.Review;
        album.Image = albumInfo.ImageURL;
        album.Rating = albumInfo.Rating;
        album.Tracks = albumInfo.Tracks;

        // the release date is not guaranteed to be a plain number; leave Year untouched then
        int iYear;
        if (Int32.TryParse(albumInfo.DateOfRelease, out iYear))
        {
          album.Year = iYear;
        }

        // save to database
        m_database.AddAlbumInfo(album);

        if (null != dlgProgress)
        {
          dlgProgress.SetPercentage(100);
          dlgProgress.Progress();
          dlgProgress.Close();
        }

        if (isFolder)
        {
          // if there's an album thumb
          string thumb = Util.Utils.GetAlbumThumbName(albumInfo.Artist, albumInfo.Title);
          // use the better one
          thumb = Util.Utils.ConvertToLargeCoverArt(thumb);
          // to create a folder.jpg from it
          if (Util.Utils.FileExistsInCache(thumb) && _createMissingFolderThumbs)
          {
            try
            {
              string folderjpg = Util.Utils.GetFolderThumbForDir(strPath);
              Util.Utils.FileDelete(folderjpg);
              File.Copy(thumb, folderjpg);
            }
            catch (Exception)
            {
              // best effort only: a folder thumb that cannot be written must not
              // prevent the album info from being shown
            }
          }
        }
      }
      else
      {
        bDisplayErr = true;
      }
    }
    else
    {
      bDisplayErr = true;
    }
  }

  if (null != dlgProgress)
  {
    dlgProgress.Close();
  }

  // Only fetch the info window when there is something to show.
  GUIMusicInfo pDlgAlbumInfo = null;
  if (!bDisplayErr)
  {
    pDlgAlbumInfo = (GUIMusicInfo)GUIWindowManager.GetWindow((int)Window.WINDOW_MUSIC_INFO);
  }

  if (null != pDlgAlbumInfo)
  {
    pDlgAlbumInfo.Album = albumInfo;
    pDlgAlbumInfo.Tag = tag;
    pDlgAlbumInfo.DoModal(parentWindowID);

    if (pDlgAlbumInfo.NeedsRefresh)
    {
      // drop the stored info and start over, keeping the same parent window
      m_database.DeleteAlbumInfo(albumName, artistName);
      ShowAlbumInfo(parentWindowID, isFolder, artistName, albumName, strPath, tag);
    }
    return;
  }

  // Reached when the lookup/parse failed or when the info window is not available:
  // tell the user instead of silently doing nothing.
  if (null != pDlgOK)
  {
    pDlgOK.SetHeading(702);
    pDlgOK.SetLine(1, 702);
    pDlgOK.SetLine(2, string.Empty);
    pDlgOK.DoModal(parentWindowID);
  }
}
/// <summary>
/// Shows the artist info dialog for the given artist. The info is taken from the music
/// database when available; otherwise it is looked up via <c>AllmusicSiteScraper</c>,
/// stored in the database and then shown.
/// </summary>
/// <param name="artistName">Artist used for the lookup; also stored as the artist's name.</param>
/// <param name="albumName">Not used for the lookup; only passed along when the dialog requests a refresh.</param>
protected virtual void ShowArtistInfo(string artistName, string albumName)
{
  GUIDialogOK pDlgOK = (GUIDialogOK)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_OK);
  GUIDialogProgress dlgProgress = (GUIDialogProgress)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_PROGRESS);

  bool bDisplayErr = false;
  ArtistInfo artist = new ArtistInfo();
  MusicArtistInfo artistInfo = new MusicArtistInfo();

  if (m_database.GetArtistInfo(artistName, ref artist))
  {
    // already stored in the database, no lookup needed
    artistInfo.Set(artist);
  }
  else
  {
    // a lookup is required; bail out with a message when there is no internet connection
    if (null != pDlgOK && !Win32API.IsConnectedToInternet())
    {
      pDlgOK.SetHeading(703);
      pDlgOK.SetLine(1, 703);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
      return;
    }

    // show dialog box indicating we're searching the artist
    if (dlgProgress != null)
    {
      dlgProgress.Reset();
      dlgProgress.SetHeading(320);
      dlgProgress.SetLine(1, artistName);
      dlgProgress.SetLine(2, string.Empty);
      dlgProgress.SetPercentage(0);
      dlgProgress.StartModal(GetID);
      dlgProgress.Progress();
      dlgProgress.ShowProgressBar(true);
    }

    // find artist info
    AllmusicSiteScraper scraper = new AllmusicSiteScraper();
    if (scraper.FindInfo(AllmusicSiteScraper.SearchBy.Artists, artistName))
    {
      // did we find more than one artist?
      if (scraper.IsMultiple())
      {
        // let user choose one
        int iSelectedArtist = 0;
        string szText = GUILocalizeStrings.Get(181);
        GUIDialogSelect pDlg = (GUIDialogSelect)GUIWindowManager.GetWindow((int)Window.WINDOW_DIALOG_SELECT);
        if (null != pDlg)
        {
          pDlg.Reset();
          pDlg.SetHeading(szText);
          foreach (string selectedArtist in scraper.GetItemsFound())
          {
            pDlg.Add(selectedArtist);
          }
          pDlg.DoModal(GetID);

          // and wait till user selects one
          iSelectedArtist = pDlg.SelectedLabel;
          if (iSelectedArtist < 0)
          {
            // selection cancelled: the progress dialog started above is still
            // running and has to be closed before leaving
            if (null != dlgProgress)
            {
              dlgProgress.Close();
            }
            return;
          }
        }

        // ok, now show dialog we're downloading the artist info
        if (null != dlgProgress)
        {
          dlgProgress.Reset();
          dlgProgress.SetHeading(320);
          dlgProgress.SetLine(1, artistName);
          dlgProgress.SetLine(2, string.Empty);
          dlgProgress.SetPercentage(40);
          dlgProgress.StartModal(GetID);
          dlgProgress.ShowProgressBar(true);
          dlgProgress.Progress();
        }

        // download the artist info
        if (!scraper.FindInfoByIndex(iSelectedArtist))
        {
          if (null != dlgProgress)
          {
            dlgProgress.Close();
          }
          return;
        }
      }

      if (null != dlgProgress)
      {
        dlgProgress.SetPercentage(60);
        dlgProgress.Progress();
      }

      // Now we have either a Single hit or a selected Artist
      // Parse it
      if (artistInfo.Parse(scraper.GetHtmlContent()))
      {
        // if the artist selected from allmusic.com does not match
        // the one from the file, override the one from the allmusic
        // with the one from the file so the info is correct in the
        // database...
        // (static Equals: the parsed artist may be null)
        if (!string.Equals(artistInfo.Artist, artistName))
        {
          artistInfo.Artist = artistName;
        }

        // save to database
        m_database.AddArtistInfo(artistInfo.Get());

        if (null != dlgProgress)
        {
          dlgProgress.SetPercentage(100);
          dlgProgress.Progress();
          dlgProgress.Close();
          dlgProgress = null;
        }
      }
      else
      {
        bDisplayErr = true;
      }
    }
    else
    {
      bDisplayErr = true;
    }
  }

  if (null != dlgProgress)
  {
    dlgProgress.Close();
  }

  if (!bDisplayErr)
  {
    // ok, show Artist info
    GUIMusicArtistInfo pDlgArtistInfo = (GUIMusicArtistInfo)GUIWindowManager.GetWindow((int)Window.WINDOW_ARTIST_INFO);
    if (null != pDlgArtistInfo)
    {
      pDlgArtistInfo.Artist = artistInfo;
      pDlgArtistInfo.DoModal(GetID);

      if (pDlgArtistInfo.NeedsRefresh)
      {
        // drop the stored info and start over
        m_database.DeleteArtistInfo(artistInfo.Artist);
        ShowArtistInfo(artistName, albumName);
        return;
      }
    }
  }
  else
  {
    // lookup or parse failed; the progress dialog has already been closed above
    if (null != pDlgOK)
    {
      pDlgOK.SetHeading(702);
      pDlgOK.SetLine(1, 702);
      pDlgOK.SetLine(2, string.Empty);
      pDlgOK.DoModal(GetID);
    }
  }
}