/// <summary>
/// Parses the HTML of an album page and fills the album fields of this instance:
/// cover image URL, review, artist, album title, rating, release year, genre,
/// styles, moods and the track list.
/// </summary>
/// <param name="html">
/// Raw HTML of the album page. A null value is treated like an empty page, so every
/// text field ends up with its placeholder value.
/// </param>
/// <returns>
/// Always <c>true</c>. Text fields that could not be extracted are set to the localized
/// string 416 (the "Not available" placeholder), and <c>Loaded</c> is set to <c>true</c>.
/// </returns>
public bool Parse(string html)
{
  // Treat a missing page like an empty one instead of failing with a NullReferenceException.
  if (html == null)
  {
    html = string.Empty;
  }

  _songs.Clear();
  HTMLUtil util = new HTMLUtil();

  // Lower-cased copy used only to locate section markers. Invariant casing keeps markers
  // such as "</div>" matchable under cultures with special 'I' rules (e.g. Turkish), and
  // the indexes found in it are applied to the original string.
  string strHtmlLow = html.ToLowerInvariant();
  string pattern;

  // Extract Cover URL
  pattern = @"<!--Begin.*?Album.*?Photo-->\s*?.*?<img.*?src=\""(.*?)\""";
  if (FindPattern(pattern, html))
  {
    _strImageURL = _match.Groups[1].Value;
  }

  // Extract Review (the album page only links to it, so the review page is downloaded)
  pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Review.*?</a>";
  if (FindPattern(pattern, html))
  {
    try
    {
      string reviewHtml = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
      pattern = @"<p.*?class=""author"">.*\s*?.*?<p.*?class=""text"">(.*?)</p>";
      if (FindPattern(pattern, reviewHtml))
      {
        string review = _match.Groups[1].Value;
        util.RemoveTags(ref review);
        util.ConvertHTMLToAnsi(review, out review);
        _strReview = review.Trim();
      }
    }
    catch (Exception)
    {
      // Deliberate best effort: the review is optional, so a failed download or
      // conversion must not abort parsing of the remaining album data.
    }
  }

  // Extract Artist
  pattern = @"<h3.*?artist</h3>\s*?.*?<a.*"">(.*)</a>";
  if (FindPattern(pattern, html))
  {
    _artist = _match.Groups[1].Value;
    util.RemoveTags(ref _artist);
  }

  // Extract Album
  pattern = @"<h3.*?album</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strTitle = _match.Groups[1].Value;
    util.RemoveTags(ref _strTitle);
  }

  // Extract Rating
  pattern = @"<h3.*?rating</h3>\s*?.*?src=""(.*?)""";
  if (FindPattern(pattern, html))
  {
    // The rating is read from a fixed offset in the image URL.
    // NOTE(review): presumably the digit in the rating image file name - confirm.
    const int ratingDigitIndex = 26;

    string strRating = _match.Groups[1].Value;
    util.RemoveTags(ref strRating);

    // Skip URLs that are too short to contain the digit rather than throwing.
    if (strRating != null && strRating.Length > ratingDigitIndex)
    {
      int rating;
      if (Int32.TryParse(strRating.Substring(ratingDigitIndex, 1), out rating))
      {
        _iRating = rating;
      }
    }
  }

  // Release Date
  pattern = @"<h3.*?release.*?date</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strDateOfRelease = _match.Groups[1].Value;
    util.RemoveTags(ref _strDateOfRelease);

    // Extract the year out of something like "1998 (release)" or "12 feb 2003":
    // take the first "19" or "20" that is followed by two more digits. A bare "19"/"20"
    // (e.g. a day of month) is skipped and the search continues behind it.
    foreach (string century in new[] { "19", "20" })
    {
      int nPos = _strDateOfRelease.IndexOf(century, StringComparison.Ordinal);
      while (nPos > -1)
      {
        // nPos + 3 must be a valid index before the two trailing digits are read.
        if (nPos + 3 < _strDateOfRelease.Length &&
            Char.IsDigit(_strDateOfRelease[nPos + 2]) &&
            Char.IsDigit(_strDateOfRelease[nPos + 3]))
        {
          _strDateOfRelease = _strDateOfRelease.Substring(nPos, 4);
          break;
        }
        nPos = _strDateOfRelease.IndexOf(century, nPos + 1, StringComparison.Ordinal);
      }
    }
  }

  // Extract Genre, Styles and Moods. All three are <li> lists that start at their
  // heading and end at the next "</div>", so they share one extraction loop.
  string[] sectionMarkers = { "<h3>genre</h3>", "<h3>style</h3>", "<h3>moods</h3>" };
  string[] sectionValues = new string[sectionMarkers.Length];
  for (int i = 0; i < sectionMarkers.Length; i++)
  {
    int sectionStart = strHtmlLow.IndexOf(sectionMarkers[i], StringComparison.Ordinal);
    if (sectionStart == -1)
    {
      continue;
    }

    // Only search for the end once the start is known; otherwise the start index
    // could lie outside a very short page.
    int sectionEnd = strHtmlLow.IndexOf("</div>", sectionStart + 2, StringComparison.Ordinal);
    if (sectionEnd == -1)
    {
      continue;
    }

    string sectionHtml = html.Substring(sectionStart, sectionEnd - sectionStart);
    if (!FindPattern(@"(<li>(.*?)</li>)", sectionHtml))
    {
      continue;
    }

    string items = "";
    while (_match.Success)
    {
      items += _match.Groups[2].Value + ", ";
      _match = _match.NextMatch();
    }
    util.RemoveTags(ref items);
    util.ConvertHTMLToAnsi(items, out items);

    // Drop the separator left behind by the last item.
    sectionValues[i] = items.Trim(new[] { ' ', ',' });
  }

  if (sectionValues[0] != null)
  {
    _strGenre = sectionValues[0];
  }
  if (sectionValues[1] != null)
  {
    _strStyles = sectionValues[1];
  }
  if (sectionValues[2] != null)
  {
    _strTones = sectionValues[2];
  }

  // Extract Songs
  int tracksStart = strHtmlLow.IndexOf("<!-- tracks table -->", StringComparison.Ordinal);
  int tracksEnd = -1;
  if (tracksStart != -1)
  {
    tracksEnd = strHtmlLow.IndexOf("<!-- end tracks table -->", tracksStart + 2, StringComparison.Ordinal);
  }

  if (tracksStart != -1 && tracksEnd != -1)
  {
    string tracksHtml = html.Substring(tracksStart, tracksEnd - tracksStart);
    pattern = @"<tr.*class=""visible"".*?\s*?<td.*</td>\s*?.*<td.*</td>\s*?.*<td.*?>(?<track>.*)</td>" +
              @"\s*?.*<td.*</td>\s*?.*<td.*?>(?<title>.*)</td>\s*?.*?<td.*?>\s*?.*</td>\s*?.*?<td.*?>(?<duration>.*)</td>";
    if (FindPattern(pattern, tracksHtml))
    {
      while (_match.Success)
      {
        // Tracknumber (stays 0 when the cell is not a number)
        int iTrack;
        Int32.TryParse(_match.Groups["track"].Value, out iTrack);

        // Song Title
        string strTitle = _match.Groups["title"].Value;
        util.RemoveTags(ref strTitle);
        util.ConvertHTMLToAnsi(strTitle, out strTitle);

        // Duration, given as "minutes:seconds" and stored in seconds
        int iDuration = 0;
        string strDuration = _match.Groups["duration"].Value;
        int iPos = strDuration.IndexOf(":", StringComparison.Ordinal);
        if (iPos >= 0)
        {
          int iMin;
          int iSec = 0;

          // Seconds only count when the minutes are valid; unparsable seconds count as 0.
          if (Int32.TryParse(strDuration.Substring(0, iPos), out iMin))
          {
            Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
          }
          iDuration = iMin * 60 + iSec;
        }

        // Create new song object
        MusicSong newSong = new MusicSong();
        newSong.Track = iTrack;
        newSong.SongName = strTitle;
        newSong.Duration = iDuration;
        _songs.Add(newSong);

        _match = _match.NextMatch();
      }
    }
  }

  // Set to "Not available" if no value from web (null-safe in case a field was never set)
  if (string.IsNullOrEmpty(_artist))
  {
    _artist = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strDateOfRelease))
  {
    _strDateOfRelease = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strGenre))
  {
    _strGenre = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTones))
  {
    _strTones = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strStyles))
  {
    _strStyles = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTitle))
  {
    _strTitle = GUILocalizeStrings.Get(416);
  }

  // The secondary title falls back to the primary title.
  if (string.IsNullOrEmpty(_strTitle2))
  {
    _strTitle2 = _strTitle;
  }

  Loaded = true;
  return true;
}
/// <summary>
/// Parses the HTML of an album page and fills the album fields of this instance:
/// cover image URL, review, artist, album title, rating, release year, genre,
/// styles, moods and the track list.
/// </summary>
/// <param name="html">
/// Raw HTML of the album page. A null value is treated like an empty page, so every
/// text field ends up with its placeholder value.
/// </param>
/// <returns>
/// Always <c>true</c>. Text fields that could not be extracted are set to the localized
/// string 416 (the "Not available" placeholder), and <c>Loaded</c> is set to <c>true</c>.
/// </returns>
public bool Parse(string html)
{
  // Treat a missing page like an empty one instead of failing with a NullReferenceException.
  if (html == null)
  {
    html = string.Empty;
  }

  _songs.Clear();
  HTMLUtil util = new HTMLUtil();

  // Lower-cased copy used only to locate section markers. Invariant casing keeps markers
  // such as "</div>" matchable under cultures with special 'I' rules (e.g. Turkish), and
  // the indexes found in it are applied to the original string.
  string strHtmlLow = html.ToLowerInvariant();
  string pattern;

  // Extract Cover URL
  pattern = @"<!--Begin.*?Album.*?Photo-->\s*?.*?<img.*?src=\""(.*?)\""";
  if (FindPattern(pattern, html))
  {
    _strImageURL = _match.Groups[1].Value;
  }

  // Extract Review (the album page only links to it, so the review page is downloaded)
  pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Review.*?</a>";
  if (FindPattern(pattern, html))
  {
    try
    {
      string reviewHtml = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
      pattern = @"<p.*?class=""author"">.*\s*?.*?<p.*?class=""text"">(.*?)</p>";
      if (FindPattern(pattern, reviewHtml))
      {
        string review = _match.Groups[1].Value;
        util.RemoveTags(ref review);
        util.ConvertHTMLToAnsi(review, out review);
        _strReview = review.Trim();
      }
    }
    catch (Exception)
    {
      // Deliberate best effort: the review is optional, so a failed download or
      // conversion must not abort parsing of the remaining album data.
    }
  }

  // Extract Artist
  pattern = @"<h3.*?artist</h3>\s*?.*?<a.*"">(.*)</a>";
  if (FindPattern(pattern, html))
  {
    _artist = _match.Groups[1].Value;
    util.RemoveTags(ref _artist);
  }

  // Extract Album
  pattern = @"<h3.*?album</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strTitle = _match.Groups[1].Value;
    util.RemoveTags(ref _strTitle);
  }

  // Extract Rating
  pattern = @"<h3.*?rating</h3>\s*?.*?src=""(.*?)""";
  if (FindPattern(pattern, html))
  {
    // The rating is read from a fixed offset in the image URL.
    // NOTE(review): presumably the digit in the rating image file name - confirm.
    const int ratingDigitIndex = 26;

    string strRating = _match.Groups[1].Value;
    util.RemoveTags(ref strRating);

    // Skip URLs that are too short to contain the digit rather than throwing.
    if (strRating != null && strRating.Length > ratingDigitIndex)
    {
      int rating;
      if (Int32.TryParse(strRating.Substring(ratingDigitIndex, 1), out rating))
      {
        _iRating = rating;
      }
    }
  }

  // Release Date
  pattern = @"<h3.*?release.*?date</h3>\s*?.*?<p>(.*)</P>";
  if (FindPattern(pattern, html))
  {
    _strDateOfRelease = _match.Groups[1].Value;
    util.RemoveTags(ref _strDateOfRelease);

    // Extract the year out of something like "1998 (release)" or "12 feb 2003":
    // take the first "19" or "20" that is followed by two more digits. A bare "19"/"20"
    // (e.g. a day of month) is skipped and the search continues behind it.
    foreach (string century in new[] { "19", "20" })
    {
      int nPos = _strDateOfRelease.IndexOf(century, StringComparison.Ordinal);
      while (nPos > -1)
      {
        // nPos + 3 must be a valid index before the two trailing digits are read.
        if (nPos + 3 < _strDateOfRelease.Length &&
            Char.IsDigit(_strDateOfRelease[nPos + 2]) &&
            Char.IsDigit(_strDateOfRelease[nPos + 3]))
        {
          _strDateOfRelease = _strDateOfRelease.Substring(nPos, 4);
          break;
        }
        nPos = _strDateOfRelease.IndexOf(century, nPos + 1, StringComparison.Ordinal);
      }
    }
  }

  // Extract Genre, Styles and Moods. All three are <li> lists that start at their
  // heading and end at the next "</div>", so they share one extraction loop.
  string[] sectionMarkers = { "<h3>genre</h3>", "<h3>style</h3>", "<h3>moods</h3>" };
  string[] sectionValues = new string[sectionMarkers.Length];
  for (int i = 0; i < sectionMarkers.Length; i++)
  {
    int sectionStart = strHtmlLow.IndexOf(sectionMarkers[i], StringComparison.Ordinal);
    if (sectionStart == -1)
    {
      continue;
    }

    // Only search for the end once the start is known; otherwise the start index
    // could lie outside a very short page.
    int sectionEnd = strHtmlLow.IndexOf("</div>", sectionStart + 2, StringComparison.Ordinal);
    if (sectionEnd == -1)
    {
      continue;
    }

    string sectionHtml = html.Substring(sectionStart, sectionEnd - sectionStart);
    if (!FindPattern(@"(<li>(.*?)</li>)", sectionHtml))
    {
      continue;
    }

    string items = "";
    while (_match.Success)
    {
      items += _match.Groups[2].Value + ", ";
      _match = _match.NextMatch();
    }
    util.RemoveTags(ref items);
    util.ConvertHTMLToAnsi(items, out items);

    // Drop the separator left behind by the last item.
    sectionValues[i] = items.Trim(new[] { ' ', ',' });
  }

  if (sectionValues[0] != null)
  {
    _strGenre = sectionValues[0];
  }
  if (sectionValues[1] != null)
  {
    _strStyles = sectionValues[1];
  }
  if (sectionValues[2] != null)
  {
    _strTones = sectionValues[2];
  }

  // Extract Songs
  int tracksStart = strHtmlLow.IndexOf("<!-- tracks table -->", StringComparison.Ordinal);
  int tracksEnd = -1;
  if (tracksStart != -1)
  {
    tracksEnd = strHtmlLow.IndexOf("<!-- end tracks table -->", tracksStart + 2, StringComparison.Ordinal);
  }

  if (tracksStart != -1 && tracksEnd != -1)
  {
    string tracksHtml = html.Substring(tracksStart, tracksEnd - tracksStart);
    pattern = @"<tr.*class=""visible"".*?\s*?<td.*</td>\s*?.*<td.*</td>\s*?.*<td.*?>(?<track>.*)</td>" +
              @"\s*?.*<td.*</td>\s*?.*<td.*?>(?<title>.*)</td>\s*?.*?<td.*?>\s*?.*</td>\s*?.*?<td.*?>(?<duration>.*)</td>";
    if (FindPattern(pattern, tracksHtml))
    {
      while (_match.Success)
      {
        // Tracknumber (stays 0 when the cell is not a number)
        int iTrack;
        Int32.TryParse(_match.Groups["track"].Value, out iTrack);

        // Song Title
        string strTitle = _match.Groups["title"].Value;
        util.RemoveTags(ref strTitle);
        util.ConvertHTMLToAnsi(strTitle, out strTitle);

        // Duration, given as "minutes:seconds" and stored in seconds
        int iDuration = 0;
        string strDuration = _match.Groups["duration"].Value;
        int iPos = strDuration.IndexOf(":", StringComparison.Ordinal);
        if (iPos >= 0)
        {
          int iMin;
          int iSec = 0;

          // Seconds only count when the minutes are valid; unparsable seconds count as 0.
          if (Int32.TryParse(strDuration.Substring(0, iPos), out iMin))
          {
            Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
          }
          iDuration = iMin * 60 + iSec;
        }

        // Create new song object
        MusicSong newSong = new MusicSong();
        newSong.Track = iTrack;
        newSong.SongName = strTitle;
        newSong.Duration = iDuration;
        _songs.Add(newSong);

        _match = _match.NextMatch();
      }
    }
  }

  // Set to "Not available" if no value from web (null-safe in case a field was never set)
  if (string.IsNullOrEmpty(_artist))
  {
    _artist = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strDateOfRelease))
  {
    _strDateOfRelease = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strGenre))
  {
    _strGenre = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTones))
  {
    _strTones = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strStyles))
  {
    _strStyles = GUILocalizeStrings.Get(416);
  }
  if (string.IsNullOrEmpty(_strTitle))
  {
    _strTitle = GUILocalizeStrings.Get(416);
  }

  // The secondary title falls back to the primary title.
  if (string.IsNullOrEmpty(_strTitle2))
  {
    _strTitle2 = _strTitle;
  }

  Loaded = true;
  return true;
}