/// <summary>
        /// Parses an album page and fills the album fields from it: cover URL, review, artist,
        /// title, rating, release year, genre, styles, moods and the track list.
        /// Fields that stay empty are replaced by the text of localized string 416.
        /// </summary>
        /// <param name="html">Markup of the album page.</param>
        /// <returns>Always <c>true</c>; <c>Loaded</c> is set to <c>true</c> before returning.</returns>
        public bool Parse(string html)
        {
            _songs.Clear();
            HTMLUtil util = new HTMLUtil();

            // Extract Cover URL
            string pattern = @"<!--Begin.*?Album.*?Photo-->\s*?.*?<img.*?src=\""(.*?)\""";
            if (FindPattern(pattern, html))
            {
                _strImageURL = _match.Groups[1].Value;
            }

            // Extract Review: the album page only links to it, so the linked page is fetched
            // through AllmusicSiteScraper.GetHTTP and searched for the review paragraph.
            pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Review.*?</a>";
            if (FindPattern(pattern, html))
            {
                try
                {
                    string contentinfo = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
                    pattern = @"<p.*?class=""author"">.*\s*?.*?<p.*?class=""text"">(.*?)</p>";
                    if (FindPattern(pattern, contentinfo))
                    {
                        string data = _match.Groups[1].Value;
                        util.RemoveTags(ref data);
                        util.ConvertHTMLToAnsi(data, out data);
                        _strReview = data.Trim();
                    }
                }
                catch (Exception)
                {
                    // Best effort: a failure while fetching or parsing the review must not
                    // abort parsing of the remaining album fields.
                }
            }

            // Extract Artist
            pattern = @"<h3.*?artist</h3>\s*?.*?<a.*"">(.*)</a>";
            if (FindPattern(pattern, html))
            {
                _artist = _match.Groups[1].Value;
                util.RemoveTags(ref _artist);
            }

            // Extract Album
            pattern = @"<h3.*?album</h3>\s*?.*?<p>(.*)</P>";
            if (FindPattern(pattern, html))
            {
                _strTitle = _match.Groups[1].Value;
                util.RemoveTags(ref _strTitle);
            }

            // Extract Rating
            pattern = @"<h3.*?rating</h3>\s*?.*?src=""(.*?)""";
            if (FindPattern(pattern, html))
            {
                string strRating = _match.Groups[1].Value;
                util.RemoveTags(ref strRating);

                // The rating digit is read from a fixed position (index 26) of the image URL.
                // A shorter value is skipped instead of letting Substring throw, and the
                // stored rating is only replaced when that character really is a number.
                int rating;
                if (strRating.Length > 26 && Int32.TryParse(strRating.Substring(26, 1), out rating))
                {
                    _iRating = rating;
                }
            }

            // Release Date
            pattern = @"<h3.*?release.*?date</h3>\s*?.*?<p>(.*)</P>";
            if (FindPattern(pattern, html))
            {
                _strDateOfRelease = _match.Groups[1].Value;
                util.RemoveTags(ref _strDateOfRelease);

                // Extract the year out of something like "1998 (release)" or "12 feb 2003".
                // As before, 19xx is looked for first and 20xx is then looked for in the result.
                _strDateOfRelease = ExtractYear(_strDateOfRelease, "19");
                _strDateOfRelease = ExtractYear(_strDateOfRelease, "20");
            }

            // Extract Genre
            string listText = ExtractListSection(html, "<h3>genre</h3>", util);
            if (listText != null)
            {
                _strGenre = listText;
            }

            // Extract Styles
            listText = ExtractListSection(html, "<h3>style</h3>", util);
            if (listText != null)
            {
                _strStyles = listText;
            }

            // Extract Moods
            listText = ExtractListSection(html, "<h3>moods</h3>", util);
            if (listText != null)
            {
                _strTones = listText;
            }

            // Extract Songs
            string tracksHtml = SliceSection(html, "<!-- tracks table -->", "<!-- end tracks table -->");
            if (tracksHtml != null)
            {
                pattern = @"<tr.*class=""visible"".*?\s*?<td.*</td>\s*?.*<td.*</td>\s*?.*<td.*?>(?<track>.*)</td>" +
                          @"\s*?.*<td.*</td>\s*?.*<td.*?>(?<title>.*)</td>\s*?.*?<td.*?>\s*?.*</td>\s*?.*?<td.*?>(?<duration>.*)</td>";

                if (FindPattern(pattern, tracksHtml))
                {
                    while (_match.Success)
                    {
                        // Track number; stays 0 when the cell is not a number
                        int iTrack;
                        Int32.TryParse(_match.Groups["track"].Value, out iTrack);

                        // Song title
                        string strTitle = _match.Groups["title"].Value;
                        util.RemoveTags(ref strTitle);
                        util.ConvertHTMLToAnsi(strTitle, out strTitle);

                        // Create new song object
                        MusicSong newSong = new MusicSong();
                        newSong.Track    = iTrack;
                        newSong.SongName = strTitle;
                        newSong.Duration = ParseDuration(_match.Groups["duration"].Value);
                        _songs.Add(newSong);

                        _match = _match.NextMatch();
                    }
                }
            }

            // Set to "Not available" if no value from web
            if (string.IsNullOrEmpty(_artist))
            {
                _artist = GUILocalizeStrings.Get(416);
            }
            if (string.IsNullOrEmpty(_strDateOfRelease))
            {
                _strDateOfRelease = GUILocalizeStrings.Get(416);
            }
            if (string.IsNullOrEmpty(_strGenre))
            {
                _strGenre = GUILocalizeStrings.Get(416);
            }
            if (string.IsNullOrEmpty(_strTones))
            {
                _strTones = GUILocalizeStrings.Get(416);
            }
            if (string.IsNullOrEmpty(_strStyles))
            {
                _strStyles = GUILocalizeStrings.Get(416);
            }
            if (string.IsNullOrEmpty(_strTitle))
            {
                _strTitle = GUILocalizeStrings.Get(416);
            }

            if (string.IsNullOrEmpty(_strTitle2))
            {
                _strTitle2 = _strTitle;
            }

            Loaded = true;
            return true;
        }

        /// <summary>
        /// Returns the first four-character year in <paramref name="text"/> that starts with
        /// <paramref name="century"/> and is followed by two digits, or <paramref name="text"/>
        /// unchanged when there is none.
        /// </summary>
        private static string ExtractYear(string text, string century)
        {
            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            int pos = text.IndexOf(century, StringComparison.Ordinal);
            while (pos > -1)
            {
                // Strictly greater: index pos + 3 must exist before it is read.
                if (text.Length > pos + 3 && Char.IsDigit(text[pos + 2]) && Char.IsDigit(text[pos + 3]))
                {
                    return text.Substring(pos, 4);
                }
                pos = text.IndexOf(century, pos + 2, StringComparison.Ordinal);
            }
            return text;
        }

        /// <summary>
        /// Returns the part of <paramref name="html"/> that starts at <paramref name="startMarker"/>
        /// and ends just before the next <paramref name="endMarker"/>, or <c>null</c> when either
        /// marker is missing. Markers are matched without regard to case.
        /// </summary>
        private static string SliceSection(string html, string startMarker, string endMarker)
        {
            int begIndex = html.IndexOf(startMarker, StringComparison.OrdinalIgnoreCase);
            if (begIndex == -1)
            {
                return null;
            }

            // Only search for the end once the start is known, so the start index is always valid.
            int endIndex = html.IndexOf(endMarker, begIndex + 2, StringComparison.OrdinalIgnoreCase);
            if (endIndex == -1)
            {
                return null;
            }
            return html.Substring(begIndex, endIndex - begIndex);
        }

        /// <summary>
        /// Collects the list items that follow <paramref name="heading"/> (up to the next closing
        /// div tag) into one comma-separated string with tags removed.
        /// Returns <c>null</c> when the section or its list items cannot be found.
        /// </summary>
        private string ExtractListSection(string html, string heading, HTMLUtil util)
        {
            string contentInfo = SliceSection(html, heading, "</div>");
            if (contentInfo == null || !FindPattern(@"(<li>(.*?)</li>)", contentInfo))
            {
                return null;
            }

            string data = "";
            while (_match.Success)
            {
                data  += string.Format("{0}, ", _match.Groups[2].Value);
                _match = _match.NextMatch();
            }
            util.RemoveTags(ref data);

            string text;
            util.ConvertHTMLToAnsi(data, out text);

            // Drop the trailing ", " left behind by the loop.
            return text.Trim(new[] { ' ', ',' });
        }

        /// <summary>
        /// Converts a "minutes:seconds" cell into seconds. Returns 0 when there is no colon or the
        /// minutes are not a number; seconds that are not a number count as 0.
        /// </summary>
        private static int ParseDuration(string strDuration)
        {
            int iPos = strDuration.IndexOf(':');
            if (iPos < 0)
            {
                return 0;
            }

            int iMin;
            int iSec = 0;
            if (Int32.TryParse(strDuration.Substring(0, iPos), out iMin))
            {
                Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
            }
            return iMin * 60 + iSec;
        }
Exemplo n.º 2
0
    /// <summary>
    /// Parses an album page and fills the album fields from it: cover URL, review, artist,
    /// title, rating, release year, genre, styles, moods and the track list.
    /// Fields that stay empty are replaced by the text of localized string 416.
    /// </summary>
    /// <param name="html">Markup of the album page.</param>
    /// <returns>Always <c>true</c>; <c>Loaded</c> is set to <c>true</c> before returning.</returns>
    public bool Parse(string html)
    {
      _songs.Clear();
      HTMLUtil util = new HTMLUtil();

      // Extract Cover URL
      string pattern = @"<!--Begin.*?Album.*?Photo-->\s*?.*?<img.*?src=\""(.*?)\""";
      if (FindPattern(pattern, html))
      {
        _strImageURL = _match.Groups[1].Value;
      }

      // Extract Review: the album page only links to it, so the linked page is fetched
      // through AllmusicSiteScraper.GetHTTP and searched for the review paragraph.
      pattern = @"<td.*?class=""tab_off""><a.*?href=""(.*?)"">.*?Review.*?</a>";
      if (FindPattern(pattern, html))
      {
        try
        {
          string contentinfo = AllmusicSiteScraper.GetHTTP(_match.Groups[1].Value);
          pattern = @"<p.*?class=""author"">.*\s*?.*?<p.*?class=""text"">(.*?)</p>";
          if (FindPattern(pattern, contentinfo))
          {
            string data = _match.Groups[1].Value;
            util.RemoveTags(ref data);
            util.ConvertHTMLToAnsi(data, out data);
            _strReview = data.Trim();
          }
        }
        catch (Exception)
        {
          // Best effort: a failure while fetching or parsing the review must not
          // abort parsing of the remaining album fields.
        }
      }

      // Extract Artist
      pattern = @"<h3.*?artist</h3>\s*?.*?<a.*"">(.*)</a>";
      if (FindPattern(pattern, html))
      {
        _artist = _match.Groups[1].Value;
        util.RemoveTags(ref _artist);
      }

      // Extract Album
      pattern = @"<h3.*?album</h3>\s*?.*?<p>(.*)</P>";
      if (FindPattern(pattern, html))
      {
        _strTitle = _match.Groups[1].Value;
        util.RemoveTags(ref _strTitle);
      }

      // Extract Rating
      pattern = @"<h3.*?rating</h3>\s*?.*?src=""(.*?)""";
      if (FindPattern(pattern, html))
      {
        string strRating = _match.Groups[1].Value;
        util.RemoveTags(ref strRating);

        // The rating digit is read from a fixed position (index 26) of the image URL.
        // A shorter value is skipped instead of letting Substring throw, and the
        // stored rating is only replaced when that character really is a number.
        int rating;
        if (strRating.Length > 26 && Int32.TryParse(strRating.Substring(26, 1), out rating))
        {
          _iRating = rating;
        }
      }

      // Release Date
      pattern = @"<h3.*?release.*?date</h3>\s*?.*?<p>(.*)</P>";
      if (FindPattern(pattern, html))
      {
        _strDateOfRelease = _match.Groups[1].Value;
        util.RemoveTags(ref _strDateOfRelease);

        // Extract the year out of something like "1998 (release)" or "12 feb 2003".
        // As before, 19xx is looked for first and 20xx is then looked for in the result.
        _strDateOfRelease = ExtractYear(_strDateOfRelease, "19");
        _strDateOfRelease = ExtractYear(_strDateOfRelease, "20");
      }

      // Extract Genre
      string listText = ExtractListSection(html, "<h3>genre</h3>", util);
      if (listText != null)
      {
        _strGenre = listText;
      }

      // Extract Styles
      listText = ExtractListSection(html, "<h3>style</h3>", util);
      if (listText != null)
      {
        _strStyles = listText;
      }

      // Extract Moods
      listText = ExtractListSection(html, "<h3>moods</h3>", util);
      if (listText != null)
      {
        _strTones = listText;
      }

      // Extract Songs
      string tracksHtml = SliceSection(html, "<!-- tracks table -->", "<!-- end tracks table -->");
      if (tracksHtml != null)
      {
        pattern = @"<tr.*class=""visible"".*?\s*?<td.*</td>\s*?.*<td.*</td>\s*?.*<td.*?>(?<track>.*)</td>" +
                  @"\s*?.*<td.*</td>\s*?.*<td.*?>(?<title>.*)</td>\s*?.*?<td.*?>\s*?.*</td>\s*?.*?<td.*?>(?<duration>.*)</td>";

        if (FindPattern(pattern, tracksHtml))
        {
          while (_match.Success)
          {
            // Track number; stays 0 when the cell is not a number
            int iTrack;
            Int32.TryParse(_match.Groups["track"].Value, out iTrack);

            // Song title
            string strTitle = _match.Groups["title"].Value;
            util.RemoveTags(ref strTitle);
            util.ConvertHTMLToAnsi(strTitle, out strTitle);

            // Create new song object
            MusicSong newSong = new MusicSong();
            newSong.Track = iTrack;
            newSong.SongName = strTitle;
            newSong.Duration = ParseDuration(_match.Groups["duration"].Value);
            _songs.Add(newSong);

            _match = _match.NextMatch();
          }
        }
      }

      // Set to "Not available" if no value from web
      if (string.IsNullOrEmpty(_artist))
      {
        _artist = GUILocalizeStrings.Get(416);
      }
      if (string.IsNullOrEmpty(_strDateOfRelease))
      {
        _strDateOfRelease = GUILocalizeStrings.Get(416);
      }
      if (string.IsNullOrEmpty(_strGenre))
      {
        _strGenre = GUILocalizeStrings.Get(416);
      }
      if (string.IsNullOrEmpty(_strTones))
      {
        _strTones = GUILocalizeStrings.Get(416);
      }
      if (string.IsNullOrEmpty(_strStyles))
      {
        _strStyles = GUILocalizeStrings.Get(416);
      }
      if (string.IsNullOrEmpty(_strTitle))
      {
        _strTitle = GUILocalizeStrings.Get(416);
      }

      if (string.IsNullOrEmpty(_strTitle2))
      {
        _strTitle2 = _strTitle;
      }

      Loaded = true;
      return true;
    }

    /// <summary>
    /// Returns the first four-character year in <paramref name="text"/> that starts with
    /// <paramref name="century"/> and is followed by two digits, or <paramref name="text"/>
    /// unchanged when there is none.
    /// </summary>
    private static string ExtractYear(string text, string century)
    {
      if (string.IsNullOrEmpty(text))
      {
        return text;
      }

      int pos = text.IndexOf(century, StringComparison.Ordinal);
      while (pos > -1)
      {
        // Strictly greater: index pos + 3 must exist before it is read.
        if (text.Length > pos + 3 && Char.IsDigit(text[pos + 2]) && Char.IsDigit(text[pos + 3]))
        {
          return text.Substring(pos, 4);
        }
        pos = text.IndexOf(century, pos + 2, StringComparison.Ordinal);
      }
      return text;
    }

    /// <summary>
    /// Returns the part of <paramref name="html"/> that starts at <paramref name="startMarker"/>
    /// and ends just before the next <paramref name="endMarker"/>, or <c>null</c> when either
    /// marker is missing. Markers are matched without regard to case.
    /// </summary>
    private static string SliceSection(string html, string startMarker, string endMarker)
    {
      int begIndex = html.IndexOf(startMarker, StringComparison.OrdinalIgnoreCase);
      if (begIndex == -1)
      {
        return null;
      }

      // Only search for the end once the start is known, so the start index is always valid.
      int endIndex = html.IndexOf(endMarker, begIndex + 2, StringComparison.OrdinalIgnoreCase);
      if (endIndex == -1)
      {
        return null;
      }
      return html.Substring(begIndex, endIndex - begIndex);
    }

    /// <summary>
    /// Collects the list items that follow <paramref name="heading"/> (up to the next closing
    /// div tag) into one comma-separated string with tags removed.
    /// Returns <c>null</c> when the section or its list items cannot be found.
    /// </summary>
    private string ExtractListSection(string html, string heading, HTMLUtil util)
    {
      string contentInfo = SliceSection(html, heading, "</div>");
      if (contentInfo == null || !FindPattern(@"(<li>(.*?)</li>)", contentInfo))
      {
        return null;
      }

      string data = "";
      while (_match.Success)
      {
        data += string.Format("{0}, ", _match.Groups[2].Value);
        _match = _match.NextMatch();
      }
      util.RemoveTags(ref data);

      string text;
      util.ConvertHTMLToAnsi(data, out text);

      // Drop the trailing ", " left behind by the loop.
      return text.Trim(new[] {' ', ','});
    }

    /// <summary>
    /// Converts a "minutes:seconds" cell into seconds. Returns 0 when there is no colon or the
    /// minutes are not a number; seconds that are not a number count as 0.
    /// </summary>
    private static int ParseDuration(string strDuration)
    {
      int iPos = strDuration.IndexOf(':');
      if (iPos < 0)
      {
        return 0;
      }

      int iMin;
      int iSec = 0;
      if (Int32.TryParse(strDuration.Substring(0, iPos), out iMin))
      {
        Int32.TryParse(strDuration.Substring(iPos + 1), out iSec);
      }
      return iMin * 60 + iSec;
    }