/// <summary>
/// Builds a <see cref="Song"/> for one entry of the album's song list, requests its
/// lyrics from the <c>ajax/song</c> endpoint, and adds the song to <c>context.Songs</c>.
/// </summary>
/// <param name="Browser">Browser used to request the lyrics endpoint.</param>
/// <param name="albumData">Song list of the album; its <c>songs</c>, <c>sid</c> and <c>artists</c> lookups are read.</param>
/// <param name="songGuid">Key of the song inside <paramref name="albumData"/>.</param>
/// <param name="context">Database context that receives the new song. This method does not call <c>SaveChanges</c>.</param>
/// <returns>
/// The populated song. <c>Lyrics</c> is an empty string when the lyrics response is
/// null, has a non-zero <c>code</c>, or carries no data.
/// </returns>
public static Song FetchSong(ScrapingBrowser Browser, songlist albumData, Guid songGuid, CrawlDatabaseEntities context)
        {
            // Look the song entry up once instead of re-indexing the lookup for every field.
            var songEntry  = albumData.songs[songGuid];
            var artistGuid = songEntry.artistId;

            // The media host number is derived from the hexadecimal sid: (sid - 100) / 7.
            var subDomain = (Convert.ToInt32(albumData.sid[songGuid], 16) - 100) / 7;
            var songUrl   = "http://mus" + subDomain + "." + TOPDOMAIN + songEntry.url;

            var artistRouteName = albumData.artists[artistGuid].slug;
            var albumRouteName  = songEntry.albumSlug;

            // Use the album image when the entry is flagged as having one, otherwise the artist image.
            string displaySongImage;
            if (songEntry.haveAlbumImage == "True")
            {
                displaySongImage = GetAlbumImage(artistRouteName, albumRouteName, 2);
            }
            else
            {
                displaySongImage = GetArtistImage(artistRouteName, 2);
            }

            // Get lyric
            var           lyricUrl  = DOMAIN + String.Format("ajax/song?t=1&songId={0}", songGuid);
            WebPage       lyricPage = Browser.NavigateToPage(new Uri(lyricUrl));
            ajax_response response  = JsonConvert.DeserializeObject <ajax_response>(lyricPage.ToString());

            // DeserializeObject yields null for an empty or literal-null payload, so the
            // response itself has to be checked before its members are read.
            string lyrics = "";
            if (response != null && response.code == 0 && response.data != null && response.data.Count > 0)
            {
                lyrics = response.data[0].lyrics;
            }

            Song song = new Song()
            {
                Guid       = songGuid.ToString(),
                Title      = songEntry.song,
                ArtistGuid = artistGuid.ToString(),
                MediaUrl   = songUrl,
                Thumbnail  = displaySongImage,
                Url        = songEntry.slug,
                Lyrics     = lyrics,
                AlbumSlug  = albumRouteName,
                ArtistSlug = artistRouteName
            };

            // The song is added unconditionally; no lookup for an existing row with the
            // same Guid is performed here.
            context.Songs.Add(song);

            return song;
        }
// Example #2
// 0
        /// <summary>
        /// Scrapes an album page with a freshly configured browser: evaluates the embedded
        /// song list script, fetches every song, saves the album and artist images, and
        /// downloads each song's media file into <c>folderSongPath</c>.
        /// </summary>
        /// <param name="link">URL of the album page; it is passed to <c>new Uri(link)</c>.</param>
        /// <returns>The scraped album with title, release date, artist name, thumbnail and slug set.</returns>
        /// <exception cref="InvalidOperationException">
        /// The page has no <c>#music&gt;script</c> element, or one of the album header
        /// elements is missing.
        /// </exception>
        public static Album ScrapingAlbum(string link)
        {
            ScrapingBrowser Browser = new ScrapingBrowser();

            Browser.AllowAutoRedirect = true; // Browser has settings you can access in setup
            Browser.AllowMetaRedirect = true;
            Browser.Encoding          = Encoding.UTF8;

            WebPage PageResult = Browser.NavigateToPage(new Uri(link));

            var scriptString = PageResult.Html.CssSelect("#music>script").FirstOrDefault();
            if (scriptString == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException below.
                throw new InvalidOperationException("Song list script (#music>script) was not found on " + link);
            }

            // The wrapper declares empty MN.m_page.songlist containers, runs the page's inline
            // script, and returns MN.m_page.songlist (presumably filled in by that script).
            // NOTE(review): this executes script text taken from the remote page inside the
            // Jurassic engine - confirm the source is trusted.
            var      engine = new Jurassic.ScriptEngine();
            var      result = engine.Evaluate("(function() { var MN = {};MN.m_page= {};MN.m_page.songlist = {};MN.m_page.songlist.artists = {};MN.m_page.songlist.songs = {};MN.m_page.songlist.sid = {};" + scriptString.InnerHtml + " return MN.m_page.songlist; })()");
            var      json   = JSONObject.Stringify(engine, result);
            songlist data   = JsonConvert.DeserializeObject <songlist>(json);

            // Get list song data of album
            List <Song> listSongs = new List <Song>();

            foreach (Guid songGuid in data.songs.Keys)
            {
                listSongs.Add(FetchSong(Browser, data, songGuid));
            }

            // Crawl album
            // First() rather than FirstOrDefault(): a missing element fails at the lookup with
            // InvalidOperationException instead of a NullReferenceException on the member access.
            Album album = new Album();

            album.Title       = PageResult.Html.CssSelect("#artist-info>article>header>h1").First().InnerText.Trim();
            album.ReleaseDate = PageResult.Html.CssSelect("#artist-info>article>header>h1>time").First().InnerText.TrimStart('-').Trim();
            album.ArtistName  = PageResult.Html.CssSelect("#artist-info>article>header>h2>a").First().InnerText.Trim();
            album.Thumbnail   = PageResult.Html.CssSelect("#artist-info>article>figure>img").First().GetAttributeValue("src");
            album.Slug        = listSongs.Count > 0 ? listSongs[0].AlbumSlug : "";
            SaveImage(folderAlbumImagePath, album.Thumbnail);

            // Crawl artist: only the artist image is saved here; no artist objects are kept.
            foreach (Guid artistId in data.artists.Keys)
            {
                var artistImageUrl = GetArtistImage(data.artists[artistId].slug, 2);
                SaveImage(folderArtistImagePath, artistImageUrl);
            }

            // Save mp3 files
            foreach (Song song in listSongs)
            {
                var mp3FullPath = Path.Combine(folderSongPath, Path.GetFileName(song.MediaUrl));
                var success     = FileDownloader.DownloadFile(song.MediaUrl, mp3FullPath, 120000);
                Console.WriteLine("Done  - success: " + success);
            }

            // Hand the scraped album back instead of discarding it.
            return album;
        }
        /// <summary>
        /// Scrapes an album page with the supplied browser and stores the result in the
        /// database: every song is fetched and added through <c>FetchSong</c>, artists and
        /// the album are added when no matching row exists, and the changes are saved.
        /// Media files are not downloaded by this overload.
        /// </summary>
        /// <param name="Browser">Browser used for the album page and for the per-song requests.</param>
        /// <param name="link">URL of the album page; it is passed to <c>new Uri(link)</c>.</param>
        /// <returns>The scraped album, including its songs.</returns>
        /// <exception cref="InvalidOperationException">
        /// The page has no <c>#music&gt;script</c> element, or one of the album header
        /// elements is missing.
        /// </exception>
        public static Album ScrapingAlbum(ScrapingBrowser Browser, string link)
        {
            WebPage PageResult = Browser.NavigateToPage(new Uri(link));

            var scriptString = PageResult.Html.CssSelect("#music>script").FirstOrDefault();
            if (scriptString == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException below.
                throw new InvalidOperationException("Song list script (#music>script) was not found on " + link);
            }

            // The wrapper declares empty MN.m_page.songlist containers, runs the page's inline
            // script, and returns MN.m_page.songlist (presumably filled in by that script).
            // NOTE(review): this executes script text taken from the remote page inside the
            // Jurassic engine - confirm the source is trusted.
            var      engine = new Jurassic.ScriptEngine();
            var      result = engine.Evaluate("(function() { var MN = {};MN.m_page= {};MN.m_page.songlist = {};MN.m_page.songlist.artists = {};MN.m_page.songlist.songs = {};MN.m_page.songlist.sid = {};" + scriptString.InnerHtml + " return MN.m_page.songlist; })()");
            var      json   = JSONObject.Stringify(engine, result);
            songlist data   = JsonConvert.DeserializeObject <songlist>(json);

            // The context is disposed when the block is left, including when a step throws.
            using (CrawlDatabaseEntities context = new CrawlDatabaseEntities())
            {
                // Get list song data of album
                List <Guid> listSongGuid = new List <Guid>(data.songs.Keys);
                List <Song> listSongs    = new List <Song>(listSongGuid.Count);

                for (int i = 0; i < listSongGuid.Count; i++)
                {
                    Console.WriteLine("-------------  Begin song " + i);
                    Console.WriteLine("-------------  Song link " + listSongGuid[i]);
                    listSongs.Add(FetchSong(Browser, data, listSongGuid[i], context));
                    Console.WriteLine("-------------  End song " + i);
                }

                // Crawl album
                // First() rather than FirstOrDefault(): a missing element fails at the lookup with
                // InvalidOperationException instead of a NullReferenceException on the member access.
                Album album = new Album();

                album.Title       = PageResult.Html.CssSelect("#artist-info>article>header>h1").First().InnerText.Trim();
                album.ReleaseDate = PageResult.Html.CssSelect("#artist-info>article>header>h1>time").First().InnerText.TrimStart('-').Trim();
                album.ArtistName  = PageResult.Html.CssSelect("#artist-info>article>header>h2>a").First().InnerText.Trim();
                album.Thumbnail   = PageResult.Html.CssSelect("#artist-info>article>figure>img").First().GetAttributeValue("src");
                album.Slug        = listSongs.Count > 0 ? listSongs[0].AlbumSlug : "";
                album.Songs       = listSongs;
                SaveImage(folderAlbumImagePath, album.Thumbnail);

                // Crawl artist
                List <Guid> listArtistIds = new List <Guid>(data.artists.Keys);

                // The first listed artist becomes the album's artist; with no artists this is
                // the string form of the default (all-zero) Guid.
                album.ArtistGuid = listArtistIds.FirstOrDefault().ToString();

                foreach (Guid artistId in listArtistIds)
                {
                    var artistEntry    = data.artists[artistId];
                    var artistImageUrl = GetArtistImage(artistEntry.slug, 2);

                    var newArtist = new Artist()
                    {
                        Guid      = artistId.ToString(),
                        Name      = artistEntry.artist,
                        Slug      = artistEntry.slug,
                        Thumbnail = artistImageUrl
                    };

                    // Only artists without a stored row get added and have their image saved.
                    bool isArtistExisted = context.Artists.Any(a => a.Guid == newArtist.Guid);
                    if (!isArtistExisted)
                    {
                        context.Artists.Add(newArtist);
                        SaveImage(folderArtistImagePath, artistImageUrl);
                    }
                }

                // An album counts as already stored when title and artist name both match.
                bool isAlbumExisted = context.Albums.Any(a => a.Title == album.Title && a.ArtistName == album.ArtistName);

                if (!isAlbumExisted)
                {
                    context.Albums.Add(album);
                }

                context.SaveChanges();

                // Hand the scraped album back instead of discarding it.
                return album;
            }
        }