/// <summary>
/// Builds a <see cref="Song"/> from one entry of an album's song list, requests its lyrics
/// from the site's ajax endpoint, and adds the song to <paramref name="context"/>.
/// </summary>
/// <param name="Browser">Browser used to request the lyrics endpoint.</param>
/// <param name="albumData">Song list parsed from the album page; supplies the song, artist and sid entries.</param>
/// <param name="songGuid">Key of the song inside <paramref name="albumData"/>.</param>
/// <param name="context">Database context the song is added to. SaveChanges is not called here.</param>
/// <returns>The populated song. Its lyrics are empty when the endpoint returns no usable data.</returns>
public static Song FetchSong(ScrapingBrowser Browser, songlist albumData, Guid songGuid, CrawlDatabaseEntities context)
{
    // Look the song entry up once instead of re-indexing the dictionary for every field.
    var songEntry = albumData.songs[songGuid];
    var artistGuid = songEntry.artistId;

    // The media host number is derived from the hexadecimal sid: (sid - 100) / 7.
    var songSId = albumData.sid[songGuid];
    var subDomain = (Convert.ToInt32(songSId, 16) - 100) / 7;
    var songUrl = "http://mus" + subDomain + "." + TOPDOMAIN + songEntry.url;

    var artistRouteName = albumData.artists[artistGuid].slug;
    var albumRouteName = songEntry.albumSlug;

    // Use the album cover when the entry reports one, otherwise fall back to the artist image.
    string displaySongImage;
    if (songEntry.haveAlbumImage == "True")
    {
        displaySongImage = GetAlbumImage(artistRouteName, albumRouteName, 2);
    }
    else
    {
        displaySongImage = GetArtistImage(artistRouteName, 2);
    }

    // Get lyric
    var lyricUrl = DOMAIN + String.Format("ajax/song?t=1&songId={0}", songGuid);
    WebPage lyricPage = Browser.NavigateToPage(new Uri(lyricUrl));
    ajax_response response = JsonConvert.DeserializeObject<ajax_response>(lyricPage.ToString());

    string lyrics = "";
    // An empty or "null" body deserializes to null, so check the response itself
    // before reading its fields.
    if (response != null && response.code == 0 && response.data != null && response.data.Count > 0)
    {
        lyrics = response.data[0].lyrics;
    }

    Song song = new Song
    {
        Guid = songGuid.ToString(),
        Title = songEntry.song,
        ArtistGuid = artistGuid.ToString(),
        MediaUrl = songUrl,
        Thumbnail = displaySongImage,
        Url = songEntry.slug,
        Lyrics = lyrics,
        AlbumSlug = albumRouteName,
        ArtistSlug = artistRouteName
    };

    // The song is added unconditionally; no check for an existing row with the same Guid is made here.
    context.Songs.Add(song);

    return song;
}
/// <summary>
/// Crawls one album page with a newly created browser: evaluates the page's embedded
/// song-list script, fetches every song, saves the album and artist images, and downloads
/// each song's media file into the song folder.
/// </summary>
/// <param name="link">Absolute URL of the album page.</param>
/// <returns>
/// Always null. NOTE(review): the album built here is never returned; confirm whether
/// callers expect it.
/// </returns>
public static Album ScrapingAlbum(string link)
{
    // Redirect behaviour and encoding are configured before the first request.
    ScrapingBrowser browser = new ScrapingBrowser();
    browser.AllowAutoRedirect = true;
    browser.AllowMetaRedirect = true;
    browser.Encoding = Encoding.UTF8;

    WebPage albumPage = browser.NavigateToPage(new Uri(link));

    // The inline script fills MN.m_page.songlist; wrap it in a function that declares
    // those objects first and returns the filled song list.
    var songListScript = albumPage.Html.CssSelect("#music>script").FirstOrDefault();
    string scriptPrologue = "(function() { var MN = {};MN.m_page= {};MN.m_page.songlist = {};MN.m_page.songlist.artists = {};MN.m_page.songlist.songs = {};MN.m_page.songlist.sid = {};";
    string scriptEpilogue = " return MN.m_page.songlist; })()";
    var engine = new Jurassic.ScriptEngine();
    var evaluated = engine.Evaluate(scriptPrologue + songListScript.InnerHtml + scriptEpilogue);
    var json = JSONObject.Stringify(engine, evaluated);
    songlist data = JsonConvert.DeserializeObject<songlist>(json);

    // Get list song data of album
    List<Guid> songIds = new List<Guid>(data.songs.Keys);
    List<Song> fetchedSongs = new List<Song>();
    foreach (Guid songId in songIds)
    {
        fetchedSongs.Add(FetchSong(browser, data, songId));
    }

    // Crawl album
    Album album = new Album();
    var titleNode = albumPage.Html.CssSelect("#artist-info>article>header>h1").FirstOrDefault();
    album.Title = titleNode.InnerText.Trim();
    var releaseNode = albumPage.Html.CssSelect("#artist-info>article>header>h1>time").FirstOrDefault();
    album.ReleaseDate = releaseNode.InnerText.TrimStart('-').Trim();
    var artistNameNode = albumPage.Html.CssSelect("#artist-info>article>header>h2>a").FirstOrDefault();
    album.ArtistName = artistNameNode.InnerText.Trim();
    var coverNode = albumPage.Html.CssSelect("#artist-info>article>figure>img").FirstOrDefault();
    album.Thumbnail = coverNode.GetAttributeValue("src");

    // The album slug is taken from the first fetched song, or left empty when there are none.
    if (fetchedSongs.Count > 0)
    {
        album.Slug = fetchedSongs[0].AlbumSlug;
    }
    else
    {
        album.Slug = "";
    }
    SaveImage(folderAlbumImagePath, album.Thumbnail);

    // Crawl artist
    List<Artist> artists = new List<Artist>();
    List<Guid> artistIds = new List<Guid>(data.artists.Keys);
    foreach (Guid artistId in artistIds)
    {
        var crawledArtist = new Artist()
        {
            Guid = artistId,
            Name = data.artists[artistId].artist,
            Slug = data.artists[artistId].slug
        };
        artists.Add(crawledArtist);

        var artistImageUrl = GetArtistImage(crawledArtist.Slug, 2);
        SaveImage(folderArtistImagePath, artistImageUrl);
    }

    // Save mp3 files
    foreach (Song fetched in fetchedSongs)
    {
        var targetFileName = Path.GetFileName(fetched.MediaUrl);
        var mp3FullPath = Path.Combine(folderSongPath, targetFileName);
        // 120000 is DownloadFile's third argument (presumably a timeout in ms; confirm).
        var success = FileDownloader.DownloadFile(fetched.MediaUrl, mp3FullPath, 120000);
        Console.WriteLine("Done - success: " + success);
    }

    return null;
}
/// <summary>
/// Crawls one album page with the supplied browser and persists the result: evaluates the
/// page's embedded song-list script, fetches every song, stores artists and the album that
/// are not yet in the database, and saves their images.
/// </summary>
/// <param name="Browser">Browser used for the album page and for the per-song requests.</param>
/// <param name="link">Absolute URL of the album page.</param>
/// <returns>
/// Always null. NOTE(review): the album built here is never returned; confirm whether
/// callers expect it.
/// </returns>
public static Album ScrapingAlbum(ScrapingBrowser Browser, string link)
{
    // The context lives only for this crawl; the using block disposes it on every exit
    // path, including exceptions thrown while scraping.
    using (CrawlDatabaseEntities context = new CrawlDatabaseEntities())
    {
        WebPage albumPage = Browser.NavigateToPage(new Uri(link));

        // The inline script fills MN.m_page.songlist; wrap it in a function that declares
        // those objects first and returns the filled song list.
        var scriptString = albumPage.Html.CssSelect("#music>script").FirstOrDefault();
        var engine = new Jurassic.ScriptEngine();
        var result = engine.Evaluate("(function() { var MN = {};MN.m_page= {};MN.m_page.songlist = {};MN.m_page.songlist.artists = {};MN.m_page.songlist.songs = {};MN.m_page.songlist.sid = {};" + scriptString.InnerHtml + " return MN.m_page.songlist; })()");
        var json = JSONObject.Stringify(engine, result);
        songlist data = JsonConvert.DeserializeObject<songlist>(json);

        // Get list song data of album
        List<Guid> listSongGuid = new List<Guid>(data.songs.Keys);
        List<Song> listSongs = new List<Song>(listSongGuid.Count);
        for (int i = 0; i < listSongGuid.Count; i++)
        {
            Console.WriteLine("------------- Begin song " + i);
            Console.WriteLine("------------- Song link " + listSongGuid[i]);
            listSongs.Add(FetchSong(Browser, data, listSongGuid[i], context));
            Console.WriteLine("------------- End song " + i);
        }

        // Crawl album
        Album album = new Album();
        album.Title = albumPage.Html.CssSelect("#artist-info>article>header>h1").FirstOrDefault().InnerText.Trim();
        album.ReleaseDate = albumPage.Html.CssSelect("#artist-info>article>header>h1>time").FirstOrDefault().InnerText.TrimStart('-').Trim();
        album.ArtistName = albumPage.Html.CssSelect("#artist-info>article>header>h2>a").FirstOrDefault().InnerText.Trim();
        album.Thumbnail = albumPage.Html.CssSelect("#artist-info>article>figure>img").FirstOrDefault().GetAttributeValue("src");
        // The album slug is taken from the first fetched song, or left empty when there are none.
        album.Slug = listSongs.Count > 0 ? listSongs[0].AlbumSlug : "";
        album.Songs = listSongs;
        SaveImage(folderAlbumImagePath, album.Thumbnail);

        // Crawl artist
        List<Guid> listArtistIds = new List<Guid>(data.artists.Keys);
        // With no artists, FirstOrDefault yields the default Guid, so ArtistGuid becomes its string form.
        album.ArtistGuid = listArtistIds.FirstOrDefault().ToString();
        foreach (Guid artistId in listArtistIds)
        {
            var artistEntry = data.artists[artistId];
            var artistImageUrl = GetArtistImage(artistEntry.slug, 2);
            var newArtist = new Artist()
            {
                Guid = artistId.ToString(),
                Name = artistEntry.artist,
                Slug = artistEntry.slug,
                Thumbnail = artistImageUrl
            };

            // Only artists not yet stored are added, and only then is their image saved.
            bool isArtistExisted = context.Artists.Any(a => a.Guid == newArtist.Guid);
            if (!isArtistExisted)
            {
                context.Artists.Add(newArtist);
                SaveImage(folderArtistImagePath, artistImageUrl);
            }
        }

        // An album counts as already stored when title and artist name both match.
        bool isAlbumExisted = context.Albums.Any(a => a.Title == album.Title && a.ArtistName == album.ArtistName);
        if (!isAlbumExisted)
        {
            context.Albums.Add(album);
        }

        context.SaveChanges();
    }

    // This overload does not download the MP3 files.
    return null;
}