/// <summary>
/// Builds the POST request used to fetch one page of the user's photo stream.
/// </summary>
/// <param name="startingImageID">
/// ID sent as the <c>lastImageId</c> form field. When null or empty no form
/// body is written.
/// </param>
/// <returns>
/// The configured request. Its request stream has already been opened and
/// closed, so the caller only needs to call <c>GetResponse</c>.
/// </returns>
public HttpWebRequest BuildRequest(String startingImageID = null)
{
    String endpoint = String.Format(@"https://myspace.com/ajax/{0}/photosStream/", UserName);
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(endpoint);

    request.UserAgent = CrawlUtil.GetUserAgent();
    request.Host = "myspace.com";
    request.Method = "POST";
    request.Accept = @"application / json, text / javascript, */*; q=0.01";
    request.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
    request.Headers.Add(@"Hash", HashKey);

    bool hasStartingImage = !String.IsNullOrEmpty(startingImageID);

    // The request stream is always opened (and closed), even when there is
    // nothing to send.
    using (Stream body = request.GetRequestStream())
    {
        if (hasStartingImage)
        {
            byte[] payload = Encoding.ASCII.GetBytes(String.Format("lastImageId={0}", startingImageID));
            body.Write(payload, 0, payload.Length);
        }
    }

    return request;
}
/// <summary>
/// Pages through the photo stream and appends every returned entry to
/// <c>Photos</c>.
/// </summary>
/// <remarks>
/// Paging stops when the service reports the end of the stream, when a
/// request returns null or carries an error (that page's entries are
/// discarded), or when a page does not advance the paging cursor. Without
/// the last check a page that yields no usable <c>LastPhotoID</c> would
/// restart from the first page and loop forever, adding duplicates.
/// </remarks>
public void Read()
{
    String lastPhotoID = null;

    while (true)
    {
        PhotoStreamResponse page = RequestPhotoStream(lastPhotoID);

        if (page == null || !String.IsNullOrEmpty(page.Error))
        {
            //return on error
            return;
        }

        if (page.PhotosEntries != null)
        {
            Photos.AddRange(page.PhotosEntries);
        }

        if (page.EndOfPhotos)
        {
            return;
        }

        // No cursor, or the same cursor as the previous request: the next
        // request would return data we already have, so stop.
        if (String.IsNullOrEmpty(page.LastPhotoID) ||
            String.Equals(page.LastPhotoID, lastPhotoID, StringComparison.Ordinal))
        {
            return;
        }

        lastPhotoID = page.LastPhotoID;
        Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenAPIRequests));
    }
}
/// <summary>
/// Pages through the connection stream and appends every returned entry to
/// <c>Connections</c>.
/// </summary>
/// <remarks>
/// Starts at index 0 and follows each response's <c>NextStart</c>. Stops
/// when a response reports <c>EndOfConnections</c>, or returns immediately
/// (discarding that page) when a request yields null or carries an error.
/// </remarks>
public void Read()
{
    ConnectionStreamResponse page = new ConnectionStreamResponse();
    page.NextStart = 0;

    while (!page.EndOfConnections)
    {
        page = RequestConnectionStream(page.NextStart);

        bool failed = page == null || !String.IsNullOrEmpty(page.Error);
        if (failed)
        {
            //return on error
            return;
        }

        Connections.AddRange(page.ConnectionEntries);

        if (page.EndOfConnections)
        {
            break;
        }

        // Pause between API calls; skipped after the final page.
        Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenAPIRequests));
    }
}
/// <summary>
/// Parse root profile page.
/// </summary>
/// <remarks>
/// Downloads <c>https://myspace.com/{userName}</c> and copies the values it
/// finds into <c>Profile</c>. Parsing is best effort: any exception (network
/// failure, unexpected markup) is swallowed and the fields that were not
/// reached keep their previous values. <c>Profile.URL</c> and
/// <c>Profile.UserName</c> are assigned before the download so they are
/// populated even when the request fails.
/// </remarks>
/// <param name="userName">Profile page to parse.</param>
private void ParseProfilePage(String userName)
{
    String profileURL = String.Format(@"https://myspace.com/{0}", userName);

    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        // Identity does not depend on the page content; set it first so a
        // failed download does not leave the profile without a user name.
        Profile.URL = profileURL;
        Profile.UserName = userName;

        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
        var webRequest = (HttpWebRequest)WebRequest.Create(profileURL);
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream, also when Load throws.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var root = doc.DocumentNode;

        Profile.ProfileThumbnailImageURL = root.SelectSingleNode(@"//a[@id='profileImage']//img")?.Attributes["src"]?.Value;
        Profile.ProfileImageURL = !String.IsNullOrEmpty(Profile.ProfileThumbnailImageURL)
            ? CrawlUtil.ModifyUriFileName(Profile.ProfileThumbnailImageURL, x => "600x600")
            : null;

        // The connect button carries the id, privacy flag and display name.
        var connectButton = root.SelectSingleNode(@"//div[@class='connectButton notReversed tooltips']");
        Profile.ProfileID = connectButton?.Attributes["data-id"]?.Value;
        String privateFlag = connectButton?.Attributes["data-is-private"]?.Value;
        Profile.IsPrivate = String.Equals(privateFlag, "true", StringComparison.OrdinalIgnoreCase);
        Profile.PersonalName = connectButton?.Attributes["data-title"]?.Value;

        Profile.LocationDescription = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='location_white location ']")?.Attributes["data-display-text"]?.Value;
        Profile.Website = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='ribbon_white website ']//a")?.InnerText;

        Profile.OutConnectionTotal = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/out']//span", Profile.UserName))?.InnerText;
        Profile.InConnectionTotal = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/in']//span", Profile.UserName))?.InnerText;

        if (!Profile.IsPrivate)
        {
            var top8FriendsNode = root.SelectNodes(@"//div[@class='friendsWrapper']//ul//li//a");
            if (top8FriendsNode != null)
            {
                foreach (var friendNode in top8FriendsNode)
                {
                    Top8FriendEntry friendEntry = new Top8FriendEntry();

                    friendEntry.UserURL = friendNode?.Attributes["href"]?.Value;
                    if (!String.IsNullOrEmpty(friendEntry.UserURL) &&
                        friendEntry.UserURL.StartsWith("/", StringComparison.Ordinal))
                    {
                        // Site-relative link: make it absolute.
                        friendEntry.UserURL = string.Format(@"https://myspace.com{0}", friendEntry.UserURL);
                    }

                    friendEntry.ProfileID = friendNode?.Attributes["data-profileid"]?.Value;
                    friendEntry.ThumbnailURL = friendNode?.Attributes["data-image-url"]?.Value;
                    friendEntry.UserName = friendNode?.Attributes["data-title"]?.Value;

                    Profile.Top8Friends.Add(friendEntry);
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: a profile page that cannot be fetched or
        // parsed leaves the remaining fields unset.
    }
}
/// <summary>
/// Downloads a photo detail page and copies its counters and visible
/// comments into <paramref name="photoEntry"/>.
/// </summary>
/// <remarks>
/// Parsing is best effort: any exception is swallowed and whatever was set
/// before the failure is kept. A missing stats header no longer aborts the
/// rest of the parse; the three counters that live in it are simply left
/// null.
/// </remarks>
/// <param name="photoEntry">Entry to populate.</param>
/// <param name="detailURL">Absolute URL of the detail page.</param>
public void ParseDetailPage(PhotoEntry photoEntry, String detailURL)
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(detailURL);
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream, also when Load throws.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var root = doc.DocumentNode;

        #region Parse photo properties
        // statsNode may be absent; null-conditional access keeps the
        // remaining properties and the comments parseable in that case.
        var statsNode = root.SelectSingleNode("//div[@class='rr']//header[@class='stats']");
        photoEntry.LikesCount = statsNode?.SelectSingleNode("a[@data-view='likes']//span")?.InnerText;
        photoEntry.ConnectsCount = statsNode?.SelectSingleNode("a[@data-view='connects']//span")?.InnerText;
        photoEntry.CommentsCount = statsNode?.SelectSingleNode("a[@data-view='comments']//span")?.InnerText;
        photoEntry.SharesCount = root.SelectSingleNode("//div[@class='genInfo ']//p[@class='stats']//span")?.InnerText;
        photoEntry.ConnectsEntityKey = root.SelectSingleNode("//div[@class='rr']")?.Attributes["data-connects-entity-key"]?.Value;
        #endregion

        #region Parse visible comments
        var commentNodes = root.SelectNodes("//ol//li");
        if (commentNodes != null)
        {
            foreach (var commentNode in commentNodes)
            {
                var authorLink = commentNode.SelectSingleNode("div//div//div//a");
                var textSpan = commentNode.SelectSingleNode("div//div//div//span");
                var timeNode = commentNode.SelectSingleNode("div//div[@class='commentFooter']//time");

                PhotoCommentEntry entry = new PhotoCommentEntry();

                entry.ProfileURL = authorLink?.Attributes["href"]?.Value;
                entry.ThumbnailImageURL = commentNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                if (!String.IsNullOrEmpty(entry.ProfileURL))
                {
                    entry.ProfileURL = String.Format(@"https://myspace.com{0}", entry.ProfileURL);
                }

                entry.UserName = authorLink?.InnerText;
                entry.CommentHTML = textSpan?.InnerHtml;
                entry.Comment = textSpan?.InnerText;
                entry.DateTimeUTC = timeNode?.Attributes["datetime"]?.Value;
                entry.DateTimeDisplay = timeNode?.InnerText;

                photoEntry.Comments.Add(entry);
            }
        }
        #endregion
    }
    catch (Exception)
    {
        // Deliberate best effort: a detail page that cannot be fetched or
        // parsed leaves the entry partially populated.
    }
}
/// <summary>
/// Parse the given profile page.
/// </summary>
/// <remarks>
/// Always parses the root profile page. For non-private profiles it then
/// parses the biography, photos, optionally outgoing and incoming
/// connections, songs and videos, sleeping for a randomized delay after
/// each step.
/// </remarks>
/// <param name="logger">Logging manager.</param>
/// <param name="hashKey">Hashkey to use for API calls.</param>
/// <param name="userName">Profile name to parse.</param>
/// <param name="delayBetweenPages">Delay between page requests. This is a base value which will be slightly randomized.</param>
/// <param name="delayBetweenAPIRequests">Delay between API calls. This is a base value which will be slightly randomized.</param>
/// <param name="captureConnections">Should connections be parsed?</param>
/// <returns>Parsed profile.</returns>
public Profile Parse(
    ILog logger,
    String hashKey,
    String userName,
    int delayBetweenPages,
    int delayBetweenAPIRequests,
    bool captureConnections)
{
    HashKey = hashKey;
    DelayBetweenPages = delayBetweenPages;
    DelayBetweenAPIRequests = delayBetweenAPIRequests;
    CaptureConnections = captureConnections;

    Profile = new Profile();
    Profile.Captured = DateTime.Now;
    Profile.CapturedConnections = captureConnections;

    ParseProfilePage(userName);
    Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

    if (Profile.IsPrivate)
    {
        logger.Log(String.Format("Private profile: UserName={0}", userName));
        return Profile;
    }

    ParseBio();
    Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

    logger.Log(String.Format("Parsing Photos: UserName={0}", userName));
    ParsePhotos();
    Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

    if (CaptureConnections)
    {
        logger.Log(String.Format("Parsing Connections Out: UserName={0}", userName));
        ParseConnections(ConnectionDirection.Out);
        Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

        logger.Log(String.Format("Parsing Connections In: UserName={0}", userName));
        ParseConnections(ConnectionDirection.In);
        Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));
    }

    logger.Log(String.Format("Parsing Songs: UserName={0}", userName));
    ParseSongs();
    Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

    // The label previously read "Videos={0}" although the value is the user name.
    logger.Log(String.Format("Parsing Videos: UserName={0}", userName));
    ParseVideos();
    Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenPages));

    return Profile;
}
/// <summary>
/// Decides whether connections should be captured for the given user.
/// Not capturing connections will prevent other profiles from being parsed from this one.
/// </summary>
/// <remarks>
/// With no <c>LocationCriteria</c> configured every user qualifies.
/// Otherwise the user's location is looked up: an empty location qualifies
/// only when <c>AllowEmptyLocations</c> is set, and a non-empty location
/// qualifies when its lower-cased text contains <c>LocationCriteria</c>.
/// </remarks>
/// <param name="userName">User Name to capture connections</param>
/// <returns>true if connections should be captured</returns>
public bool CanCaptureConnections(String userName)
{
    // No filter configured: capture for everyone.
    if (string.IsNullOrEmpty(LocationCriteria))
    {
        return true;
    }

    String userLocation = CrawlUtil.GetUserLocation(userName);

    // Unknown location: the setting alone decides.
    if (String.IsNullOrEmpty(userLocation))
    {
        return AllowEmptyLocations;
    }

    return userLocation.ToLower().Contains(LocationCriteria);
}
/// <summary>
/// Parse biography information.
/// </summary>
/// <remarks>
/// Downloads <c>https://myspace.com/{Profile.UserName}/bio</c> and stores
/// the trimmed inner HTML of the first matching bio column in
/// <c>Profile.Biography</c> (null when the node is missing). Best effort:
/// any exception is swallowed and the biography is left unchanged.
/// </remarks>
private void ParseBio()
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(String.Format(@"https://myspace.com/{0}/bio", Profile.UserName));
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream, also when Load throws.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var bioNode = doc.DocumentNode.SelectSingleNode(@"//div[@class='mainBio']//div[@class='bioColumns']//div");
        Profile.Biography = bioNode?.InnerHtml?.Trim();
    }
    catch (Exception)
    {
        // Deliberate best effort: a missing or unreachable bio page is not an error.
    }
}
/// <summary>
/// Builds the POST request used to fetch one page of the user's
/// connections in the given direction.
/// </summary>
/// <param name="direction">
/// Which connection list to request. <c>Unknown</c> yields null.
/// </param>
/// <param name="startingIndex">Value sent as the <c>start</c> form field.</param>
/// <returns>
/// The configured request with its form body already written, or null when
/// <paramref name="direction"/> is <c>ConnectionDirection.Unknown</c>.
/// </returns>
public HttpWebRequest BuildRequest(ConnectionDirection direction, int startingIndex = 0)
{
    if (direction == ConnectionDirection.Unknown)
    {
        return null;
    }

    // Any value other than Out/In leaves the URL token empty.
    String directionToken;
    if (direction == ConnectionDirection.Out)
    {
        directionToken = "out";
    }
    else if (direction == ConnectionDirection.In)
    {
        directionToken = "in";
    }
    else
    {
        directionToken = "";
    }

    String endpoint = String.Format(@"https://myspace.com/ajax/{0}/connections/{1}", UserName, directionToken);
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(endpoint);

    request.UserAgent = CrawlUtil.GetUserAgent();
    request.Host = "myspace.com";
    request.Method = "POST";
    request.Accept = @"application / json, text / javascript, */*; q=0.01";
    request.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
    request.Headers.Add(@"Hash", HashKey);

    byte[] payload = Encoding.ASCII.GetBytes(String.Format("start={0}", startingIndex));
    using (Stream body = request.GetRequestStream())
    {
        body.Write(payload, 0, payload.Length);
    }

    return request;
}
/// <summary>
/// Crawl an individual profile, then its connections.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Return when <c>depth + 1</c> exceeds <c>MaxDepth</c>, when a crawl lock
///    file exists for the user, or when the user's complete file says all
///    connections were already parsed.
/// 2. Ensure the user's store directory exists and create the lock file.
/// 3. Parse the profile (or load the stored profile JSON when the user has
///    already been parsed) and, when <c>CapturePhotos</c> is set, download
///    profile images, album photos and song artwork. A failed download
///    leaves an <c>.error</c> marker file so it is not retried.
/// 4. Create or load the complete file, recurse into every connection not
///    yet marked parsed, and finally flag the profile as fully parsed.
/// Failures are logged; a missing or unreadable complete file stops
/// processing of this node instead of throwing.
/// </remarks>
/// <param name="userName">User name of profile to parse</param>
/// <param name="delayBetweenPages">Delay between pages. NOTE(review): this argument is not read in this method; the <c>DelayBetweenPages</c> member is used instead.</param>
/// <param name="depth">Depth to parse from seed</param>
public void CrawlNode(String userName, int delayBetweenPages, int depth)
{
    if (depth + 1 > MaxDepth)
    {
        return;
    }

    if (File.Exists(UserNameCrawlLock(userName)))
    {
        Log(String.Format(@"Skip Locked Profile: UserName={0}", userName));
        return;
    }

    #region Check if all connections have been parsed
    if (File.Exists(UserNameCompletePath(userName)))
    {
        try
        {
            CompleteFile checkFile = JsonConvert.DeserializeObject<CompleteFile>(File.ReadAllText(UserNameCompletePath(userName)));
            if (checkFile != null && checkFile.AllConnectionsParsed)
            {
                Log(String.Format(@"Skip Completed Profile: UserName={0}", userName));
                return;
            }
        }
        catch (Exception chk)
        {
            Log(String.Format(@"Error Checking Lock File: UserName={0}, {1}", userName, chk?.Message));
        }
    }
    #endregion

    #region Create Directory if not exists
    try
    {
        String path = Path.Combine(StoreDirectory, userName);
        if (!Directory.Exists(path))
        {
            Directory.CreateDirectory(path);
        }
    }
    catch (Exception)
    {
        Log(String.Format("Error Creating Directory: UserName={0}", userName));
    }
    #endregion

    #region Create Lock file
    try
    {
        File.Create(UserNameCrawlLock(userName)).Dispose();
    }
    catch (Exception e)
    {
        Log(String.Format(@"Error Creating Lock File: UserName={0}, {1}", userName, e?.Message));
    }
    #endregion

    Profile profile = null;

    if (!UserNameHasBeenParsed(userName))
    {
        bool captureConnections = CanCaptureConnections(userName);
        Log(String.Format(@"Parse Profile: UserName={0}, Depth={1}/{2}, CaptureConnections={3}",
            userName, depth.ToString(), MaxDepth.ToString(), captureConnections.ToString()));

        profile = new ProfileParser().Parse(
            this,
            HashKey,
            userName,
            DelayBetweenPages,
            DelayBetweenAPIRequests,
            captureConnections);

        if (profile != null)
        {
            ProfileFinished(profile);

            #region Download additional files if passes check
            if (CapturePhotos)
            {
                #region Profile Photos
                Log(String.Format(@"Download Profile Photos: UserName={0}", profile.UserName));

                if (!String.IsNullOrEmpty(profile.ProfileImageURL))
                {
                    String imagePath = Path.Combine(StoreDirectory, profile.UserName, "profile.jpg");
                    String imageErrorPath = Path.Combine(StoreDirectory, profile.UserName, "profile.jpg.error");
                    if (!File.Exists(imagePath) && !File.Exists(imageErrorPath))
                    {
                        bool success = CrawlUtil.GetFile(profile.ProfileImageURL, imagePath, 30000);
                        if (!success)
                        {
                            File.Create(imageErrorPath).Dispose();
                            Log(String.Format(@"Error Profile Photo Thumbnail: UserName={0}", profile.UserName));
                        }
                    }
                }

                if (!String.IsNullOrEmpty(profile.ProfileThumbnailImageURL))
                {
                    String thumbPath = Path.Combine(StoreDirectory, profile.UserName, "profile_sm.jpg");
                    String thumbErrorPath = Path.Combine(StoreDirectory, profile.UserName, "profile_sm.jpg.error");
                    if (!File.Exists(thumbPath) && !File.Exists(thumbErrorPath))
                    {
                        bool success = CrawlUtil.GetFile(profile.ProfileThumbnailImageURL, thumbPath, 30000);
                        if (!success)
                        {
                            File.Create(thumbErrorPath).Dispose();
                            Log(String.Format(@"Error Profile Photo Thumbnail: UserName={0}", profile.UserName));
                        }
                    }
                }
                #endregion

                #region Photos Albums
                if (profile.Photos != null && profile.Photos.Count > 0)
                {
                    Log(String.Format(@"Download Photos: UserName={0}", profile.UserName));

                    #region Ensure Photos directory exists
                    String photoAlbumsPath = Path.Combine(UserNameDirectoryPath(profile.UserName), "Photos");
                    if (!Directory.Exists(photoAlbumsPath))
                    {
                        Directory.CreateDirectory(photoAlbumsPath);
                    }
                    #endregion

                    foreach (PhotoEntry entry in profile.Photos)
                    {
                        try
                        {
                            if (!String.IsNullOrEmpty(entry.PhotoID))
                            {
                                String picturePath = photoAlbumsPath;

                                // NOTE(review): photos without an album name are never
                                // downloaded because both download steps sit inside this
                                // condition. picturePath defaulting to the Photos root
                                // suggests that may be unintended - confirm before changing.
                                // NOTE(review): AlbumName comes from the crawled page and is
                                // used as a directory name without sanitizing.
                                if (!String.IsNullOrEmpty(entry.AlbumName))
                                {
                                    #region Ensure Photo album directory exists
                                    picturePath = Path.Combine(picturePath, entry.AlbumName);
                                    if (!Directory.Exists(picturePath))
                                    {
                                        Directory.CreateDirectory(picturePath);
                                    }
                                    #endregion

                                    #region Download Thumbnail
                                    if (!String.IsNullOrEmpty(entry.ThumbnailImageURL))
                                    {
                                        String smallPath = Path.Combine(picturePath, String.Format("{0}_sm.jpg", entry.PhotoID));
                                        String smallErrorPath = Path.Combine(picturePath, String.Format("{0}_sm.error", entry.PhotoID));
                                        if (!File.Exists(smallPath) && !File.Exists(smallErrorPath))
                                        {
                                            Log(String.Format(@"Download Photo Thumbnail: UserName={0}, PhotoID={1}, Album={2}", profile.UserName, entry.PhotoID, entry.AlbumName));
                                            bool success = CrawlUtil.GetFile(entry.ThumbnailImageURL, smallPath, 30000);
                                            if (!success)
                                            {
                                                File.Create(smallErrorPath).Dispose();
                                                Log(String.Format(@"Error Downloading Photo Thumbnail: UserName={0}, PhotoID={1}, Album={2}", profile.UserName, entry.PhotoID, entry.AlbumName));
                                            }
                                        }
                                    }
                                    #endregion

                                    #region Download Full Photo
                                    if (!String.IsNullOrEmpty(entry.FullImageURL))
                                    {
                                        String fullPath = Path.Combine(picturePath, String.Format("{0}.jpg", entry.PhotoID));
                                        String fullErrorPath = Path.Combine(picturePath, String.Format("{0}.error", entry.PhotoID));
                                        if (!File.Exists(fullPath) && !File.Exists(fullErrorPath))
                                        {
                                            Log(String.Format(@"Download Photo: UserName={0}, PhotoID={1}, Album={2}", profile.UserName, entry.PhotoID, entry.AlbumName));
                                            bool success = CrawlUtil.GetFile(entry.FullImageURL, fullPath, 30000);
                                            if (!success)
                                            {
                                                File.Create(fullErrorPath).Dispose();
                                                Log(String.Format(@"Error Downloading Photo: UserName={0}, PhotoID={1}, Album={2}", profile.UserName, entry.PhotoID, entry.AlbumName));
                                            }
                                        }
                                    }
                                    #endregion
                                }

                                //Wait between each photo.
                                Thread.Sleep(CrawlUtil.GetVariableDelay(200));
                            }
                        }
                        catch (Exception)
                        {
                            Log(String.Format(@"Error Downloading Photo: UserName={0}, PhotoID={1}", profile.UserName, entry?.PhotoID));
                        }
                    }
                }
                #endregion

                #region Song Artwork
                if (profile.Songs != null && profile.Songs.Count > 0)
                {
                    Log(String.Format(@"Download Song Artwork: UserName={0}", profile.UserName));

                    #region Ensure Song_Artwork directory exists
                    String songArtworkPath = Path.Combine(UserNameDirectoryPath(profile.UserName), "Song_Artwork");
                    if (!Directory.Exists(songArtworkPath))
                    {
                        Directory.CreateDirectory(songArtworkPath);
                    }
                    #endregion

                    foreach (SongEntry entry in profile.Songs)
                    {
                        // File names are derived from remote URLs and may still be
                        // unusable as paths; one bad entry must not abort the crawl
                        // (same policy as the photo loop above).
                        try
                        {
                            #region Download Thumbnail
                            String thumbnailFileName = entry.ImageThumbnailURL?.Replace(@"/", "___")?.Replace(":", "---");
                            if (!String.IsNullOrEmpty(thumbnailFileName)
                                && !File.Exists(Path.Combine(songArtworkPath, thumbnailFileName))
                                && !File.Exists(Path.Combine(songArtworkPath, String.Format("{0}.error", thumbnailFileName))))
                            {
                                Log(String.Format(@"Download Song Artwork Thumbnail: UserName={0}, Name={1}", profile.UserName, thumbnailFileName));
                                bool success = CrawlUtil.GetFile(entry.ImageThumbnailURL, Path.Combine(songArtworkPath, thumbnailFileName), 30000);
                                if (!success)
                                {
                                    File.Create(Path.Combine(songArtworkPath, String.Format("{0}.error", thumbnailFileName))).Dispose();
                                    Log(String.Format(@"Error Downloading Song Artwork Thumbnail: UserName={0}, Name={1}", profile.UserName, thumbnailFileName));
                                }
                            }
                            #endregion

                            #region Download Full Image
                            String imageFileName = entry.ImageURL?.Replace(@"/", "___")?.Replace(":", "---");
                            if (!String.IsNullOrEmpty(imageFileName)
                                && !File.Exists(Path.Combine(songArtworkPath, imageFileName))
                                && !File.Exists(Path.Combine(songArtworkPath, String.Format("{0}.error", imageFileName))))
                            {
                                Log(String.Format(@"Download Song Artwork: UserName={0}, Name={1}", profile.UserName, imageFileName));
                                bool success = CrawlUtil.GetFile(entry.ImageURL, Path.Combine(songArtworkPath, imageFileName), 60000);
                                if (!success)
                                {
                                    File.Create(Path.Combine(songArtworkPath, String.Format("{0}.error", imageFileName))).Dispose();
                                    Log(String.Format(@"Error Downloading Song Artwork: UserName={0}, Name={1}", profile.UserName, imageFileName));
                                }
                            }
                            #endregion
                        }
                        catch (Exception songError)
                        {
                            Log(String.Format(@"Error Downloading Song Artwork: UserName={0}, SongID={1}, {2}", profile.UserName, entry?.SongID, songError.Message));
                        }

                        //Wait between each photo.
                        Thread.Sleep(CrawlUtil.GetVariableDelay(200));
                    }
                }
                #endregion
            }
            #endregion
        }
    }
    else
    {
        Log(String.Format(@"Load Profile: UserName={0}", userName));
        try
        {
            profile = JsonConvert.DeserializeObject<Profile>(File.ReadAllText(UserNameProfilePath(userName)));
        }
        catch (Exception e)
        {
            Log(String.Format(@"Error Loading Profile: UserName={0}, {1}", userName, e?.Message));
        }
    }

    if (profile == null)
    {
        return;
    }

    CompleteFile completeFile = null;

    #region Ensure complete file has been created and populated
    if (!File.Exists(UserNameCompletePath(userName)))
    {
        try
        {
            completeFile = new CompleteFile(userName);
            if (profile.Connections != null)
            {
                foreach (ConnectionEntry c in profile.Connections)
                {
                    completeFile.AllConnectionsParsed = false;
                    completeFile.ConnectionsParsed.Add(
                        new ConnectionParsedEntry()
                        {
                            UserName = c.UserName,
                            Parsed = false,
                            DateParsed = null
                        }
                    );
                }
            }

            File.WriteAllText(
                UserNameCompletePath(userName),
                JsonConvert.SerializeObject(completeFile, Formatting.Indented));
        }
        catch (Exception e)
        {
            // Previously the exception message was logged in the UserName slot.
            Log(String.Format(@"Error Creating Complete File: UserName={0}, {1}", userName, e?.Message));
        }
    }
    else
    {
        try
        {
            completeFile = JsonConvert.DeserializeObject<CompleteFile>(File.ReadAllText(UserNameCompletePath(userName)));
        }
        catch (Exception e)
        {
            Log(String.Format(@"Error Loading Complete File: UserName={0}, {1}", userName, e?.Message));
        }
    }
    #endregion

    // Without a usable complete file there is nothing to track progress in.
    if (completeFile == null)
    {
        Log(String.Format(@"Error Missing Complete File: UserName={0}", userName));
        return;
    }

    if (profile.Connections != null)
    {
        foreach (ConnectionEntry c in profile.Connections)
        {
            ConnectionParsedEntry parsedEntry = completeFile.ConnectionsParsed?
                .FirstOrDefault(x => String.Equals(x.UserName, c.UserName, StringComparison.OrdinalIgnoreCase));

            if (parsedEntry == null)
            {
                Log(String.Format(@"Error Missing Connection Complete Entry: UserName={0}, ConnectionUserName={1}", userName, c.UserName));
                continue;
            }

            if (parsedEntry.Parsed)
            {
                continue;
            }

            CrawlNode(c.UserName, DelayBetweenPages, depth + 1);

            #region Update complete file
            try
            {
                parsedEntry.Parsed = true;
                parsedEntry.DateParsed = DateTime.Now;
                File.WriteAllText(
                    UserNameCompletePath(userName),
                    JsonConvert.SerializeObject(completeFile, Formatting.Indented));
            }
            catch (Exception p)
            {
                // The placeholder was "{2]", which made String.Format throw
                // a FormatException from inside this handler.
                Log(String.Format(@"Error Updating Complete Entry: UserName={0}, ConnectionUserName={1}, {2}", userName, c.UserName, p?.Message));
            }
            #endregion
        }
    }

    #region Update complete flag
    try
    {
        Log(String.Format(@"All Connections Parsed: UserName={0}", userName));
        completeFile.AllConnectionsParsed = true;
        File.WriteAllText(
            UserNameCompletePath(userName),
            JsonConvert.SerializeObject(completeFile, Formatting.Indented));
    }
    catch (Exception p)
    {
        Log(String.Format(@"Error Updating Complete File: UserName={0}, {1}", userName, p?.Message));
    }
    #endregion
}
/// <summary>
/// Downloads the user's songs page and adds one <c>SongEntry</c> per play
/// button to <c>Songs</c>.
/// </summary>
/// <remarks>
/// For each song the summary attributes are read from the play button. When
/// the song has a URL, its detail page is fetched (after a randomized delay)
/// to read the play count, label and release date. Entries without a song
/// id are dropped. Both the list request and every detail request are best
/// effort: exceptions are swallowed, and a failed detail request still
/// keeps the summary data.
/// </remarks>
public void Read()
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(String.Format(@"https://myspace.com/{0}/music/songs", UserName));
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream, also when Load throws.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var songsNode = doc.DocumentNode.SelectNodes(@"//button[@class='playBtn play_25 song']");
        if (songsNode == null)
        {
            return;
        }

        foreach (var songNode in songsNode)
        {
            SongEntry entry = new SongEntry();

            #region Parse summary page
            entry.SongID = songNode?.Attributes["data-song-id"]?.Value;
            entry.SongTitle = songNode?.Attributes["data-title"]?.Value;
            entry.SongURL = songNode?.Attributes["data-url"]?.Value;
            if (!String.IsNullOrEmpty(entry.SongURL))
            {
                entry.SongURL = String.Format(@"https://myspace.com{0}", entry.SongURL);
            }

            entry.AlbumID = songNode?.Attributes["data-album-id"]?.Value;
            entry.AlbumTitle = songNode?.Attributes["data-album-title"]?.Value;
            entry.AlbumURL = songNode?.Attributes["data-album-url"]?.Value;
            if (!String.IsNullOrEmpty(entry.AlbumURL))
            {
                entry.AlbumURL = String.Format(@"https://myspace.com{0}", entry.AlbumURL);
            }

            entry.ArtistID = songNode?.Attributes["data-artist-id"]?.Value;
            entry.ArtistTitle = songNode?.Attributes["data-artist-name"]?.Value;
            entry.ArtistURL = songNode?.Attributes["data-artist-url"]?.Value;
            if (!String.IsNullOrEmpty(entry.ArtistURL))
            {
                entry.ArtistURL = String.Format(@"https://myspace.com{0}", entry.ArtistURL);
            }

            entry.DurationInSeconds = songNode?.Attributes["data-duration"]?.Value;
            entry.VideoID = songNode?.Attributes["data-video-id"]?.Value;
            entry.YoutubeID = songNode?.Attributes["data-youtube-id"]?.Value;
            if (!String.IsNullOrEmpty(entry.YoutubeID))
            {
                entry.YoutubeURL = String.Format(@"https://www.youtube.com/watch?v={0}", entry.YoutubeID);
            }

            entry.ImageThumbnailURL = songNode?.Attributes["data-image-url"]?.Value;
            entry.ImageURL = !String.IsNullOrEmpty(entry.ImageThumbnailURL)
                ? CrawlUtil.ModifyUriFileName(entry.ImageThumbnailURL, x => "full")
                : null;

            entry.GenreID = songNode?.Attributes["data-genre-id"]?.Value;
            entry.GenreName = songNode?.Attributes["data-genre-name"]?.Value;
            entry.MediaID = songNode?.Attributes["data-media-id"]?.Value;
            entry.MediaType = songNode?.Attributes["data-media-type"]?.Value;
            entry.UID = songNode?.Attributes["data-uid"]?.Value;

            // Flags are the literal text "true" in any casing; anything else,
            // including a missing attribute, is false.
            entry.IsPremium = String.Equals(songNode?.Attributes["data-is-premium"]?.Value, "true", StringComparison.OrdinalIgnoreCase);
            entry.IsExplicit = String.Equals(songNode?.Attributes["data-is-explicit"]?.Value, "true", StringComparison.OrdinalIgnoreCase);
            entry.IsFullLength = String.Equals(songNode?.Attributes["data-is-full-length"]?.Value, "true", StringComparison.OrdinalIgnoreCase);
            entry.IsAdsProhibited = String.Equals(songNode?.Attributes["data-ads-prohibited"]?.Value, "true", StringComparison.OrdinalIgnoreCase);
            #endregion

            #region Parse detail page
            if (!String.IsNullOrEmpty(entry.SongURL))
            {
                Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenAPIRequests));
                try
                {
                    var detailDoc = new HtmlAgilityPack.HtmlDocument();
                    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
                    detailDoc.OptionWriteEmptyNodes = true;

                    var webDetailRequest = (HttpWebRequest)WebRequest.Create(entry.SongURL);
                    webDetailRequest.UserAgent = CrawlUtil.GetUserAgent();

                    using (WebResponse detailResponse = webDetailRequest.GetResponse())
                    using (Stream detailStream = detailResponse.GetResponseStream())
                    {
                        detailDoc.Load(detailStream);
                    }

                    // The first "plays" div is expected to hold the caption
                    // and the second the value.
                    var playsNodes = detailDoc.DocumentNode.SelectNodes(@"//div[@class='plays']");
                    if (playsNodes != null && playsNodes.Count >= 2
                        && String.Equals(playsNodes[0]?.InnerText, "PLAYS", StringComparison.OrdinalIgnoreCase))
                    {
                        entry.PlayCount = playsNodes[1]?.InnerText;
                    }

                    var asideNodes = detailDoc.DocumentNode.SelectNodes(@"//aside[@class='dotted top']");
                    if (asideNodes != null && asideNodes.Count >= 1
                        && asideNodes[0]?.InnerText != null
                        && asideNodes[0].InnerText.Contains("Length"))
                    {
                        // ".//" keeps the search inside this aside; a leading "//"
                        // is evaluated from the document root and would also pick
                        // up dt/dd elements elsewhere on the page.
                        var songDetailItemNodesDt = asideNodes[0].SelectNodes(".//dt");
                        var songDetailItemNodesDd = asideNodes[0].SelectNodes(".//dd");

                        if (songDetailItemNodesDt != null && songDetailItemNodesDt.Count > 0
                            && songDetailItemNodesDd != null && songDetailItemNodesDd.Count > 0
                            && songDetailItemNodesDt.Count == songDetailItemNodesDd.Count)
                        {
                            // Map each dt caption to the position of its dd value.
                            // The first occurrence wins, so a repeated caption does
                            // not throw and discard the whole table.
                            Dictionary<String, int> tableIndex = new Dictionary<string, int>();
                            for (int i = 0; i < songDetailItemNodesDt.Count; i++)
                            {
                                String caption = songDetailItemNodesDt[i].InnerText;
                                if (caption != null && !tableIndex.ContainsKey(caption))
                                {
                                    tableIndex[caption] = i;
                                }
                            }

                            int labelIndex;
                            if (tableIndex.TryGetValue("Label", out labelIndex) && labelIndex < songDetailItemNodesDd.Count)
                            {
                                entry.Label = songDetailItemNodesDd[labelIndex]?.InnerText;
                            }

                            int releaseIndex;
                            if (tableIndex.TryGetValue("Release", out releaseIndex) && releaseIndex < songDetailItemNodesDd.Count)
                            {
                                entry.ReleaseDate = songDetailItemNodesDd[releaseIndex]?.InnerText;
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: keep the summary data when the
                    // detail page cannot be fetched or parsed.
                }
            }
            #endregion

            if (!String.IsNullOrEmpty(entry.SongID))
            {
                Songs.Add(entry);
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: songs collected before the failure are kept.
    }
}
/// <summary>
/// Requests one page of the photo stream and converts it into a
/// <c>PhotoStreamResponse</c>.
/// </summary>
/// <remarks>
/// The service answers with JSON whose <c>endOfPhotos</c> flag and
/// <c>view</c> HTML fragment are read here. Every list item of the
/// fragment becomes a <c>PhotoEntry</c>; when it has a detail URL the
/// detail page is parsed as well. Entries without a photo id are dropped.
/// Any exception is captured in <c>Error</c> instead of being thrown;
/// entries collected before the failure stay in the response.
/// </remarks>
/// <param name="startingImageID">ID of the last photo already received, or null for the first page.</param>
/// <returns>
/// The parsed page. <c>LastPhotoID</c> is the id of the last collected
/// entry, or left unset when no entry was collected.
/// </returns>
public PhotoStreamResponse RequestPhotoStream(String startingImageID)
{
    PhotoStreamResponse fullResponse = new PhotoStreamResponse();

    try
    {
        HttpWebRequest request = BuildRequest(startingImageID);

        // Read the whole body first and release the connection before the
        // per-photo detail requests below are issued.
        String result;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // Fall back to UTF-8 when the response names no character set.
            Encoding responseEncoding = String.IsNullOrEmpty(response.CharacterSet)
                ? Encoding.UTF8
                : Encoding.GetEncoding(response.CharacterSet);

            using (StreamReader sr = new StreamReader(response.GetResponseStream(), responseEncoding))
            {
                result = sr.ReadToEnd();
            }
        }

        JObject model = JObject.Parse(result);
        fullResponse.EndOfPhotos = (bool)model["endOfPhotos"];
        String htmlDocument = (String)model["view"];

        var doc = new HtmlAgilityPack.HtmlDocument();
        HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
        doc.OptionWriteEmptyNodes = true;
        doc.LoadHtml(htmlDocument);

        var photoNodes = doc.DocumentNode.SelectNodes("//ul[@id='photosContainer']//li");
        if (photoNodes != null)
        {
            foreach (var photoNode in photoNodes)
            {
                PhotoEntry entry = new PhotoEntry();

                entry.Caption = photoNode.SelectSingleNode("div//div//span[@class='photoCaption postText']")?.InnerText;
                entry.ThumbnailImageURL = photoNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                entry.FullImageURL = !String.IsNullOrEmpty(entry.ThumbnailImageURL)
                    ? CrawlUtil.ModifyUriFileName(entry.ThumbnailImageURL, x => "full")
                    : null;
                entry.PhotoID = photoNode.Attributes["data-photoId"]?.Value;
                entry.AlbumName = photoNode.SelectSingleNode("span[@itemprop='name']")?.InnerText;

                entry.DetailPageURL = photoNode.SelectSingleNode("a")?.Attributes["content"]?.Value;
                if (!String.IsNullOrEmpty(entry.DetailPageURL))
                {
                    entry.DetailPageURL = String.Format(@"https://myspace.com{0}", entry.DetailPageURL);
                    ParseDetailPage(entry, entry.DetailPageURL);
                }

                if (!String.IsNullOrEmpty(entry.PhotoID))
                {
                    fullResponse.PhotosEntries.Add(entry);
                }
            }
        }
    }
    catch (Exception e)
    {
        fullResponse.Error = e.Message;
    }

    // "&&" (not "&"): the count must not be read when the list is null.
    if (fullResponse.PhotosEntries != null && fullResponse.PhotosEntries.Count > 0)
    {
        fullResponse.LastPhotoID = fullResponse.PhotosEntries[fullResponse.PhotosEntries.Count - 1]?.PhotoID;
    }

    return fullResponse;
}
/// <summary>
/// Start download process. Loads the saved profile JSON for UserName, or downloads and saves it
/// when no profile file exists yet. When CapturePhotos is set and DownloadPhotosCheck is either
/// null or accepts the profile, the profile images, album photos and song artwork are downloaded.
/// Files that already exist, or that have an ".error" marker file from a failed attempt, are skipped.
/// </summary>
public void Download()
{
    #region Ensure path exists
    String userDirectory = UserNameDirectoryPath(UserName);
    if (!Directory.Exists(userDirectory))
    {
        Directory.CreateDirectory(userDirectory);
    }
    #endregion

    Logger = new Logger(userDirectory, String.Format("download_{0}", UserName));
    Logger.Log(String.Format(@"Start Download Process: UserName={0}", UserName));

    String profilePath = UserNameProfilePath(UserName);
    if (!File.Exists(profilePath))
    {
        #region Download Profile
        Logger.Log(String.Format(@"Download Profile: UserName={0}", UserName));
        ProfileParser profileParse = new ProfileParser();
        Profile = profileParse.Parse(Logger, HashKey, UserName, 250, 200, CaptureConnections);
        if (Profile == null)
        {
            Logger.Log(String.Format(@"Empty Profile: UserName={0}", UserName));
            return;
        }
        Logger.Log(String.Format(@"Downloaded Profile: UserName={0}", UserName));

        try
        {
            File.WriteAllText(profilePath, JsonConvert.SerializeObject(Profile, Formatting.Indented));
        }
        catch (Exception e)
        {
            Logger.Log(String.Format(@"Error Saving Profile: UserName={0}, {1}", UserName, e.Message));
            return;
        }
        Logger.Log(String.Format(@"Save Profile JSON: UserName={0}", UserName));
        #endregion
    }
    else
    {
        #region Load Profile JSON
        Logger.Log(String.Format(@"Load Profile JSON: UserName={0}", UserName));
        try
        {
            Profile = JsonConvert.DeserializeObject<Profile>(File.ReadAllText(profilePath));
            if (Profile == null)
            {
                Logger.Log(String.Format(@"Empty Profile: UserName={0}", UserName));
                return;
            }
        }
        catch (Exception e)
        {
            Logger.Log(String.Format(@"Error loading profile: UserName={0}, {1}", UserName, e.Message));
            return;
        }
        Logger.Log(String.Format(@"Loaded Profile JSON: UserName={0}", UserName));
        #endregion
    }

    //Download additional files if passes check.
    //Profile is never null here: both branches above return when it is.
    if (CapturePhotos && (DownloadPhotosCheck == null || DownloadPhotosCheck.Invoke(Profile)))
    {
        Logger.Log(String.Format(@"Download Profile Photos: UserName={0}", UserName));
        DownloadProfileImages();
        DownloadAlbumPhotos();
        DownloadSongArtwork();
    }

    Logger.Log(String.Format(@"Done: UserName={0}", UserName));
}

/// <summary>
/// Downloads the full-size and thumbnail profile images into the user's directory.
/// </summary>
private void DownloadProfileImages()
{
    String profileDirectory = Path.Combine(StoreDirectory, UserName);

    DownloadUnlessAttempted(
        Profile.ProfileImageURL, profileDirectory, "profile.jpg", "profile.jpg.error", 30000,
        null,
        String.Format(@"Error Profile Photo: UserName={0}", UserName));

    DownloadUnlessAttempted(
        Profile.ProfileThumbnailImageURL, profileDirectory, "profile_sm.jpg", "profile_sm.jpg.error", 30000,
        null,
        String.Format(@"Error Profile Photo Thumbnail: UserName={0}", UserName));
}

/// <summary>
/// Downloads the thumbnail and full image of every photo that has an ID. Photos with an album
/// name go into a sub-directory of "Photos" named after the album; photos without one go
/// directly into "Photos".
/// </summary>
private void DownloadAlbumPhotos()
{
    if (Profile.Photos == null || Profile.Photos.Count == 0)
    {
        return;
    }

    Logger.Log(String.Format(@"Download Photos: UserName={0}", UserName));

    String photoAlbumsPath = Path.Combine(UserNameDirectoryPath(UserName), "Photos");
    if (!Directory.Exists(photoAlbumsPath))
    {
        Directory.CreateDirectory(photoAlbumsPath);
    }

    foreach (PhotoEntry entry in Profile.Photos)
    {
        try
        {
            if (String.IsNullOrEmpty(entry.PhotoID))
            {
                continue;
            }

            String picturePath = photoAlbumsPath;
            if (!String.IsNullOrEmpty(entry.AlbumName))
            {
                // NOTE(review): AlbumName comes from the parsed page and is used verbatim as a
                // directory name; consider sanitising it (path separators, "..") - confirm.
                picturePath = Path.Combine(picturePath, entry.AlbumName);
                if (!Directory.Exists(picturePath))
                {
                    Directory.CreateDirectory(picturePath);
                }
            }

            // The downloads run for every photo with an ID, not only for photos that belong
            // to an album; album-less photos use the "Photos" root chosen above.
            DownloadUnlessAttempted(
                entry.ThumbnailImageURL, picturePath,
                String.Format("{0}_sm.jpg", entry.PhotoID),
                String.Format("{0}_sm.error", entry.PhotoID),
                30000,
                String.Format(@"Download Photo Thumbnail: UserName={0}, PhotoID={1}, Album={2}", UserName, entry.PhotoID, entry.AlbumName),
                String.Format(@"Error Downloading Photo Thumbnail: UserName={0}, PhotoID={1}, Album={2}", UserName, entry.PhotoID, entry.AlbumName));

            DownloadUnlessAttempted(
                entry.FullImageURL, picturePath,
                String.Format("{0}.jpg", entry.PhotoID),
                String.Format("{0}.error", entry.PhotoID),
                30000,
                String.Format(@"Download Photo: UserName={0}, PhotoID={1}, Album={2}", UserName, entry.PhotoID, entry.AlbumName),
                String.Format(@"Error Downloading Photo: UserName={0}, PhotoID={1}, Album={2}", UserName, entry.PhotoID, entry.AlbumName));

            //Wait between each photo.
            Thread.Sleep(CrawlUtil.GetVariableDelay(200));
        }
        catch (Exception e)
        {
            // Keep going with the next photo, but record why this one failed.
            Logger.Log(String.Format(@"Error Downloading Photo: UserName={0}, PhotoID={1}, {2}", UserName, entry?.PhotoID, e.Message));
        }
    }
}

/// <summary>
/// Downloads the thumbnail and full artwork image of every song into "Song_Artwork".
/// File names are derived from the image URL by replacing "/" and ":".
/// </summary>
private void DownloadSongArtwork()
{
    if (Profile.Songs == null || Profile.Songs.Count == 0)
    {
        return;
    }

    Logger.Log(String.Format(@"Download Song Artwork: UserName={0}", UserName));

    String songArtworkPath = Path.Combine(UserNameDirectoryPath(UserName), "Song_Artwork");
    if (!Directory.Exists(songArtworkPath))
    {
        Directory.CreateDirectory(songArtworkPath);
    }

    foreach (SongEntry entry in Profile.Songs)
    {
        String thumbnailFileName = entry.ImageThumbnailURL?.Replace(@"/", "___")?.Replace(":", "---");
        DownloadUnlessAttempted(
            entry.ImageThumbnailURL, songArtworkPath,
            thumbnailFileName,
            String.Format("{0}.error", thumbnailFileName),
            30000,
            String.Format(@"Download Song Artwork Thumbnail: UserName={0}, Name={1}", UserName, thumbnailFileName),
            String.Format(@"Error Downloading Song Artwork Thumbnail: UserName={0}, Name={1}", UserName, thumbnailFileName));

        String imageFileName = entry.ImageURL?.Replace(@"/", "___")?.Replace(":", "---");
        DownloadUnlessAttempted(
            entry.ImageURL, songArtworkPath,
            imageFileName,
            String.Format("{0}.error", imageFileName),
            60000,
            String.Format(@"Download Song Artwork: UserName={0}, Name={1}", UserName, imageFileName),
            String.Format(@"Error Downloading Song Artwork: UserName={0}, Name={1}", UserName, imageFileName));

        //Wait between each photo.
        Thread.Sleep(CrawlUtil.GetVariableDelay(200));
    }
}

/// <summary>
/// Downloads <paramref name="url"/> into <paramref name="directory"/> unless the URL is empty,
/// the target file already exists, or an error marker from an earlier attempt exists.
/// When the download fails, an empty error marker file is created and the error is logged.
/// </summary>
/// <param name="url">Source URL; nothing happens when it is null or empty.</param>
/// <param name="directory">Directory that receives the file and the error marker.</param>
/// <param name="fileName">Name of the downloaded file inside <paramref name="directory"/>.</param>
/// <param name="errorFileName">Name of the marker file created when the download fails.</param>
/// <param name="timeout">Value passed through as the third argument of CrawlUtil.GetFile.</param>
/// <param name="startMessage">Message logged right before downloading; null to log nothing.</param>
/// <param name="errorMessage">Message logged when the download fails.</param>
private void DownloadUnlessAttempted(String url, String directory, String fileName, String errorFileName, int timeout, String startMessage, String errorMessage)
{
    // Check the URL before building any path: the file names may be null when the URL is.
    if (String.IsNullOrEmpty(url))
    {
        return;
    }

    String targetPath = Path.Combine(directory, fileName);
    String errorMarkerPath = Path.Combine(directory, errorFileName);
    if (File.Exists(targetPath) || File.Exists(errorMarkerPath))
    {
        return;
    }

    if (startMessage != null)
    {
        Logger.Log(startMessage);
    }

    bool success = CrawlUtil.GetFile(url, targetPath, timeout);
    if (!success)
    {
        // The marker prevents retrying the same failing download on the next run.
        File.Create(errorMarkerPath).Dispose();
        Logger.Log(errorMessage);
    }
}
/// <summary>
/// Start crawl. Reads every "*.profile.json" file below StoreDirectory and records the location
/// of each profile and of each of its connections in "locations.json". User names already present
/// in the records are skipped; for new connections the location is fetched with
/// CrawlUtil.GetUserLocation. The locations file is rewritten after each profile file.
/// </summary>
public void Crawl()
{
    //Ensure HTTPS will work correctly
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

    using (Logger log = new Logger(StoreDirectory, "locationDownload"))
    {
        String locationsPath = Path.Combine(StoreDirectory, "locations.json");

        //Load existing usernames
        List<ProfileLocationRecord> records;
        try
        {
            records = JsonConvert.DeserializeObject<List<ProfileLocationRecord>>(File.ReadAllText(locationsPath));
        }
        catch (Exception)
        {
            records = null;
        }
        if (records == null)
        {
            // Missing, unreadable or empty file: start with an empty list.
            log.Log("Could not load locations file.");
            records = new List<ProfileLocationRecord>();
        }

        String[] profileFiles = Directory.GetFiles(StoreDirectory, @"*.profile.json", SearchOption.AllDirectories);
        foreach (String profileFileName in profileFiles)
        {
            if (!String.IsNullOrEmpty(profileFileName))
            {
                try
                {
                    Profile parentProfile = JsonConvert.DeserializeObject<Profile>(File.ReadAllText(profileFileName));
                    if (parentProfile == null)
                    {
                        log.Log(String.Format(@"Empty Profile: {0}", profileFileName));
                    }
                    else
                    {
                        log.Log(String.Format("Processing {0}", parentProfile.UserName));

                        if (!records.Any(x => String.Equals(x.UserName, parentProfile.UserName)))
                        {
                            //Add parent record
                            records.Add(new ProfileLocationRecord()
                            {
                                UserName = parentProfile.UserName,
                                PersonalName = parentProfile.PersonalName,
                                Location = parentProfile.LocationDescription,
                                ConnectedFromLocation = null,
                                Error = null,
                            });
                            log.Log(String.Format("Added {0}", parentProfile.UserName));
                        }

                        //Process all connection records.
                        //Connections may be null; iterating it unguarded would throw and abort
                        //the whole crawl through the outer catch below.
                        if (parentProfile.Connections != null)
                        {
                            int connectionCount = 0;
                            int connectionTotal = parentProfile.Connections.Count;
                            foreach (ConnectionEntry connection in parentProfile.Connections)
                            {
                                connectionCount++;
                                try
                                {
                                    //Check if exists
                                    if (!records.Any(x => String.Equals(x.UserName, connection.UserName)))
                                    {
                                        //Get the location, then add the record
                                        String locationDescription = CrawlUtil.GetUserLocation(connection.UserName);
                                        Thread.Sleep(20);
                                        records.Add(new ProfileLocationRecord()
                                        {
                                            UserName = connection.UserName,
                                            PersonalName = connection.PersonalName,
                                            Location = locationDescription,
                                            ConnectedFromLocation = parentProfile.LocationDescription,
                                            Error = null,
                                        });
                                        log.Log(String.Format(@"({1}/{2})Added {0}", connection.UserName, connectionCount, connectionTotal));
                                    }
                                }
                                catch (Exception e)
                                {
                                    // A failing connection is logged and skipped.
                                    log.Log(String.Format("Error: {0}", e.Message));
                                }
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    // Stops the crawl; records written after earlier profile files stay on disk.
                    log.Log(String.Format(@"Error loading profile: {0}, {1}", profileFileName, e.Message));
                    return;
                }
            }

            //Update location file
            try
            {
                File.WriteAllText(locationsPath, JsonConvert.SerializeObject(records, Formatting.Indented));
            }
            catch (Exception e)
            {
                log.Log(String.Format(@"Error Saving Locations: {0}", e.Message));
                return;
            }
        }
    }
}