/// <summary>
/// Build the POST request for the user's photo-stream AJAX endpoint and
/// write its form body.
/// </summary>
/// <param name="startingImageID">
/// Value sent as the <c>lastImageId</c> form field. When null or empty the
/// request stream is opened and closed without writing anything.
/// </param>
/// <returns>
/// The prepared request. Its request stream has already been written and
/// closed, so the caller only needs to obtain the response.
/// </returns>
public HttpWebRequest BuildRequest(String startingImageID = null)
{
    HttpWebRequest rq = (HttpWebRequest)WebRequest.Create(
        String.Format(@"https://myspace.com/ajax/{0}/photosStream/", UserName));

    rq.UserAgent = CrawlUtil.GetUserAgent();
    rq.Host = "myspace.com";
    rq.Method = "POST";
    // Media types must not contain whitespace around the '/' separator.
    rq.Accept = @"application/json, text/javascript, */*; q=0.01";
    rq.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
    rq.Headers.Add(@"Hash", HashKey);

    bool hasStartingImage = !String.IsNullOrEmpty(startingImageID);

    // The stream is opened even when there is no body, as the original did.
    using (Stream s = rq.GetRequestStream())
    {
        if (hasStartingImage)
        {
            // Escape the value so reserved characters cannot corrupt the
            // form-urlencoded body; encode with the charset declared above.
            String postData = "lastImageId=" + Uri.EscapeDataString(startingImageID);
            byte[] data = Encoding.UTF8.GetBytes(postData);
            s.Write(data, 0, data.Length);
        }
    }

    return rq;
}
/// <summary>
/// Parse root profile page.
/// Downloads <c>https://myspace.com/{userName}</c> and copies the values it
/// finds into <c>Profile</c>; for non-private profiles the friend links under
/// the <c>friendsWrapper</c> element are added to <c>Profile.Top8Friends</c>.
/// </summary>
/// <param name="userName">Profile page to parse.</param>
/// <remarks>
/// Best effort: any exception is traced and suppressed, leaving whatever
/// fields were populated before the failure.
/// </remarks>
private void ParseProfilePage(String userName)
{
    String profileURL = String.Format(@"https://myspace.com/{0}", userName);

    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        // NOTE: this is a static (process-wide) setting.
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

        var webRequest = (HttpWebRequest)WebRequest.Create(profileURL);
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream even when loading fails,
        // otherwise the underlying connection is never released.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var root = doc.DocumentNode;

        Profile.URL = profileURL;
        Profile.UserName = userName;

        Profile.ProfileThumbnailImageURL =
            root.SelectSingleNode(@"//a[@id='profileImage']//img")?.Attributes["src"]?.Value;
        Profile.ProfileImageURL = !String.IsNullOrEmpty(Profile.ProfileThumbnailImageURL)
            ? CrawlUtil.ModifyUriFileName(Profile.ProfileThumbnailImageURL, x => "600x600")
            : null;

        // ID, privacy flag and display name are all attributes of one element.
        var connectButton = root.SelectSingleNode(@"//div[@class='connectButton notReversed tooltips']");
        Profile.ProfileID = connectButton?.Attributes["data-id"]?.Value;
        String privateFlag = connectButton?.Attributes["data-is-private"]?.Value;
        Profile.IsPrivate = String.Equals(privateFlag, "true", StringComparison.OrdinalIgnoreCase);
        Profile.PersonalName = connectButton?.Attributes["data-title"]?.Value;

        Profile.LocationDescription = root
            .SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='location_white location ']")
            ?.Attributes["data-display-text"]?.Value;
        Profile.Website = root
            .SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='ribbon_white website ']//a")
            ?.InnerText;

        Profile.OutConnectionTotal = root
            .SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/out']//span", Profile.UserName))
            ?.InnerText;
        Profile.InConnectionTotal = root
            .SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/in']//span", Profile.UserName))
            ?.InnerText;

        if (Profile.IsPrivate)
        {
            return;
        }

        var top8FriendNodes = root.SelectNodes(@"//div[@class='friendsWrapper']//ul//li//a");
        if (top8FriendNodes == null)
        {
            return;
        }

        foreach (var friendNode in top8FriendNodes)
        {
            Top8FriendEntry friendEntry = new Top8FriendEntry();

            friendEntry.UserURL = friendNode?.Attributes["href"]?.Value;
            // Site-relative links are turned into absolute URLs.
            if (!String.IsNullOrEmpty(friendEntry.UserURL)
                && friendEntry.UserURL.StartsWith("/", StringComparison.Ordinal))
            {
                friendEntry.UserURL = String.Format(@"https://myspace.com{0}", friendEntry.UserURL);
            }

            friendEntry.ProfileID = friendNode?.Attributes["data-profileid"]?.Value;
            friendEntry.ThumbnailURL = friendNode?.Attributes["data-image-url"]?.Value;
            friendEntry.UserName = friendNode?.Attributes["data-title"]?.Value;

            Profile.Top8Friends.Add(friendEntry);
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract, but leave a trace instead of
        // discarding the failure silently.
        System.Diagnostics.Trace.TraceWarning("ParseProfilePage failed for '{0}': {1}", profileURL, ex);
    }
}
/// <summary>
/// Download a photo detail page and copy its statistics and visible
/// comments into <paramref name="photoEntry"/>.
/// </summary>
/// <param name="photoEntry">Entry whose count fields and <c>Comments</c> list are filled in.</param>
/// <param name="detailURL">Address of the photo detail page to download.</param>
/// <remarks>
/// Best effort: any exception is traced and suppressed, leaving whatever
/// was populated before the failure.
/// </remarks>
public void ParseDetailPage(PhotoEntry photoEntry, String detailURL)
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(detailURL);
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream even when loading fails,
        // otherwise the underlying connection is never released.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var root = doc.DocumentNode;

        #region Parse photo properties
        // The stats header may be missing; use null-conditional access so its
        // absence no longer aborts the rest of the parse.
        var statsNode = root.SelectSingleNode("//div[@class='rr']//header[@class='stats']");
        photoEntry.LikesCount = statsNode?.SelectSingleNode("a[@data-view='likes']//span")?.InnerText;
        photoEntry.ConnectsCount = statsNode?.SelectSingleNode("a[@data-view='connects']//span")?.InnerText;
        photoEntry.CommentsCount = statsNode?.SelectSingleNode("a[@data-view='comments']//span")?.InnerText;
        photoEntry.SharesCount = root.SelectSingleNode("//div[@class='genInfo ']//p[@class='stats']//span")?.InnerText;
        photoEntry.ConnectsEntityKey = root.SelectSingleNode("//div[@class='rr']")?.Attributes["data-connects-entity-key"]?.Value;
        #endregion

        #region Parse visible comments
        var commentNodes = root.SelectNodes("//ol//li");
        if (commentNodes != null)
        {
            foreach (var commentNode in commentNodes)
            {
                // Each of these nodes supplies two fields, so look them up once.
                var authorLink = commentNode.SelectSingleNode("div//div//div//a");
                var bodySpan = commentNode.SelectSingleNode("div//div//div//span");
                var timeNode = commentNode.SelectSingleNode("div//div[@class='commentFooter']//time");

                PhotoCommentEntry entry = new PhotoCommentEntry();

                entry.ProfileURL = authorLink?.Attributes["href"]?.Value;
                entry.ThumbnailImageURL = commentNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                if (!String.IsNullOrEmpty(entry.ProfileURL))
                {
                    entry.ProfileURL = String.Format(@"https://myspace.com{0}", entry.ProfileURL);
                }

                entry.UserName = authorLink?.InnerText;
                entry.CommentHTML = bodySpan?.InnerHtml;
                entry.Comment = bodySpan?.InnerText;
                entry.DateTimeUTC = timeNode?.Attributes["datetime"]?.Value;
                entry.DateTimeDisplay = timeNode?.InnerText;

                photoEntry.Comments.Add(entry);
            }
        }
        #endregion
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract, but leave a trace instead of
        // discarding the failure silently.
        System.Diagnostics.Trace.TraceWarning("ParseDetailPage failed for '{0}': {1}", detailURL, ex);
    }
}
/// <summary>
/// Parse biography information.
/// Downloads <c>https://myspace.com/{Profile.UserName}/bio</c> and stores the
/// trimmed inner HTML of the first matching bio column in
/// <c>Profile.Biography</c> (null when no such element is found).
/// </summary>
/// <remarks>
/// Best effort: any exception is traced and suppressed.
/// </remarks>
private void ParseBio()
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(
            String.Format(@"https://myspace.com/{0}/bio", Profile.UserName));
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream even when loading fails,
        // otherwise the underlying connection is never released.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var bioNode = doc.DocumentNode.SelectSingleNode(
            @"//div[@class='mainBio']//div[@class='bioColumns']//div");
        Profile.Biography = bioNode?.InnerHtml?.Trim();
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract, but leave a trace instead of
        // discarding the failure silently.
        System.Diagnostics.Trace.TraceWarning("ParseBio failed: {0}", ex);
    }
}
/// <summary>
/// Build the POST request for the user's connections AJAX endpoint and
/// write its form body.
/// </summary>
/// <param name="direction">
/// Which connection list to request; selects the <c>out</c> or <c>in</c>
/// URL segment.
/// </param>
/// <param name="startingIndex">Value sent as the <c>start</c> form field.</param>
/// <returns>
/// The prepared request with its body already written, or null when
/// <paramref name="direction"/> is <c>Unknown</c> or any other value that
/// has no URL segment.
/// </returns>
public HttpWebRequest BuildRequest(ConnectionDirection direction, int startingIndex = 0)
{
    String directionURLToken;
    switch (direction)
    {
        case ConnectionDirection.Out:
            directionURLToken = "out";
            break;
        case ConnectionDirection.In:
            directionURLToken = "in";
            break;
        default:
            // Unknown, or any value without a URL segment: there is no valid
            // endpoint, so do not build a request with an empty segment.
            return null;
    }

    HttpWebRequest rq = (HttpWebRequest)WebRequest.Create(
        String.Format(@"https://myspace.com/ajax/{0}/connections/{1}", UserName, directionURLToken));

    rq.UserAgent = CrawlUtil.GetUserAgent();
    rq.Host = "myspace.com";
    rq.Method = "POST";
    // Media types must not contain whitespace around the '/' separator.
    rq.Accept = @"application/json, text/javascript, */*; q=0.01";
    rq.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
    rq.Headers.Add(@"Hash", HashKey);

    // Format the number culture-independently; the body is machine-readable.
    String postData = "start=" + startingIndex.ToString(System.Globalization.CultureInfo.InvariantCulture);
    byte[] data = Encoding.ASCII.GetBytes(postData);

    using (Stream s = rq.GetRequestStream())
    {
        s.Write(data, 0, data.Length);
    }

    return rq;
}
/// <summary>
/// Download <c>https://myspace.com/{UserName}/music/songs</c>, build a
/// <c>SongEntry</c> from every song play button on it, enrich each entry
/// from its own song page, and add entries that have a song ID to
/// <c>Songs</c>.
/// </summary>
/// <remarks>
/// Best effort: a failure on the list page is traced and ends the read;
/// a failure on an individual song page is traced and only costs that
/// song its detail fields. The thread sleeps before each song-page request.
/// </remarks>
public void Read()
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
    doc.OptionWriteEmptyNodes = true;

    try
    {
        var webRequest = (HttpWebRequest)WebRequest.Create(
            String.Format(@"https://myspace.com/{0}/music/songs", UserName));
        webRequest.UserAgent = CrawlUtil.GetUserAgent();

        // Dispose both the response and its stream even when loading fails,
        // otherwise the underlying connection is never released.
        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            doc.Load(stream);
        }

        var songNodes = doc.DocumentNode.SelectNodes(@"//button[@class='playBtn play_25 song']");
        if (songNodes == null)
        {
            return;
        }

        foreach (var songNode in songNodes)
        {
            SongEntry entry = ParseSongSummary(songNode);

            if (!String.IsNullOrEmpty(entry.SongURL))
            {
                Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenAPIRequests));
                ParseSongDetail(entry);
            }

            if (!String.IsNullOrEmpty(entry.SongID))
            {
                Songs.Add(entry);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract, but leave a trace instead of
        // discarding the failure silently.
        System.Diagnostics.Trace.TraceWarning("Read failed for user '{0}': {1}", UserName, ex);
    }
}

/// <summary>Return the value of an attribute, or null when the node or attribute is absent.</summary>
private static String SongAttribute(HtmlAgilityPack.HtmlNode node, String name)
{
    return node?.Attributes[name]?.Value;
}

/// <summary>Prefix a non-empty site-relative path with the myspace.com host; pass null/empty through.</summary>
private static String SongAbsoluteUrl(String relativeUrl)
{
    return String.IsNullOrEmpty(relativeUrl)
        ? relativeUrl
        : String.Format(@"https://myspace.com{0}", relativeUrl);
}

/// <summary>True when the attribute text equals "true", ignoring case.</summary>
private static bool SongFlagIsTrue(String flag)
{
    return String.Equals(flag, "true", StringComparison.OrdinalIgnoreCase);
}

/// <summary>Build a song entry from the data-* attributes of one play button on the songs list page.</summary>
private static SongEntry ParseSongSummary(HtmlAgilityPack.HtmlNode songNode)
{
    SongEntry entry = new SongEntry();

    entry.SongID = SongAttribute(songNode, "data-song-id");
    entry.SongTitle = SongAttribute(songNode, "data-title");
    entry.SongURL = SongAbsoluteUrl(SongAttribute(songNode, "data-url"));

    entry.AlbumID = SongAttribute(songNode, "data-album-id");
    entry.AlbumTitle = SongAttribute(songNode, "data-album-title");
    entry.AlbumURL = SongAbsoluteUrl(SongAttribute(songNode, "data-album-url"));

    entry.ArtistID = SongAttribute(songNode, "data-artist-id");
    entry.ArtistTitle = SongAttribute(songNode, "data-artist-name");
    entry.ArtistURL = SongAbsoluteUrl(SongAttribute(songNode, "data-artist-url"));

    entry.DurationInSeconds = SongAttribute(songNode, "data-duration");
    entry.VideoID = SongAttribute(songNode, "data-video-id");
    entry.YoutubeID = SongAttribute(songNode, "data-youtube-id");
    if (!String.IsNullOrEmpty(entry.YoutubeID))
    {
        entry.YoutubeURL = String.Format(@"https://www.youtube.com/watch?v={0}", entry.YoutubeID);
    }

    entry.ImageThumbnailURL = SongAttribute(songNode, "data-image-url");
    entry.ImageURL = !String.IsNullOrEmpty(entry.ImageThumbnailURL)
        ? CrawlUtil.ModifyUriFileName(entry.ImageThumbnailURL, x => "full")
        : null;

    entry.GenreID = SongAttribute(songNode, "data-genre-id");
    entry.GenreName = SongAttribute(songNode, "data-genre-name");
    entry.MediaID = SongAttribute(songNode, "data-media-id");
    entry.MediaType = SongAttribute(songNode, "data-media-type");
    entry.UID = SongAttribute(songNode, "data-uid");

    entry.IsPremium = SongFlagIsTrue(SongAttribute(songNode, "data-is-premium"));
    entry.IsExplicit = SongFlagIsTrue(SongAttribute(songNode, "data-is-explicit"));
    entry.IsFullLength = SongFlagIsTrue(SongAttribute(songNode, "data-is-full-length"));
    entry.IsAdsProhibited = SongFlagIsTrue(SongAttribute(songNode, "data-ads-prohibited"));

    return entry;
}

/// <summary>Download the entry's song page and fill in PlayCount, Label and ReleaseDate; failures are traced and suppressed.</summary>
private static void ParseSongDetail(SongEntry entry)
{
    try
    {
        var detailDoc = new HtmlAgilityPack.HtmlDocument();
        HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
        detailDoc.OptionWriteEmptyNodes = true;

        var detailRequest = (HttpWebRequest)WebRequest.Create(entry.SongURL);
        detailRequest.UserAgent = CrawlUtil.GetUserAgent();

        using (WebResponse detailResponse = detailRequest.GetResponse())
        using (Stream detailStream = detailResponse.GetResponseStream())
        {
            detailDoc.Load(detailStream);
        }

        // The first 'plays' div is expected to hold the caption and the
        // second one the value.
        var playsNodes = detailDoc.DocumentNode.SelectNodes(@"//div[@class='plays']");
        if (playsNodes != null
            && playsNodes.Count >= 2
            && String.Equals(playsNodes[0]?.InnerText, "PLAYS", StringComparison.OrdinalIgnoreCase))
        {
            entry.PlayCount = playsNodes[1]?.InnerText;
        }

        var asideNodes = detailDoc.DocumentNode.SelectNodes(@"//aside[@class='dotted top']");
        if (asideNodes == null || asideNodes.Count < 1)
        {
            return;
        }

        var detailAside = asideNodes[0];
        if (detailAside?.InnerText == null || !detailAside.InnerText.Contains("Length"))
        {
            return;
        }

        // ".//" keeps the search inside this aside; a leading "//" is an
        // absolute path that would match dt/dd elements anywhere in the page.
        var termNodes = detailAside.SelectNodes(".//dt");
        var valueNodes = detailAside.SelectNodes(".//dd");
        if (termNodes == null
            || valueNodes == null
            || termNodes.Count == 0
            || termNodes.Count != valueNodes.Count)
        {
            return;
        }

        // Map each term caption to its position; the first occurrence wins so
        // a repeated caption cannot throw and discard the whole table.
        var indexByTerm = new Dictionary<String, int>();
        for (int i = 0; i < termNodes.Count; i++)
        {
            String caption = termNodes[i].InnerText;
            if (caption != null && !indexByTerm.ContainsKey(caption))
            {
                indexByTerm.Add(caption, i);
            }
        }

        int position;
        if (indexByTerm.TryGetValue("Label", out position))
        {
            entry.Label = valueNodes[position]?.InnerText;
        }
        if (indexByTerm.TryGetValue("Release", out position))
        {
            entry.ReleaseDate = valueNodes[position]?.InnerText;
        }
    }
    catch (Exception ex)
    {
        // A broken detail page must not lose the summary data already parsed.
        System.Diagnostics.Trace.TraceWarning("Song detail parse failed for '{0}': {1}", entry.SongURL, ex);
    }
}