Пример #1
0
        /// <summary>
        /// Builds the POST request for the photo-stream AJAX endpoint of <c>UserName</c>.
        /// </summary>
        /// <param name="startingImageID">
        /// Value sent as the <c>lastImageId</c> form field. When null or empty the
        /// request body is left empty.
        /// </param>
        /// <returns>
        /// The prepared request with its body already written; no response has been requested yet.
        /// </returns>
        public HttpWebRequest BuildRequest(String startingImageID = null)
        {
            HttpWebRequest rq = (HttpWebRequest)WebRequest
                                .Create(String.Format(@"https://myspace.com/ajax/{0}/photosStream/", UserName));

            rq.UserAgent   = CrawlUtil.GetUserAgent();
            rq.Host        = "myspace.com";
            rq.Method      = "POST";
            // Media types must not contain whitespace around the '/' separator.
            rq.Accept      = @"application/json, text/javascript, */*; q=0.01";
            rq.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
            rq.Headers.Add(@"Hash", HashKey);

            // The request stream is always opened and closed so the (possibly empty)
            // body is finalized before the request is handed back to the caller.
            using (Stream s = rq.GetRequestStream())
            {
                if (!String.IsNullOrEmpty(startingImageID))
                {
                    // Percent-encode the value: the body is declared as form-urlencoded,
                    // and the escaped text is pure ASCII so the encoding below is lossless.
                    var postData = String.Format("lastImageId={0}", Uri.EscapeDataString(startingImageID));
                    var data     = Encoding.ASCII.GetBytes(postData);
                    s.Write(data, 0, data.Length);
                }
            }

            return(rq);
        }
Пример #2
0
        /// <summary>
        /// Parse root profile page.
        /// Downloads <c>https://myspace.com/{userName}</c> and copies the values found
        /// in the markup into <c>Profile</c>; for non-private profiles the linked
        /// friends are appended to <c>Profile.Top8Friends</c>.
        /// </summary>
        /// <param name="userName">Profile page to parse.</param>
        /// <remarks>
        /// Best effort: any failure is traced and swallowed, leaving <c>Profile</c>
        /// with whatever had been assigned up to that point.
        /// </remarks>
        private void ParseProfilePage(String userName)
        {
            String profileURL = String.Format(@"https://myspace.com/{0}", userName);
            var    doc        = new HtmlAgilityPack.HtmlDocument();

            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            try
            {
                // NOTE(review): this is a static setting, so it affects more than this request.
                ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

                var webRequest = (HttpWebRequest)WebRequest.Create(profileURL);
                webRequest.UserAgent = CrawlUtil.GetUserAgent();

                // Dispose both the response and its stream, even when loading throws.
                using (WebResponse response = webRequest.GetResponse())
                using (Stream stream = response.GetResponseStream())
                {
                    doc.Load(stream);
                }

                var root = doc.DocumentNode;

                Profile.URL      = profileURL;
                Profile.UserName = userName;
                Profile.ProfileThumbnailImageURL = root.SelectSingleNode(@"//a[@id='profileImage']//img")?.Attributes["src"]?.Value;
                Profile.ProfileImageURL          = !String.IsNullOrEmpty(Profile.ProfileThumbnailImageURL) ? CrawlUtil.ModifyUriFileName(Profile.ProfileThumbnailImageURL, x => "600x600") : null;

                // The connect button carries the id, privacy flag and display name; look it up once.
                var connectButton = root.SelectSingleNode(@"//div[@class='connectButton notReversed tooltips']");
                Profile.ProfileID = connectButton?.Attributes["data-id"]?.Value;
                String privateFlag = connectButton?.Attributes["data-is-private"]?.Value;
                Profile.IsPrivate           = String.Equals(privateFlag, "true", StringComparison.OrdinalIgnoreCase);
                Profile.PersonalName        = connectButton?.Attributes["data-title"]?.Value;
                Profile.LocationDescription = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='location_white location ']")?.Attributes["data-display-text"]?.Value;
                Profile.Website             = root.SelectSingleNode(@"//div[@id='profileDetails']//div[@id='locAndWeb']//div[@class='ribbon_white website ']//a")?.InnerText;
                Profile.OutConnectionTotal  = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/out']//span", Profile.UserName))?.InnerText;
                Profile.InConnectionTotal   = root.SelectSingleNode(String.Format(@"//div[@id='profileDetails']//div[@id='connectionsCount']//a[@href='/{0}/connections/in']//span", Profile.UserName))?.InnerText;

                if (Profile.IsPrivate)
                {
                    return;
                }

                var top8FriendsNode = root.SelectNodes(@"//div[@class='friendsWrapper']//ul//li//a");
                if (top8FriendsNode == null)
                {
                    return;
                }

                foreach (var friendNode in top8FriendsNode)
                {
                    Top8FriendEntry friendEntry = new Top8FriendEntry();
                    friendEntry.UserURL = friendNode?.Attributes["href"]?.Value;
                    // Only site-relative links are turned into absolute URLs.
                    if (!String.IsNullOrEmpty(friendEntry.UserURL) && friendEntry.UserURL.StartsWith("/", StringComparison.Ordinal))
                    {
                        friendEntry.UserURL = string.Format(@"https://myspace.com{0}", friendEntry.UserURL);
                    }
                    friendEntry.ProfileID    = friendNode?.Attributes["data-profileid"]?.Value;
                    friendEntry.ThumbnailURL = friendNode?.Attributes["data-image-url"]?.Value;
                    friendEntry.UserName     = friendNode?.Attributes["data-title"]?.Value;
                    Profile.Top8Friends.Add(friendEntry);
                }
            }
            catch (Exception e)
            {
                // Still best effort, but leave a diagnostic instead of failing silently.
                System.Diagnostics.Trace.TraceWarning("ParseProfilePage failed for '{0}': {1}", profileURL, e);
            }
        }
Пример #3
0
        /// <summary>
        /// Downloads a photo detail page and copies its counters, entity key and
        /// the comments present in the markup into <paramref name="photoEntry"/>.
        /// </summary>
        /// <param name="photoEntry">Entry that receives the parsed values and comments.</param>
        /// <param name="detailURL">Absolute URL of the photo detail page.</param>
        /// <remarks>
        /// Best effort: any failure is traced and swallowed, leaving
        /// <paramref name="photoEntry"/> with whatever had been assigned so far.
        /// </remarks>
        public void ParseDetailPage(PhotoEntry photoEntry, String detailURL)
        {
            var doc = new HtmlAgilityPack.HtmlDocument();

            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            try
            {
                var webRequest = (HttpWebRequest)WebRequest.Create(detailURL);
                webRequest.UserAgent = CrawlUtil.GetUserAgent();

                // Dispose both the response and its stream, even when loading throws.
                using (WebResponse response = webRequest.GetResponse())
                using (Stream stream = response.GetResponseStream())
                {
                    doc.Load(stream);
                }

                var root = doc.DocumentNode;

                #region Parse photo properties
                // The stats header may be absent; null-propagate so the remaining
                // properties and the comments are still parsed in that case.
                var statsNode = root.SelectSingleNode("//div[@class='rr']//header[@class='stats']");
                photoEntry.LikesCount        = statsNode?.SelectSingleNode("a[@data-view='likes']//span")?.InnerText;
                photoEntry.ConnectsCount     = statsNode?.SelectSingleNode("a[@data-view='connects']//span")?.InnerText;
                photoEntry.CommentsCount     = statsNode?.SelectSingleNode("a[@data-view='comments']//span")?.InnerText;
                photoEntry.SharesCount       = root.SelectSingleNode("//div[@class='genInfo ']//p[@class='stats']//span")?.InnerText;
                photoEntry.ConnectsEntityKey = root.SelectSingleNode("//div[@class='rr']")?.Attributes["data-connects-entity-key"]?.Value;
                #endregion

                #region Parse visible comments
                var commentNodes = root.SelectNodes("//ol//li");
                if (commentNodes != null)
                {
                    foreach (var commentNode in commentNodes)
                    {
                        // Each element is looked up once and reused for all fields read from it.
                        var authorLink = commentNode.SelectSingleNode("div//div//div//a");
                        var textSpan   = commentNode.SelectSingleNode("div//div//div//span");
                        var timeNode   = commentNode.SelectSingleNode("div//div[@class='commentFooter']//time");

                        PhotoCommentEntry entry = new PhotoCommentEntry();
                        entry.ProfileURL        = authorLink?.Attributes["href"]?.Value;
                        entry.ThumbnailImageURL = commentNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                        if (!String.IsNullOrEmpty(entry.ProfileURL))
                        {
                            entry.ProfileURL = String.Format(@"https://myspace.com{0}", entry.ProfileURL);
                        }

                        entry.UserName        = authorLink?.InnerText;
                        entry.CommentHTML     = textSpan?.InnerHtml;
                        entry.Comment         = textSpan?.InnerText;
                        entry.DateTimeUTC     = timeNode?.Attributes["datetime"]?.Value;
                        entry.DateTimeDisplay = timeNode?.InnerText;

                        photoEntry.Comments.Add(entry);
                    }
                }
                #endregion
            }
            catch (Exception e)
            {
                // Still best effort, but leave a diagnostic instead of failing silently.
                System.Diagnostics.Trace.TraceWarning("ParseDetailPage failed for '{0}': {1}", detailURL, e);
            }
        }
Пример #4
0
        /// <summary>
        /// Parse biography information.
        /// Downloads <c>https://myspace.com/{Profile.UserName}/bio</c> and stores the
        /// trimmed inner HTML of the first matching bio column in <c>Profile.Biography</c>
        /// (null when the node is not found).
        /// </summary>
        /// <remarks>
        /// Best effort: any failure is traced and swallowed, leaving
        /// <c>Profile.Biography</c> unchanged.
        /// </remarks>
        private void ParseBio()
        {
            var doc = new HtmlAgilityPack.HtmlDocument();

            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            try
            {
                var webRequest = (HttpWebRequest)WebRequest.Create(String.Format(@"https://myspace.com/{0}/bio", Profile.UserName));
                webRequest.UserAgent = CrawlUtil.GetUserAgent();

                // Dispose both the response and its stream, even when loading throws.
                using (WebResponse response = webRequest.GetResponse())
                using (Stream stream = response.GetResponseStream())
                {
                    doc.Load(stream);
                }

                var bioNode = doc.DocumentNode.SelectSingleNode(@"//div[@class='mainBio']//div[@class='bioColumns']//div");
                Profile.Biography = bioNode?.InnerHtml?.Trim();
            }
            catch (Exception e)
            {
                // Still best effort, but leave a diagnostic instead of failing silently.
                System.Diagnostics.Trace.TraceWarning("ParseBio failed: {0}", e);
            }
        }
Пример #5
0
        /// <summary>
        /// Builds the POST request for the connections AJAX endpoint of <c>UserName</c>.
        /// </summary>
        /// <param name="direction">
        /// Selects the <c>out</c> or <c>in</c> endpoint.
        /// </param>
        /// <param name="startingIndex">Value sent as the <c>start</c> form field.</param>
        /// <returns>
        /// The prepared request with its body already written, or null when
        /// <paramref name="direction"/> is <c>Unknown</c> or not a recognized value.
        /// </returns>
        public HttpWebRequest BuildRequest(ConnectionDirection direction, int startingIndex = 0)
        {
            String directionURLToken;

            switch (direction)
            {
            case ConnectionDirection.Out:
                directionURLToken = "out";
                break;

            case ConnectionDirection.In:
                directionURLToken = "in";
                break;

            case ConnectionDirection.Unknown:
            default:
                // Unrecognized values used to fall through with an empty token and
                // produce a URL ending in ".../connections/"; treat them like Unknown.
                return(null);
            }

            HttpWebRequest rq = (HttpWebRequest)WebRequest
                                .Create(String.Format(@"https://myspace.com/ajax/{0}/connections/{1}", UserName, directionURLToken));

            rq.UserAgent   = CrawlUtil.GetUserAgent();
            rq.Host        = "myspace.com";
            rq.Method      = "POST";
            // Media types must not contain whitespace around the '/' separator.
            rq.Accept      = @"application/json, text/javascript, */*; q=0.01";
            rq.ContentType = @"application/x-www-form-urlencoded; charset=UTF-8";
            rq.Headers.Add(@"Hash", HashKey);

            // Format the number culture-independently: this is wire data, not UI text.
            var postData = String.Format(System.Globalization.CultureInfo.InvariantCulture, "start={0}", startingIndex);
            var data     = Encoding.ASCII.GetBytes(postData);

            using (Stream s = rq.GetRequestStream())
            {
                s.Write(data, 0, data.Length);
            }

            return(rq);
        }
Пример #6
0
        /// <summary>
        /// Downloads the songs page of <c>UserName</c>, builds a <c>SongEntry</c> from
        /// the data attributes of every play button found, enriches each entry from its
        /// own detail page, and adds entries that have a song id to <c>Songs</c>.
        /// </summary>
        /// <remarks>
        /// Best effort: a failure on the songs page stops the read, while a failure on
        /// a single detail page only skips the detail fields of that song. Both are
        /// traced. A randomized delay derived from <c>DelayBetweenAPIRequests</c> is
        /// awaited before each detail request.
        /// </remarks>
        public void Read()
        {
            String songsURL = String.Format(@"https://myspace.com/{0}/music/songs", UserName);

            try
            {
                var doc       = LoadSongsHtmlDocument(songsURL);
                var songsNode = doc.DocumentNode.SelectNodes(@"//button[@class='playBtn play_25 song']");
                if (songsNode == null)
                {
                    return;
                }

                foreach (var songNode in songsNode)
                {
                    SongEntry entry = ParseSongSummary(songNode);

                    if (!String.IsNullOrEmpty(entry.SongURL))
                    {
                        Thread.Sleep(CrawlUtil.GetVariableDelay(DelayBetweenAPIRequests));
                        try
                        {
                            ParseSongDetail(entry);
                        }
                        catch (Exception e2)
                        {
                            // The summary data is still usable; only the detail fields are lost.
                            System.Diagnostics.Trace.TraceWarning("Read: detail page failed for '{0}': {1}", entry.SongURL, e2);
                        }
                    }

                    if (!String.IsNullOrEmpty(entry.SongID))
                    {
                        Songs.Add(entry);
                    }
                }
            }
            catch (Exception e)
            {
                // Still best effort, but leave a diagnostic instead of failing silently.
                System.Diagnostics.Trace.TraceWarning("Read failed for '{0}': {1}", songsURL, e);
            }
        }

        /// <summary>Downloads <paramref name="url"/> and loads it into a new HTML document.</summary>
        private static HtmlAgilityPack.HtmlDocument LoadSongsHtmlDocument(String url)
        {
            var doc = new HtmlAgilityPack.HtmlDocument();

            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            var webRequest = (HttpWebRequest)WebRequest.Create(url);
            webRequest.UserAgent = CrawlUtil.GetUserAgent();

            // Dispose both the response and its stream, even when loading throws.
            using (WebResponse response = webRequest.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                doc.Load(stream);
            }

            return(doc);
        }

        /// <summary>Returns the value of attribute <paramref name="name"/>, or null when it is absent.</summary>
        private static String SongAttribute(HtmlAgilityPack.HtmlNode node, String name)
        {
            return(node?.Attributes[name]?.Value);
        }

        /// <summary>Returns true when attribute <paramref name="name"/> equals "true", ignoring case.</summary>
        private static bool SongFlag(HtmlAgilityPack.HtmlNode node, String name)
        {
            return(String.Equals(SongAttribute(node, name), "true", StringComparison.OrdinalIgnoreCase));
        }

        /// <summary>Prefixes a non-empty site path with the myspace.com origin; null/empty is returned unchanged.</summary>
        private static String ToAbsoluteMyspaceUrl(String path)
        {
            return(String.IsNullOrEmpty(path) ? path : String.Format(@"https://myspace.com{0}", path));
        }

        /// <summary>Builds a song entry from the data attributes of one play button on the songs page.</summary>
        private static SongEntry ParseSongSummary(HtmlAgilityPack.HtmlNode songNode)
        {
            SongEntry entry = new SongEntry();

            entry.SongID    = SongAttribute(songNode, "data-song-id");
            entry.SongTitle = SongAttribute(songNode, "data-title");
            entry.SongURL   = ToAbsoluteMyspaceUrl(SongAttribute(songNode, "data-url"));

            entry.AlbumID    = SongAttribute(songNode, "data-album-id");
            entry.AlbumTitle = SongAttribute(songNode, "data-album-title");
            entry.AlbumURL   = ToAbsoluteMyspaceUrl(SongAttribute(songNode, "data-album-url"));

            entry.ArtistID    = SongAttribute(songNode, "data-artist-id");
            entry.ArtistTitle = SongAttribute(songNode, "data-artist-name");
            entry.ArtistURL   = ToAbsoluteMyspaceUrl(SongAttribute(songNode, "data-artist-url"));

            entry.DurationInSeconds = SongAttribute(songNode, "data-duration");
            entry.VideoID           = SongAttribute(songNode, "data-video-id");
            entry.YoutubeID         = SongAttribute(songNode, "data-youtube-id");
            if (!String.IsNullOrEmpty(entry.YoutubeID))
            {
                entry.YoutubeURL = String.Format(@"https://www.youtube.com/watch?v={0}", entry.YoutubeID);
            }

            entry.ImageThumbnailURL = SongAttribute(songNode, "data-image-url");
            entry.ImageURL          = !String.IsNullOrEmpty(entry.ImageThumbnailURL) ? CrawlUtil.ModifyUriFileName(entry.ImageThumbnailURL, x => "full") : null;

            entry.GenreID   = SongAttribute(songNode, "data-genre-id");
            entry.GenreName = SongAttribute(songNode, "data-genre-name");
            entry.MediaID   = SongAttribute(songNode, "data-media-id");
            entry.MediaType = SongAttribute(songNode, "data-media-type");
            entry.UID       = SongAttribute(songNode, "data-uid");

            entry.IsPremium       = SongFlag(songNode, "data-is-premium");
            entry.IsExplicit      = SongFlag(songNode, "data-is-explicit");
            entry.IsFullLength    = SongFlag(songNode, "data-is-full-length");
            entry.IsAdsProhibited = SongFlag(songNode, "data-ads-prohibited");

            return(entry);
        }

        /// <summary>Downloads the song's detail page and fills in play count, label and release date when present.</summary>
        private static void ParseSongDetail(SongEntry entry)
        {
            var detailDoc = LoadSongsHtmlDocument(entry.SongURL);

            // The first "plays" div is the caption, the second one holds the number.
            var playsNodes = detailDoc.DocumentNode.SelectNodes(@"//div[@class='plays']");
            if (playsNodes != null &&
                playsNodes.Count >= 2 &&
                String.Equals(playsNodes[0]?.InnerText, "PLAYS", StringComparison.OrdinalIgnoreCase))
            {
                entry.PlayCount = playsNodes[1]?.InnerText;
            }

            var asideNodes = detailDoc.DocumentNode.SelectNodes(@"//aside[@class='dotted top']");
            if (asideNodes == null || asideNodes.Count < 1)
            {
                return;
            }

            var detailsAside = asideNodes[0];
            if (detailsAside?.InnerText == null || !detailsAside.InnerText.Contains("Length"))
            {
                return;
            }

            // ".//" keeps the search inside this aside; a leading "//" would match
            // every dt/dd in the whole document.
            var songDetailItemNodesDt = detailsAside.SelectNodes(".//dt");
            var songDetailItemNodesDd = detailsAside.SelectNodes(".//dd");
            if (songDetailItemNodesDt == null ||
                songDetailItemNodesDd == null ||
                songDetailItemNodesDt.Count == 0 ||
                songDetailItemNodesDt.Count != songDetailItemNodesDd.Count)
            {
                return;
            }

            // Map each dt caption to its position so the matching dd can be picked.
            Dictionary <String, int> tableIndex = new Dictionary <String, int>();
            for (int i = 0; i < songDetailItemNodesDt.Count; i++)
            {
                String caption = songDetailItemNodesDt[i].InnerText;
                // First occurrence wins; Add() would throw on a repeated caption.
                if (!tableIndex.ContainsKey(caption))
                {
                    tableIndex.Add(caption, i);
                }
            }

            int index;
            if (tableIndex.TryGetValue("Label", out index) && index < songDetailItemNodesDd.Count)
            {
                entry.Label = songDetailItemNodesDd[index]?.InnerText;
            }

            if (tableIndex.TryGetValue("Release", out index) && index < songDetailItemNodesDd.Count)
            {
                entry.ReleaseDate = songDetailItemNodesDd[index]?.InnerText;
            }
        }