Example #1
0
        /// <summary>
        /// Downloads the photo detail page at <paramref name="detailURL"/> and copies the
        /// statistics and the visible comments found in its HTML onto <paramref name="photoEntry"/>.
        /// </summary>
        /// <remarks>
        /// This is a best-effort operation: any exception raised while fetching or parsing the
        /// page is traced and swallowed, so the method never throws from the fetch/parse step.
        /// Fields whose markup cannot be located are set to <c>null</c>.
        /// </remarks>
        /// <param name="photoEntry">Entry that receives the parsed counts, entity key and comments.</param>
        /// <param name="detailURL">URL of the detail page to request.</param>
        public void ParseDetailPage(PhotoEntry photoEntry, String detailURL)
        {
            var doc = new HtmlAgilityPack.HtmlDocument();

            // ElementsFlags is a static table on HtmlNode, so this registration is shared by
            // every document parsed afterwards; it marks <br> as an empty element.
            HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
            doc.OptionWriteEmptyNodes = true;

            try
            {
                var webRequest = (HttpWebRequest)WebRequest.Create(detailURL);
                webRequest.UserAgent = CrawlUtil.GetUserAgent();

                // Dispose both the response and its stream even when loading throws,
                // so the underlying connection is always released.
                using (WebResponse webResponse = webRequest.GetResponse())
                using (Stream stream = webResponse.GetResponseStream())
                {
                    doc.Load(stream);
                }

                #region Parse photo properties
                // The stats header may be absent; use null-conditional access so a missing
                // header does not abort parsing of the remaining properties and comments.
                var statsNode = doc.DocumentNode.SelectSingleNode("//div[@class='rr']//header[@class='stats']");
                photoEntry.LikesCount        = statsNode?.SelectSingleNode("a[@data-view='likes']//span")?.InnerText;
                photoEntry.ConnectsCount     = statsNode?.SelectSingleNode("a[@data-view='connects']//span")?.InnerText;
                photoEntry.CommentsCount     = statsNode?.SelectSingleNode("a[@data-view='comments']//span")?.InnerText;
                photoEntry.SharesCount       = doc.DocumentNode.SelectSingleNode("//div[@class='genInfo ']//p[@class='stats']//span")?.InnerText;
                photoEntry.ConnectsEntityKey = doc.DocumentNode.SelectSingleNode("//div[@class='rr']")?.Attributes["data-connects-entity-key"]?.Value;
                #endregion

                #region Parse visible comments
                var commentNodes = doc.DocumentNode.SelectNodes("//ol//li");
                if (commentNodes != null)
                {
                    foreach (var commentNode in commentNodes)
                    {
                        // Evaluate each XPath once and reuse the node for every field read from it.
                        var authorLink = commentNode.SelectSingleNode("div//div//div//a");
                        var bodySpan   = commentNode.SelectSingleNode("div//div//div//span");
                        var timeNode   = commentNode.SelectSingleNode("div//div[@class='commentFooter']//time");

                        PhotoCommentEntry entry = new PhotoCommentEntry();
                        entry.ProfileURL        = authorLink?.Attributes["href"]?.Value;
                        entry.ThumbnailImageURL = commentNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                        if (!String.IsNullOrEmpty(entry.ProfileURL))
                        {
                            // The href is prefixed with the site root to form the stored profile URL.
                            entry.ProfileURL = String.Format(@"https://myspace.com{0}", entry.ProfileURL);
                        }

                        entry.UserName        = authorLink?.InnerText;
                        entry.CommentHTML     = bodySpan?.InnerHtml;
                        entry.Comment         = bodySpan?.InnerText;
                        entry.DateTimeUTC     = timeNode?.Attributes["datetime"]?.Value;
                        entry.DateTimeDisplay = timeNode?.InnerText;

                        photoEntry.Comments.Add(entry);
                    }
                }
                #endregion
            }
            catch (Exception e)
            {
                // Deliberately best-effort: keep the caller running, but leave a trace
                // instead of hiding the failure completely.
                System.Diagnostics.Trace.TraceWarning("ParseDetailPage failed for '{0}': {1}", detailURL, e.Message);
            }
        }
Example #2
0
        /// <summary>
        /// Requests one page of the photo stream starting at <paramref name="startingImageID"/>,
        /// parses the returned JSON/HTML into photo entries and, for every entry that exposes a
        /// detail page, enriches it through <c>ParseDetailPage</c>.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while requesting or parsing are not propagated; their message is
        /// stored in the returned object's <c>Error</c> property and whatever entries were
        /// collected up to that point are still returned.
        /// </remarks>
        /// <param name="startingImageID">Image identifier passed to <c>BuildRequest</c> to build the request.</param>
        /// <returns>
        /// The response object with the parsed entries, the <c>EndOfPhotos</c> flag, the
        /// <c>LastPhotoID</c> of the final collected entry (when there is one) and any error message.
        /// </returns>
        public PhotoStreamResponse RequestPhotoStream(String startingImageID)
        {
            PhotoStreamResponse fullResponse = new PhotoStreamResponse();

            try
            {
                HttpWebRequest request = BuildRequest(startingImageID);
                String         result;

                // Read the whole body and dispose the response before any further work, so the
                // response is released even on failure and is not held open while the detail
                // pages are requested below.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    Encoding responseEncoding;
                    try
                    {
                        responseEncoding = String.IsNullOrEmpty(response.CharacterSet)
                            ? Encoding.UTF8
                            : Encoding.GetEncoding(response.CharacterSet);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown or malformed charset name: fall back to UTF-8 rather than
                        // failing the whole request.
                        responseEncoding = Encoding.UTF8;
                    }

                    using (StreamReader sr = new StreamReader(response.GetResponseStream(), responseEncoding))
                    {
                        result = sr.ReadToEnd();
                    }
                }

                JObject model = JObject.Parse(result);

                fullResponse.EndOfPhotos = (bool)model["endOfPhotos"];

                // The "view" property carries the HTML fragment that lists the photos.
                String htmlDocument = (String)model["view"];

                var doc = new HtmlAgilityPack.HtmlDocument();
                HtmlAgilityPack.HtmlNode.ElementsFlags["br"] = HtmlAgilityPack.HtmlElementFlag.Empty;
                doc.OptionWriteEmptyNodes = true;

                doc.LoadHtml(htmlDocument);

                var photoNodes = doc.DocumentNode.SelectNodes("//ul[@id='photosContainer']//li");
                if (photoNodes != null)
                {
                    foreach (var photoNode in photoNodes)
                    {
                        PhotoEntry entry = new PhotoEntry();
                        entry.Caption           = photoNode.SelectSingleNode("div//div//span[@class='photoCaption postText']")?.InnerText;
                        entry.ThumbnailImageURL = photoNode.SelectSingleNode("a//img")?.Attributes["src"]?.Value;
                        entry.FullImageURL      = !String.IsNullOrEmpty(entry.ThumbnailImageURL) ? CrawlUtil.ModifyUriFileName(entry.ThumbnailImageURL, x => "full") : null;
                        entry.PhotoID           = photoNode.Attributes["data-photoId"]?.Value;
                        entry.AlbumName         = photoNode.SelectSingleNode("span[@itemprop='name']")?.InnerText;
                        entry.DetailPageURL     = photoNode.SelectSingleNode("a")?.Attributes["content"]?.Value;

                        if (!String.IsNullOrEmpty(entry.DetailPageURL))
                        {
                            // The attribute value is prefixed with the site root before the detail page is fetched.
                            entry.DetailPageURL = String.Format(@"https://myspace.com{0}", entry.DetailPageURL);
                            ParseDetailPage(entry, entry.DetailPageURL);
                        }

                        // Entries without a photo id are dropped.
                        if (!String.IsNullOrEmpty(entry.PhotoID))
                        {
                            fullResponse.PhotosEntries.Add(entry);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                fullResponse.Error = e.Message;
            }

            // Short-circuit with && so Count is never read on a null list; this check runs
            // outside the try block, so an exception here would escape to the caller.
            if (fullResponse.PhotosEntries != null && fullResponse.PhotosEntries.Count > 0)
            {
                fullResponse.LastPhotoID = fullResponse.PhotosEntries[fullResponse.PhotosEntries.Count - 1]?.PhotoID;
            }

            return fullResponse;
        }