/// <summary>
/// Downloads the page for <paramref name="account"/>, parses it with
/// <c>ParsePage</c> and stamps the resulting snapshot with the current UTC time.
/// </summary>
/// <param name="account">Account whose <c>Id</c> is substituted into <c>PageFormatString</c>.</param>
/// <returns>
/// The snapshot produced by <c>ParsePage</c> with <c>DateOfSnapshot</c> set,
/// or <c>null</c> when <c>ParsePage</c> returned <c>null</c>.
/// </returns>
/// <exception cref="AggregateException">
/// The HTTP request failed; the task is awaited synchronously through <c>Result</c>,
/// so the underlying exception arrives wrapped.
/// </exception>
protected virtual SocialBaseSnapshot GetSnapshot(SocialBaseAccount account)
{
    // Construct the url, e.g. /torontomapleleafs/likes
    string relativeUrl = String.Format(PageFormatString, account.Id);
    Uri url = new Uri(relativeUrl, UriKind.Relative);

    // Make the http request. The client is disposed as soon as the body has been
    // read so that each call does not leave a handler/connection behind.
    string responseString;
    using (HttpClient httpClient = new HttpClient())
    {
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36");
        httpClient.BaseAddress = new Uri(BaseAddress);

        // Blocking on Result keeps this method synchronous, as callers expect.
        Task<string> responseTask = httpClient.GetStringAsync(url);
        responseString = responseTask.Result;
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(responseString);

    SocialBaseSnapshot result = this.ParsePage(document.DocumentNode, account);

    // ParsePage is allowed to return null; do not dereference it in that case.
    if (null != result)
    {
        result.DateOfSnapshot = DateTime.UtcNow;
    }

    return result;
}
/// <summary>
/// Builds a <c>TwitterSnapshot</c> from a profile page by reading the tweet,
/// following and follower counters.
/// </summary>
/// <param name="documentNode">Root node of the downloaded page; may be <c>null</c>.</param>
/// <param name="account">Account the page belongs to; its <c>Id</c> is copied to the snapshot.</param>
/// <returns>
/// The populated snapshot, or <c>null</c> when <paramref name="documentNode"/> is <c>null</c>.
/// A counter that cannot be located or parsed is set to -1 and a line is appended
/// to the snapshot's <c>Log</c>.
/// </returns>
protected override SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account)
{
    // Nothing to parse: bail out before allocating or touching the account.
    if (null == documentNode)
    {
        return null;
    }

    TwitterSnapshot twitterAccountSnapshot = new TwitterSnapshot();
    twitterAccountSnapshot.TwitterAccountId = account.Id;

    twitterAccountSnapshot.Tweets = ReadStatCount(
        documentNode, @"//a[@data-element-term='tweet_stats']/strong", "tweetCount", twitterAccountSnapshot);

    twitterAccountSnapshot.Following = ReadStatCount(
        documentNode, @"//a[@data-element-term='following_stats']/strong", "followingCount", twitterAccountSnapshot);

    twitterAccountSnapshot.Followers = ReadStatCount(
        documentNode, @"//a[@data-element-term='follower_stats']/strong", "followerCount", twitterAccountSnapshot);

    return twitterAccountSnapshot;
}

/// <summary>
/// Reads one integer counter selected by <paramref name="xpath"/>; returns -1 and
/// logs to <paramref name="snapshot"/> when the node is missing or not a number.
/// </summary>
private static int ReadStatCount(HtmlNode documentNode, string xpath, string label, TwitterSnapshot snapshot)
{
    HtmlNode node = documentNode.SelectSingleNode(xpath);
    if (null == node)
    {
        snapshot.Log += "Could not find " + label + " using " + xpath + Environment.NewLine;
        return -1;
    }

    // Invariant culture: the page uses ',' as the group separator regardless of
    // the culture of the machine running the scraper.
    int value;
    string text = node.InnerText.Trim();
    if (int.TryParse(text, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out value))
    {
        return value;
    }

    // Unexpected markup or abbreviated numbers should not abort the whole snapshot.
    snapshot.Log += "Could not parse " + label + " value '" + text + "' found using " + xpath + Environment.NewLine;
    return -1;
}
/// <summary>
/// Extracts a snapshot for <paramref name="account"/> from an already-loaded HTML document.
/// </summary>
/// <param name="documentNode">Root node of the HTML document to read from.</param>
/// <param name="account">Account the document was fetched for.</param>
/// <returns>
/// The snapshot built from the document. NOTE(review): the overrides visible
/// next to this declaration return <c>null</c> when <paramref name="documentNode"/>
/// is <c>null</c> - callers should be prepared for that.
/// </returns>
protected abstract SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account);
/// <summary>
/// Builds a <c>FacebookSnapshot</c> from a page's "likes" document: total likes,
/// people talking about this, and the most popular week, city and age group.
/// </summary>
/// <param name="documentNode">
/// Root node of the downloaded page; may be <c>null</c>. Its <c>InnerHtml</c> is
/// rewritten in place to remove HTML comment delimiters.
/// </param>
/// <param name="account">Account the page belongs to; its <c>Id</c> is copied to the snapshot.</param>
/// <returns>
/// The populated snapshot, or <c>null</c> when <paramref name="documentNode"/> is <c>null</c>.
/// Values that cannot be located or parsed get a sentinel (-1, 1900-01-01 or an
/// empty string) and a line is appended to the snapshot's <c>Log</c>.
/// </returns>
protected override SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account)
{
    // Nothing to parse: bail out before allocating or touching the account.
    if (null == documentNode)
    {
        return null;
    }

    FacebookSnapshot accountSnapshot = new FacebookSnapshot();
    accountSnapshot.FacebookAccountId = account.Id;

    // We need to strip out comment tags. Facebook puts this data in comment tags
    // and HAP does not parse through comments.
    documentNode.InnerHtml = documentNode.InnerHtml.Replace("<!--", String.Empty).Replace("-->", String.Empty);

    accountSnapshot.TotalLikes = ReadLikesCount(
        documentNode,
        @"//h3[text() = 'Total Likes']/../../../../div/span[@class='timelineLikesBigNumber fsm']",
        "totalLikes",
        accountSnapshot);

    accountSnapshot.PeopleTalkingAboutThis = ReadLikesCount(
        documentNode,
        @"//h3[text() = 'People Talking About This']/../../../../div/span[@class='timelineLikesBigNumber fsm']",
        "peopleTalkingAboutThis",
        accountSnapshot);

    // Most popular week: the value is the sibling that precedes the caption span.
    string mostPopularWeekXPath = @"//span[text()='Most Popular Week']";
    string weekText = ReadPrecedingText(documentNode, mostPopularWeekXPath, "mostPopularWeek", accountSnapshot);
    DateTime mostPopularWeek = new DateTime(1900, 1, 1);
    if (null != weekText)
    {
        DateTime parsedWeek;
        if (DateTime.TryParse(weekText, CultureInfo.InvariantCulture, DateTimeStyles.None, out parsedWeek))
        {
            mostPopularWeek = parsedWeek;
        }
        else
        {
            // An unreadable date should not abort the whole snapshot.
            accountSnapshot.Log += "Could not parse mostPopularWeek value '" + weekText + "' found using " + mostPopularWeekXPath + Environment.NewLine;
        }
    }
    accountSnapshot.MostPopularWeek = mostPopularWeek;

    accountSnapshot.MostPopularCity =
        ReadPrecedingText(documentNode, @"//span[text()='Most Popular City']", "mostPopularCity", accountSnapshot)
        ?? String.Empty;

    accountSnapshot.MostPopularAgeGroup =
        ReadPrecedingText(documentNode, @"//span[text()='Most Popular Age Group']", "mostPopularAgeGroup", accountSnapshot)
        ?? String.Empty;

    return accountSnapshot;
}

/// <summary>
/// Reads one integer counter selected by <paramref name="xpath"/>; returns -1 and
/// logs to <paramref name="snapshot"/> when the node is missing or not a number.
/// </summary>
private static int ReadLikesCount(HtmlNode documentNode, string xpath, string label, FacebookSnapshot snapshot)
{
    HtmlNode node = documentNode.SelectSingleNode(xpath);
    if (null == node)
    {
        snapshot.Log += "Could not find " + label + " using " + xpath + Environment.NewLine;
        return -1;
    }

    // Invariant culture: ',' is treated as the group separator regardless of
    // the culture of the machine running the scraper.
    int value;
    string text = node.InnerText.Trim();
    if (int.TryParse(text, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out value))
    {
        return value;
    }

    snapshot.Log += "Could not parse " + label + " value '" + text + "' found using " + xpath + Environment.NewLine;
    return -1;
}

/// <summary>
/// Returns the text of the sibling preceding the caption node selected by
/// <paramref name="xpath"/>; returns <c>null</c> and logs to
/// <paramref name="snapshot"/> when the caption or its sibling is missing.
/// </summary>
private static string ReadPrecedingText(HtmlNode documentNode, string xpath, string label, FacebookSnapshot snapshot)
{
    HtmlNode caption = documentNode.SelectSingleNode(xpath);
    if (null != caption && null != caption.PreviousSibling)
    {
        return caption.PreviousSibling.InnerText;
    }

    snapshot.Log += "Could not find " + label + " using " + xpath + Environment.NewLine;
    return null;
}