Beispiel #1
0
        /// <summary>
        /// Downloads the page for <paramref name="account"/> and hands the parsed HTML to
        /// <c>ParsePage</c>, stamping the resulting snapshot with the current UTC time.
        /// </summary>
        /// <param name="account">Account whose <c>Id</c> is substituted into <c>PageFormatString</c> to build the relative url.</param>
        /// <returns>
        /// The snapshot produced by <c>ParsePage</c> with <c>DateOfSnapshot</c> set to <see cref="DateTime.UtcNow"/>,
        /// or <c>null</c> when <c>ParsePage</c> returns <c>null</c>.
        /// </returns>
        /// <exception cref="AggregateException">
        /// The download failed; the blocking wait on the request task wraps the underlying error.
        /// </exception>
        protected virtual SocialBaseSnapshot GetSnapshot(SocialBaseAccount account)
        {
            // Construct the url
            string relativeUrl = String.Format(PageFormatString, account.Id); // Eg. /torontomapleleafs/likes
            Uri    url         = new Uri(relativeUrl, UriKind.Relative);

            string responseString;

            // Make an http request. The client is disposed as soon as the body has been read
            // so that its handler and connection are not leaked on every call.
            using (HttpClient httpClient = new HttpClient())
            {
                httpClient.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36");
                httpClient.BaseAddress = new Uri(BaseAddress);

                // This method is synchronous, so block on the task; failures surface as AggregateException.
                Task <string> httpResponseMessage = httpClient.GetStringAsync(url);
                responseString = httpResponseMessage.Result;
            }

            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(responseString);

            HtmlNode documentNode = document.DocumentNode;

            SocialBaseSnapshot result = this.ParsePage(documentNode, account);

            // ParsePage may return null; only stamp a snapshot that actually exists.
            if (null != result)
            {
                result.DateOfSnapshot = DateTime.UtcNow;
            }

            return(result);
        }
Beispiel #2
0
        /// <summary>
        /// Reads the tweet, following and follower counts out of a parsed page and
        /// records them on a new <c>TwitterSnapshot</c>.
        /// </summary>
        /// <param name="documentNode">Root node of the page to inspect; may be <c>null</c>.</param>
        /// <param name="account">Account whose <c>Id</c> is stored as the snapshot's <c>TwitterAccountId</c>.</param>
        /// <returns>
        /// The populated snapshot, or <c>null</c> when <paramref name="documentNode"/> is <c>null</c>.
        /// A count that cannot be located or parsed is stored as <c>-1</c> and a line
        /// explaining why is appended to the snapshot's <c>Log</c>.
        /// </returns>
        protected override SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account)
        {
            if (null == documentNode)
            {
                return(null);
            }

            TwitterSnapshot twitterAccountSnapshot = new TwitterSnapshot();

            twitterAccountSnapshot.TwitterAccountId = account.Id;

            twitterAccountSnapshot.Tweets    = ReadTwitterStatCount(documentNode, "tweetCount", @"//a[@data-element-term='tweet_stats']/strong", twitterAccountSnapshot);
            twitterAccountSnapshot.Following = ReadTwitterStatCount(documentNode, "followingCount", @"//a[@data-element-term='following_stats']/strong", twitterAccountSnapshot);
            twitterAccountSnapshot.Followers = ReadTwitterStatCount(documentNode, "followerCount", @"//a[@data-element-term='follower_stats']/strong", twitterAccountSnapshot);

            return(twitterAccountSnapshot);
        }

        /// <summary>
        /// Selects a single node by XPath and parses its text as an integer with thousands separators.
        /// </summary>
        /// <param name="documentNode">Node the XPath is evaluated against.</param>
        /// <param name="label">Name of the statistic, used only in log messages.</param>
        /// <param name="xpath">XPath expression locating the element that holds the number.</param>
        /// <param name="snapshot">Snapshot whose <c>Log</c> receives a message on failure.</param>
        /// <returns>The parsed count, or <c>-1</c> when the node is missing or its text is not a valid number.</returns>
        private static int ReadTwitterStatCount(HtmlNode documentNode, string label, string xpath, TwitterSnapshot snapshot)
        {
            HtmlNode statNode = documentNode.SelectSingleNode(xpath);

            if (null == statNode)
            {
                snapshot.Log += "Could not find " + label + " using " + xpath + Environment.NewLine;
                return(-1);
            }

            // Parse with the invariant culture so "1,234" is read the same way regardless of
            // the thread's regional settings, and never throw on unexpected markup.
            string rawText = statNode.InnerText.Trim();
            int    count;

            if (int.TryParse(rawText, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out count))
            {
                return(count);
            }

            snapshot.Log += "Could not parse " + label + " value '" + rawText + "' found using " + xpath + Environment.NewLine;
            return(-1);
        }
Beispiel #3
0
 /// <summary>
 /// Builds a snapshot for <paramref name="account"/> from the root node of a parsed HTML page.
 /// </summary>
 /// <param name="documentNode">Root HTML node of the page to inspect.</param>
 /// <param name="account">Account the snapshot is being taken for.</param>
 /// <returns>
 /// The snapshot extracted from the page.
 /// NOTE(review): overrides appear to return null when documentNode is null - confirm callers handle that.
 /// </returns>
 protected abstract SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account);
Beispiel #4
0
        /// <summary>
        /// Reads the like statistics and "most popular" facts out of a parsed page and
        /// records them on a new <c>FacebookSnapshot</c>.
        /// </summary>
        /// <param name="documentNode">
        /// Root node of the page to inspect; may be <c>null</c>. Its <c>InnerHtml</c> is rewritten
        /// in place to strip HTML comment delimiters before any lookup is made.
        /// </param>
        /// <param name="account">Account whose <c>Id</c> is stored as the snapshot's <c>FacebookAccountId</c>.</param>
        /// <returns>
        /// The populated snapshot, or <c>null</c> when <paramref name="documentNode"/> is <c>null</c>.
        /// Values that cannot be located or parsed fall back to <c>-1</c> (counts),
        /// 1900-01-01 (most popular week) or an empty string (city, age group), and a line
        /// explaining why is appended to the snapshot's <c>Log</c>.
        /// </returns>
        protected override SocialBaseSnapshot ParsePage(HtmlNode documentNode, SocialBaseAccount account)
        {
            if (null == documentNode)
            {
                return(null);
            }

            FacebookSnapshot accountSnapshot = new FacebookSnapshot();

            accountSnapshot.FacebookAccountId = account.Id;

            // We need to strip out comment tags. Facebook puts this data in comment tags and HAP does not parse through comments.
            documentNode.InnerHtml = documentNode.InnerHtml.Replace("<!--", String.Empty).Replace("-->", String.Empty);

            accountSnapshot.TotalLikes             = ReadFacebookBigNumber(documentNode, "totalLikes", @"//h3[text() = 'Total Likes']/../../../../div/span[@class='timelineLikesBigNumber fsm']", accountSnapshot);
            accountSnapshot.PeopleTalkingAboutThis = ReadFacebookBigNumber(documentNode, "peopleTalkingAboutThis", @"//h3[text() = 'People Talking About This']/../../../../div/span[@class='timelineLikesBigNumber fsm']", accountSnapshot);

            // Most popular week: the value lives in the sibling preceding the label span.
            string   mostPopularWeekXPath = @"//span[text()='Most Popular Week']";
            string   mostPopularWeekText  = ReadFacebookPrecedingText(documentNode, mostPopularWeekXPath);
            DateTime mostPopularWeek;

            if (null == mostPopularWeekText)
            {
                accountSnapshot.MostPopularWeek = new DateTime(1900, 1, 1);
                accountSnapshot.Log            += "Could not find mostPopularWeek using " + mostPopularWeekXPath + Environment.NewLine;
            }
            else if (DateTime.TryParse(mostPopularWeekText, CultureInfo.InvariantCulture, DateTimeStyles.None, out mostPopularWeek))
            {
                // Invariant culture keeps the parse independent of the thread's regional settings.
                accountSnapshot.MostPopularWeek = mostPopularWeek;
            }
            else
            {
                accountSnapshot.MostPopularWeek = new DateTime(1900, 1, 1);
                accountSnapshot.Log            += "Could not parse mostPopularWeek value '" + mostPopularWeekText + "' found using " + mostPopularWeekXPath + Environment.NewLine;
            }

            string mostPopularCityXPath = @"//span[text()='Most Popular City']";
            string mostPopularCityText  = ReadFacebookPrecedingText(documentNode, mostPopularCityXPath);

            if (null != mostPopularCityText)
            {
                accountSnapshot.MostPopularCity = mostPopularCityText;
            }
            else
            {
                accountSnapshot.MostPopularCity = String.Empty;
                accountSnapshot.Log            += "Could not find mostPopularCity using " + mostPopularCityXPath + Environment.NewLine;
            }

            string mostPopularAgeGroupXPath = @"//span[text()='Most Popular Age Group']";
            string mostPopularAgeGroupText  = ReadFacebookPrecedingText(documentNode, mostPopularAgeGroupXPath);

            if (null != mostPopularAgeGroupText)
            {
                accountSnapshot.MostPopularAgeGroup = mostPopularAgeGroupText;
            }
            else
            {
                accountSnapshot.MostPopularAgeGroup = String.Empty;
                accountSnapshot.Log                += "Could not find mostPopularAgeGroup using " + mostPopularAgeGroupXPath + Environment.NewLine;
            }

            return(accountSnapshot);
        }

        /// <summary>
        /// Selects a single node by XPath and parses its text as an integer with thousands separators.
        /// </summary>
        /// <param name="documentNode">Node the XPath is evaluated against.</param>
        /// <param name="label">Name of the statistic, used only in log messages.</param>
        /// <param name="xpath">XPath expression locating the element that holds the number.</param>
        /// <param name="snapshot">Snapshot whose <c>Log</c> receives a message on failure.</param>
        /// <returns>The parsed count, or <c>-1</c> when the node is missing or its text is not a valid number.</returns>
        private static int ReadFacebookBigNumber(HtmlNode documentNode, string label, string xpath, FacebookSnapshot snapshot)
        {
            HtmlNode numberNode = documentNode.SelectSingleNode(xpath);

            if (null == numberNode)
            {
                snapshot.Log += "Could not find " + label + " using " + xpath + Environment.NewLine;
                return(-1);
            }

            // Parse with the invariant culture so "1,234" is read the same way regardless of
            // the thread's regional settings, and never throw on unexpected markup.
            string rawText = numberNode.InnerText.Trim();
            int    number;

            if (int.TryParse(rawText, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out number))
            {
                return(number);
            }

            snapshot.Log += "Could not parse " + label + " value '" + rawText + "' found using " + xpath + Environment.NewLine;
            return(-1);
        }

        /// <summary>
        /// Returns the inner text of the sibling that precedes the node matched by <paramref name="xpath"/>.
        /// </summary>
        /// <param name="documentNode">Node the XPath is evaluated against.</param>
        /// <param name="xpath">XPath expression locating the label element.</param>
        /// <returns>The preceding sibling's text, or <c>null</c> when the label or its preceding sibling is missing.</returns>
        private static string ReadFacebookPrecedingText(HtmlNode documentNode, string xpath)
        {
            HtmlNode labelNode = documentNode.SelectSingleNode(xpath);

            if (null == labelNode || null == labelNode.PreviousSibling)
            {
                return(null);
            }

            return(labelNode.PreviousSibling.InnerText);
        }