コード例 #1
0
        /// <summary>
        /// Collects the <c>src</c> attribute of every node matched by <paramref name="selector"/>
        /// and wraps each non-empty value in a <see cref="ModelFeedMultimedia"/> entry.
        /// </summary>
        /// <param name="doc">Parsed page to search. A null document yields an empty list.</param>
        /// <param name="selector">Selector expression passed to <c>SelectNodes</c> on the document root.</param>
        /// <returns>
        /// One entry per matched node that carries a non-empty <c>src</c> attribute, in match order.
        /// Never null; empty when nothing matches.
        /// </returns>
        private static List<ModelFeedMultimedia> ScrapeImageDetailfromPage(HtmlDocument doc, string selector)
        {
            List<ModelFeedMultimedia> lstModelFeedImage = new List<ModelFeedMultimedia>();

            // Callers may hand over the result of a failed page download.
            if (doc == null || doc.DocumentNode == null)
            {
                return lstModelFeedImage;
            }

            HtmlNodeCollection selectNodesList = doc.DocumentNode.SelectNodes(selector);
            if (selectNodesList == null)
            {
                return lstModelFeedImage;
            }

            foreach (HtmlNode node in selectNodesList)
            {
                if (node == null)
                {
                    continue;
                }

                // The attribute lookup itself can come back null when a matched node has no "src".
                var srcAttribute = node.Attributes["src"];
                if (srcAttribute == null || String.IsNullOrEmpty(srcAttribute.Value))
                {
                    continue;
                }

                ModelFeedMultimedia objModelFeedImage = new ModelFeedMultimedia();
                objModelFeedImage.MultiMediaURL = srcAttribute.Value;
                // NOTE(review): 1 is the hard-coded type code for scraped images; the
                // enumeration it corresponds to is not visible from this method.
                objModelFeedImage.MultiMediaType = 1;
                lstModelFeedImage.Add(objModelFeedImage);
            }

            return lstModelFeedImage;
        }
コード例 #2
0
        /// <summary>
        /// Reads up to 20 tweets (replies excluded) from the timeline of
        /// <c>modelFeedScrapperPath.FeedChannelName</c>, converts each one into a
        /// <see cref="ModelTwitterFeedsDetails"/> row, optionally enriches it with text and images
        /// scraped from the page the tweet links to, and hands the result to
        /// <c>BLLFeed.InsertTwitterScrappedData</c>.
        /// </summary>
        /// <param name="service">Twitter client used to list the timeline.</param>
        /// <param name="feedId">Value stored in <c>ModelTwitterFeeds.FeedId</c>.</param>
        /// <param name="LastMaxTweetPostedId">
        /// When not empty, processing stops at the first tweet whose <c>IdStr</c> equals this value
        /// (trimmed, case-insensitive); that tweet and everything after it are skipped.
        /// </param>
        /// <param name="modelFeedScrapperPath">Supplies the screen name to read.</param>
        /// <param name="listScrappingHelper">
        /// Per-host selectors for detail pages; the first entry whose <c>HostName</c> contains the
        /// linked page's host is used. May be null, in which case no selectors are applied.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="service"/> or <paramref name="modelFeedScrapperPath"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">The timeline call returned null.</exception>
        /// <remarks>
        /// Exceptions raised while processing propagate to the caller with their original stack trace.
        /// </remarks>
        public static void GetTwitterTweetsWithDetails(TwitterService service, double feedId, string LastMaxTweetPostedId, ModelFeed modelFeedScrapperPath, List<ModelFeedsDetailScrappingHelper> listScrappingHelper)
        {
            if (service == null)
            {
                throw new ArgumentNullException("service");
            }
            if (modelFeedScrapperPath == null)
            {
                throw new ArgumentNullException("modelFeedScrapperPath");
            }

            ModelTwitterFeeds modelTwitterFeeds = new ModelTwitterFeeds();
            List<ModelTwitterFeedsDetails> lstModelTwitterFeedsDetail = new List<ModelTwitterFeedsDetails>();
            BLLFeed objBLLFeed = new BLLFeed();

            var User_OptionInit = new ListTweetsOnUserTimelineOptions { ScreenName = modelFeedScrapperPath.FeedChannelName, Count = 20, ExcludeReplies = true };
            var User_Tweets = service.ListTweetsOnUserTimeline(User_OptionInit);
            if (User_Tweets == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException in the loop below.
                // NOTE(review): presumably a null result means the request failed - confirm against the client library.
                throw new InvalidOperationException("ListTweetsOnUserTimeline returned null for screen name '" + modelFeedScrapperPath.FeedChannelName + "'.");
            }

            string lastPostedId = String.IsNullOrEmpty(LastMaxTweetPostedId) ? null : LastMaxTweetPostedId.Trim();
            bool feedHeaderHandled = false;

            foreach (var tweet in User_Tweets)
            {
                if (tweet == null)
                {
                    continue;
                }

                // Stop at the first tweet that matches the last recorded id.
                if (lastPostedId != null && tweet.IdStr != null
                    && String.Equals(lastPostedId, tweet.IdStr.Trim(), StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                // The account-level fields are taken from the first processed tweet only.
                if (!feedHeaderHandled)
                {
                    if (tweet.User != null)
                    {
                        modelTwitterFeeds.FeedId = feedId;
                        modelTwitterFeeds.UserPageId = tweet.User.Id;
                        modelTwitterFeeds.UserPageTitle = tweet.User.Name;
                        modelTwitterFeeds.UserScreenName = tweet.User.ScreenName;
                        modelTwitterFeeds.UserPageDesc = tweet.User.Description;
                        modelTwitterFeeds.UserPageLanguage = tweet.User.Language;
                        modelTwitterFeeds.UserPageFollowers = tweet.User.FollowersCount;
                        modelTwitterFeeds.UserPageLogoImage = tweet.User.ProfileImageUrl;
                        ApplyProfileBannerFromRawTweet(modelTwitterFeeds, tweet.RawSource);
                    }
                    feedHeaderHandled = true;
                }

                ModelTwitterFeedsDetails modelTwitterFeedsDetail = new ModelTwitterFeedsDetails();
                modelTwitterFeedsDetail.FeedPostedtId = tweet.Id;
                modelTwitterFeedsDetail.FeedLanguage = tweet.Language;
                modelTwitterFeedsDetail.FeedPostDate = tweet.CreatedDate;
                modelTwitterFeedsDetail.FeedTextDetail = String.Empty;
                modelTwitterFeedsDetail.FeedText = ApplyTweetEntitiesToDetail(tweet.Text, tweet.Entities, modelTwitterFeedsDetail);

                if (!String.IsNullOrEmpty(modelTwitterFeedsDetail.FeedDetailPageURL))
                {
                    // Card page used as a last resort for an image, e.g.
                    // https://twitter.com/i/cards/tfw/v1/678585688783257600
                    string StaticImageURI = "https://twitter.com/i/cards/tfw/v1/" + tweet.Id;
                    PopulateDetailFromLinkedPage(modelTwitterFeedsDetail, StaticImageURI, listScrappingHelper);
                }

                lstModelTwitterFeedsDetail.Add(modelTwitterFeedsDetail);
            }

            modelTwitterFeeds.TwitterFeedDetails = lstModelTwitterFeedsDetail;
            objBLLFeed.InsertTwitterScrappedData(modelTwitterFeeds);
        }

        /// <summary>
        /// Copies <c>user.profile_banner_url</c> from a tweet's raw JSON into
        /// <c>UserPageCoverImageURL</c>; does nothing when the JSON or either key is missing.
        /// </summary>
        private static void ApplyProfileBannerFromRawTweet(ModelTwitterFeeds modelTwitterFeeds, string rawSource)
        {
            if (String.IsNullOrEmpty(rawSource))
            {
                return;
            }

            JObject obj = JsonConvert.DeserializeObject(rawSource) as JObject;
            if (obj == null)
            {
                return;
            }

            // "user" may be absent or not an object; indexing it blindly would throw.
            JObject userObject = obj["user"] as JObject;
            if (userObject == null)
            {
                return;
            }

            var bannerToken = userObject["profile_banner_url"];
            if (bannerToken != null)
            {
                modelTwitterFeeds.UserPageCoverImageURL = (string)bannerToken;
            }
        }

        /// <summary>
        /// Walks the tweet's URL, media and hashtag entities: records the detail-page URL and the
        /// media items on <paramref name="modelTwitterFeedsDetail"/> and returns the tweet text with
        /// those entities and all line feeds removed.
        /// </summary>
        private static string ApplyTweetEntitiesToDetail(string tweetText, TwitterEntities twitterEntities, ModelTwitterFeedsDetails modelTwitterFeedsDetail)
        {
            string TweetText = tweetText ?? String.Empty;

            if (twitterEntities != null)
            {
                if (twitterEntities.Urls != null)
                {
                    foreach (var url in twitterEntities.Urls)
                    {
                        if (url == null)
                        {
                            continue;
                        }

                        // The expanded link always wins; the short link is only a fallback
                        // while no detail URL has been recorded yet.
                        if (!String.IsNullOrEmpty(url.ExpandedValue))
                        {
                            modelTwitterFeedsDetail.FeedDetailPageURL = url.ExpandedValue.Trim();
                        }
                        else if (!String.IsNullOrEmpty(url.Value) && String.IsNullOrEmpty(modelTwitterFeedsDetail.FeedDetailPageURL))
                        {
                            modelTwitterFeedsDetail.FeedDetailPageURL = url.Value.Trim();
                        }

                        TweetText = StripTokenFromTweetText(TweetText, url.Value);
                    }
                }

                if (twitterEntities.Media != null)
                {
                    foreach (var media in twitterEntities.Media)
                    {
                        if (media == null)
                        {
                            continue;
                        }

                        string tweetMultimediaURL = media.Url == null ? String.Empty : media.Url.Trim();
                        TweetText = StripTokenFromTweetText(TweetText, media.Url);

                        if (!String.IsNullOrEmpty(tweetMultimediaURL))
                        {
                            // A fresh object per media entity: reusing one instance would make
                            // every list entry point at the same (last) URL.
                            ModelFeedMultimedia modelModelFeedMultimedia = new ModelFeedMultimedia();
                            modelModelFeedMultimedia.MultiMediaURL = tweetMultimediaURL;
                            modelModelFeedMultimedia.MultiMediaType = Convert.ToInt32(media.MediaType);
                            modelTwitterFeedsDetail.FeedMultimediaList.Add(modelModelFeedMultimedia);
                        }
                    }
                }

                if (twitterEntities.HashTags != null)
                {
                    foreach (var hashTag in twitterEntities.HashTags)
                    {
                        // An empty tag would reduce the token to "#" and strip every hash sign.
                        if (hashTag == null || String.IsNullOrEmpty(hashTag.Text))
                        {
                            continue;
                        }

                        TweetText = StripTokenFromTweetText(TweetText, "#" + hashTag.Text);
                    }
                }
            }

            return TweetText.Replace("\n", String.Empty).Trim();
        }

        /// <summary>
        /// Removes every occurrence of <paramref name="token"/> from <paramref name="text"/> and trims
        /// the result; returns <paramref name="text"/> unchanged when the token is null or empty.
        /// </summary>
        private static string StripTokenFromTweetText(string text, string token)
        {
            // String.Replace rejects a null or empty search value.
            if (String.IsNullOrEmpty(token))
            {
                return text;
            }

            return text.Trim().Replace(token, String.Empty).Trim();
        }

        /// <summary>
        /// Downloads the page behind <c>FeedDetailPageURL</c> and fills <c>FeedTextDetail</c> and,
        /// when the tweet carried no media of its own, <c>FeedMultimediaList</c> using the selectors
        /// configured for the page's host. Falls back to <paramref name="staticImageUri"/> when the
        /// configured image selector finds nothing.
        /// </summary>
        private static void PopulateDetailFromLinkedPage(ModelTwitterFeedsDetails modelTwitterFeedsDetail, string staticImageUri, List<ModelFeedsDetailScrappingHelper> listScrappingHelper)
        {
            // A malformed link in one tweet should not abort the whole import.
            Uri DetailPageURI;
            if (!Uri.TryCreate(modelTwitterFeedsDetail.FeedDetailPageURL, UriKind.Absolute, out DetailPageURI))
            {
                return;
            }

            string DetailPageTextSelector = String.Empty;
            string DetailPageImageSelector = String.Empty;
            string detailPageHost = DetailPageURI.Host.Trim().ToLowerInvariant();

            ModelFeedsDetailScrappingHelper rowDetailHelper = listScrappingHelper == null
                ? null
                : listScrappingHelper.FirstOrDefault(m => m != null && m.HostName != null && m.HostName.Contains(detailPageHost));
            if (rowDetailHelper != null)
            {
                DetailPageTextSelector = rowDetailHelper.DetailPagePath;
                DetailPageImageSelector = rowDetailHelper.DetailPageImagePath;
            }

            HtmlDocument requestedDoc = ScrapeDetailfromPage(DetailPageURI.AbsoluteUri);
            if (requestedDoc == null)
            {
                return;
            }

            // Without a text selector for this host the detail text stays empty.
            if (!String.IsNullOrEmpty(DetailPageTextSelector))
            {
                modelTwitterFeedsDetail.FeedTextDetail = ScrapeTextfromDetailPage(requestedDoc, DetailPageTextSelector);
            }

            // Images are scraped only when the tweet itself had no media and a selector exists.
            if (modelTwitterFeedsDetail.FeedMultimediaList.Count > 0 || String.IsNullOrEmpty(DetailPageImageSelector))
            {
                return;
            }

            modelTwitterFeedsDetail.FeedMultimediaList = ScrapeImageDetailfromPage(requestedDoc, DetailPageImageSelector);
            if (modelTwitterFeedsDetail.FeedMultimediaList.Count > 0)
            {
                return;
            }

            // Nothing found on the linked page: try the tweet's card page instead.
            HtmlDocument VideoImageRequestedDoc = ScrapeDetailfromPage(staticImageUri);
            if (VideoImageRequestedDoc != null)
            {
                string staticImageSelector = "//div[contains(@class, 'SummaryCard-image')] //img";
                modelTwitterFeedsDetail.FeedMultimediaList = ScrapeImageDetailfromPage(VideoImageRequestedDoc, staticImageSelector);
            }
        }