/// <summary>
/// Collects the <c>src</c> attribute of every node matched by <paramref name="selector"/>
/// and wraps each non-empty value in a <see cref="ModelFeedMultimedia"/>.
/// </summary>
/// <param name="doc">Parsed HTML document to search. A null document yields an empty list.</param>
/// <param name="selector">Selector handed to <c>SelectNodes</c>. A null or empty selector yields an empty list.</param>
/// <returns>
/// One entry per matched node that carries a non-empty <c>src</c> attribute, in match order.
/// Never null; empty when nothing matched.
/// </returns>
private static List<ModelFeedMultimedia> ScrapeImageDetailfromPage(HtmlDocument doc, string selector)
{
    List<ModelFeedMultimedia> lstModelFeedImage = new List<ModelFeedMultimedia>();

    // Callers pass documents that come straight from a page request, which may have failed.
    if (doc == null || doc.DocumentNode == null || String.IsNullOrEmpty(selector))
    {
        return lstModelFeedImage;
    }

    HtmlNodeCollection selectNodesList = doc.DocumentNode.SelectNodes(selector);
    if (selectNodesList == null)
    {
        return lstModelFeedImage;
    }

    foreach (HtmlNode node in selectNodesList)
    {
        if (node == null)
        {
            continue;
        }

        // The attribute indexer yields null when the attribute is absent, so the
        // attribute itself has to be checked before its Value is read.
        var srcAttribute = node.Attributes["src"];
        if (srcAttribute == null || String.IsNullOrEmpty(srcAttribute.Value))
        {
            continue;
        }

        ModelFeedMultimedia objModelFeedImage = new ModelFeedMultimedia();
        objModelFeedImage.MultiMediaURL = srcAttribute.Value;
        // NOTE(review): 1 is a hard-coded type code; confirm it agrees with the numbering
        // used where Twitter media is stored via Convert.ToInt32(media.MediaType).
        objModelFeedImage.MultiMediaType = 1;
        lstModelFeedImage.Add(objModelFeedImage);
    }

    return lstModelFeedImage;
}
/// <summary>
/// Reads up to 20 tweets (replies excluded) from the timeline of
/// <c>modelFeedScrapperPath.FeedChannelName</c>, converts every tweet that is newer than
/// <paramref name="LastMaxTweetPostedId"/> into a <see cref="ModelTwitterFeedsDetails"/>,
/// enriches it with text and images scraped from the page the tweet links to, and
/// stores the result through <c>BLLFeed.InsertTwitterScrappedData</c>.
/// </summary>
/// <param name="service">Twitter client used to read the timeline.</param>
/// <param name="feedId">Feed identifier copied onto the stored feed header.</param>
/// <param name="LastMaxTweetPostedId">
/// Id of the newest tweet handled by a previous run. Enumeration stops at the first tweet
/// whose <c>IdStr</c> equals it. Null or empty means every returned tweet is processed.
/// </param>
/// <param name="modelFeedScrapperPath">Feed configuration; supplies the screen name to read.</param>
/// <param name="listScrappingHelper">
/// Per-host selectors for detail pages. May be null, in which case no detail page content is extracted.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="service"/> or <paramref name="modelFeedScrapperPath"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">The timeline request returned no result.</exception>
/// <remarks>
/// Exceptions from the Twitter client, the scraper helpers and the data layer are not caught
/// here, so they reach the caller with their original stack trace.
/// </remarks>
public static void GetTwitterTweetsWithDetails(TwitterService service, double feedId, string LastMaxTweetPostedId, ModelFeed modelFeedScrapperPath, List<ModelFeedsDetailScrappingHelper> listScrappingHelper)
{
    if (service == null)
    {
        throw new ArgumentNullException("service");
    }
    if (modelFeedScrapperPath == null)
    {
        throw new ArgumentNullException("modelFeedScrapperPath");
    }

    var timelineOptions = new ListTweetsOnUserTimelineOptions
    {
        ScreenName = modelFeedScrapperPath.FeedChannelName,
        Count = 20,
        ExcludeReplies = true
    };
    var timelineTweets = service.ListTweetsOnUserTimeline(timelineOptions);
    if (timelineTweets == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException from the loop below.
        throw new InvalidOperationException(
            "Twitter timeline request returned no result for screen name '" + modelFeedScrapperPath.FeedChannelName + "'.");
    }

    ModelTwitterFeeds modelTwitterFeeds = new ModelTwitterFeeds();
    List<ModelTwitterFeedsDetails> lstModelTwitterFeedsDetail = new List<ModelTwitterFeedsDetails>();
    bool headerHandled = false;

    foreach (var tweet in timelineTweets)
    {
        if (tweet == null)
        {
            continue;
        }

        // Everything from the previously stored tweet onwards has already been processed.
        if (!String.IsNullOrEmpty(LastMaxTweetPostedId)
            && tweet.IdStr != null
            && String.Equals(LastMaxTweetPostedId.Trim(), tweet.IdStr.Trim(), StringComparison.OrdinalIgnoreCase))
        {
            break;
        }

        // The feed header is taken from the first processed tweet only; when that tweet
        // carries no user the header stays unpopulated.
        if (!headerHandled)
        {
            var author = tweet.User;
            if (author != null)
            {
                modelTwitterFeeds.FeedId = feedId;
                modelTwitterFeeds.UserPageId = author.Id;
                modelTwitterFeeds.UserPageTitle = author.Name;
                modelTwitterFeeds.UserScreenName = author.ScreenName;
                modelTwitterFeeds.UserPageDesc = author.Description;
                modelTwitterFeeds.UserPageLanguage = author.Language;
                modelTwitterFeeds.UserPageFollowers = author.FollowersCount;
                modelTwitterFeeds.UserPageLogoImage = author.ProfileImageUrl;

                // The banner URL is read from the tweet's raw JSON.
                string bannerUrl = ReadProfileBannerUrlFromRawTweet(tweet.RawSource);
                if (bannerUrl != null)
                {
                    modelTwitterFeeds.UserPageCoverImageURL = bannerUrl;
                }
            }
            headerHandled = true;
        }

        ModelTwitterFeedsDetails modelTwitterFeedsDetail = new ModelTwitterFeedsDetails();
        modelTwitterFeedsDetail.FeedPostedtId = tweet.Id;
        modelTwitterFeedsDetail.FeedLanguage = tweet.Language;
        modelTwitterFeedsDetail.FeedPostDate = tweet.CreatedDate;
        modelTwitterFeedsDetail.FeedTextDetail = String.Empty;
        modelTwitterFeedsDetail.FeedText = ApplyTweetEntities(tweet.Text, tweet.Entities, modelTwitterFeedsDetail);

        // Sample card URL: https://twitter.com/i/cards/tfw/v1/678585688783257600
        string tweetCardUrl = "https://twitter.com/i/cards/tfw/v1/" + tweet.Id;
        AttachDetailPageContent(modelTwitterFeedsDetail, tweetCardUrl, listScrappingHelper);

        lstModelTwitterFeedsDetail.Add(modelTwitterFeedsDetail);
    }

    modelTwitterFeeds.TwitterFeedDetails = lstModelTwitterFeedsDetail;
    BLLFeed objBLLFeed = new BLLFeed();
    objBLLFeed.InsertTwitterScrappedData(modelTwitterFeeds);
}

/// <summary>
/// Reads <c>user.profile_banner_url</c> from a tweet's raw JSON.
/// Returns null when the JSON is empty, is not an object, or lacks that property.
/// </summary>
private static string ReadProfileBannerUrlFromRawTweet(string rawTweetJson)
{
    if (String.IsNullOrEmpty(rawTweetJson))
    {
        return null;
    }

    JObject root = JsonConvert.DeserializeObject(rawTweetJson) as JObject;
    if (root == null)
    {
        return null;
    }

    // "user" may be missing or a JSON null, neither of which can be indexed.
    JObject userNode = root["user"] as JObject;
    if (userNode == null)
    {
        return null;
    }

    var banner = userNode["profile_banner_url"];
    return banner == null ? null : (string)banner;
}

/// <summary>
/// Copies link and media information from <paramref name="entities"/> onto
/// <paramref name="detail"/> and returns <paramref name="tweetText"/> with every URL,
/// media URL, "#hashtag" and line feed removed.
/// </summary>
private static string ApplyTweetEntities(string tweetText, TwitterEntities entities, ModelTwitterFeedsDetails detail)
{
    string text = tweetText ?? String.Empty;

    if (entities != null)
    {
        IList<TwitterUrl> urls = entities.Urls;
        if (urls != null)
        {
            foreach (var url in urls)
            {
                if (url == null)
                {
                    continue;
                }

                // An expanded URL always wins (the last one found is kept); the short
                // form is only a fallback while no detail page URL has been set.
                if (!String.IsNullOrEmpty(url.ExpandedValue))
                {
                    detail.FeedDetailPageURL = url.ExpandedValue.Trim();
                }
                else if (!String.IsNullOrEmpty(url.Value) && String.IsNullOrEmpty(detail.FeedDetailPageURL))
                {
                    detail.FeedDetailPageURL = url.Value.Trim();
                }

                text = RemoveFromTweetText(text, url.Value);
            }
        }

        IList<TwitterMedia> mediaItems = entities.Media;
        if (mediaItems != null)
        {
            foreach (var media in mediaItems)
            {
                if (media == null || media.Url == null)
                {
                    continue;
                }

                string mediaUrl = media.Url.Trim();
                text = RemoveFromTweetText(text, media.Url);

                if (!String.IsNullOrEmpty(mediaUrl))
                {
                    // A fresh object per media entity: re-using one instance would leave
                    // every list entry pointing at the values of the last media item.
                    ModelFeedMultimedia multimedia = new ModelFeedMultimedia();
                    multimedia.MultiMediaURL = mediaUrl;
                    // TwitterMediaType: Photo = 0, Video = 1, AnimatedGif = 2.
                    multimedia.MultiMediaType = Convert.ToInt32(media.MediaType);
                    // Assumes FeedMultimediaList is initialised by the model - TODO confirm.
                    detail.FeedMultimediaList.Add(multimedia);
                }
            }
        }

        IList<TwitterHashTag> hashTags = entities.HashTags;
        if (hashTags != null)
        {
            foreach (var hashTag in hashTags)
            {
                if (hashTag == null)
                {
                    continue;
                }

                text = RemoveFromTweetText(text, "#" + hashTag.Text);
            }
        }
    }

    return text.Replace("\n", String.Empty).Trim();
}

/// <summary>
/// Trims <paramref name="text"/> and removes every occurrence of <paramref name="token"/>.
/// A null or empty token only trims, because <c>String.Replace</c> rejects such a search value.
/// </summary>
private static string RemoveFromTweetText(string text, string token)
{
    string trimmed = text.Trim();
    if (String.IsNullOrEmpty(token))
    {
        return trimmed;
    }

    return trimmed.Replace(token, String.Empty).Trim();
}

/// <summary>
/// Requests the page referenced by <c>detail.FeedDetailPageURL</c> and fills
/// <c>FeedTextDetail</c> and, when the tweet carried no media of its own,
/// <c>FeedMultimediaList</c>, using the selectors configured for the page's host.
/// Falls back to the tweet's card page for an image when the detail page yields none.
/// </summary>
private static void AttachDetailPageContent(ModelTwitterFeedsDetails detail, string tweetCardUrl, List<ModelFeedsDetailScrappingHelper> listScrappingHelper)
{
    // A missing or malformed link means there is no detail page; it must not abort the whole feed.
    Uri detailPageUri;
    if (String.IsNullOrEmpty(detail.FeedDetailPageURL)
        || !Uri.TryCreate(detail.FeedDetailPageURL, UriKind.Absolute, out detailPageUri))
    {
        detail.FeedTextDetail = String.Empty;
        return;
    }

    // A helper row applies when its configured HostName contains the page's host.
    string host = detailPageUri.Host.Trim().ToLowerInvariant();
    ModelFeedsDetailScrappingHelper helperRow = null;
    if (listScrappingHelper != null)
    {
        helperRow = listScrappingHelper.FirstOrDefault(m => m != null && m.HostName != null && m.HostName.Contains(host));
    }

    string textSelector = helperRow != null ? helperRow.DetailPagePath : String.Empty;
    string imageSelector = helperRow != null ? helperRow.DetailPageImagePath : String.Empty;

    HtmlDocument detailDoc = ScrapeDetailfromPage(detailPageUri.AbsoluteUri);
    if (detailDoc == null)
    {
        return;
    }

    if (!String.IsNullOrEmpty(textSelector))
    {
        detail.FeedTextDetail = ScrapeTextfromDetailPage(detailDoc, textSelector);
    }

    // Images are scraped only when the tweet had no media and an image selector is configured.
    if (detail.FeedMultimediaList.Count > 0 || String.IsNullOrEmpty(imageSelector))
    {
        return;
    }

    detail.FeedMultimediaList = ScrapeImageDetailfromPage(detailDoc, imageSelector);
    if (detail.FeedMultimediaList.Count > 0)
    {
        return;
    }

    // Last resort: the card page Twitter renders for the tweet.
    HtmlDocument cardDoc = ScrapeDetailfromPage(tweetCardUrl);
    if (cardDoc != null)
    {
        detail.FeedMultimediaList = ScrapeImageDetailfromPage(cardDoc, "//div[contains(@class, 'SummaryCard-image')] //img");
    }
}