/// <summary>
/// Serializes a <c>VideosToDownload</c> request to JSON (using the adapter's
/// <c>JsonSerializerSettings</c>) and adds it as a message to the videos-to-download queue.
/// </summary>
/// <param name="videoToDownload">The request to publish.</param>
public void SendVideosToDownload(VideosToDownload videoToDownload)
{
    string payload = JsonConvert.SerializeObject(videoToDownload, JsonSerializerSettings);
    videosToDownloadQueue.AddMessage(new CloudQueueMessage(payload));
}
/// <summary>
/// Inserts each post into the posts table and publishes queue messages for the photos
/// and videos of that post that still have to be downloaded.
/// </summary>
/// <remarks>
/// Processing stops at the first post for which <c>postsTableAdapter.InsertPost</c> returns
/// false; the remaining posts are not examined.
/// </remarks>
/// <param name="posts">Posts to process, handled in enumeration order.</param>
/// <param name="log">Trace writer that receives progress messages.</param>
/// <param name="likerBlogname">
/// When not null, a like-index entry is inserted under this blog name for every post
/// that carries a liked timestamp.
/// </param>
/// <returns>A task that completes once the posts have been processed.</returns>
public async Task ProcessPosts(IEnumerable<Post> posts, TraceWriter log, string likerBlogname = null)
{
    foreach (Post post in posts)
    {
        // Sometimes post.Photos has Alt_sizes with length 0, needs to be sanitized.
        SanitizePostPhotos(post);

        // Fetched before InsertPost is called; its download levels decide below whether
        // media of this post still needs to be queued.
        PostEntity postEntityInTable = postsTableAdapter.GetPost(post.Blog_name, post.Id.ToString());
        PostEntity postEntityFromTumblr = new PostEntity(post);

        if (!postsTableAdapter.InsertPost(postEntityFromTumblr))
        {
            break;
        }

        if (likerBlogname != null && post.Liked_Timestamp.HasValue)
        {
            likeIndexTableAdapter.InsertLikeIndex(
                likerBlogname,
                post.Liked_Timestamp.ToString(),
                post.Blog_name,
                post.Id.ToString(),
                post.Reblog_key);
        }

        log.Info($"Post {post.Blog_name}/{post.Id} inserted to table");

        // True when there is no stored entity, no recorded level, or the level is below the maximum.
        bool photosPending = postEntityInTable == null
            || postEntityInTable.PicsDownloadLevel == null
            || postEntityInTable.PicsDownloadLevel < Constants.MaxPicsDownloadLevel;
        bool videosPending = postEntityInTable == null
            || postEntityInTable.VideosDownloadLevel == null
            || postEntityInTable.VideosDownloadLevel < Constants.MaxVideosDownloadLevel;

        PhotosToDownload photosToDownloadMessage = null;
        if (postEntityFromTumblr.PhotosJson != null)
        {
            if (photosPending)
            {
                photosToDownloadMessage = new PhotosToDownload(post) { Photos = post.Photos };
            }
            else
            {
                log.Info("Photos already downloaded");
            }
        }

        List<VideoUrls> videoUrlsList = new List<VideoUrls>();

        // Media found in post.Content is queued regardless of the stored download levels
        // (unchanged from the previous behaviour).
        if (post.Content != null && post.Content.Length > 0)
        {
            List<Photo> contentPhotos = new List<Photo>(post.Content.Length);
            foreach (Content content in post.Content)
            {
                if (content.Type == "image")
                {
                    contentPhotos.Add(ConvertContentToPhoto(content));
                }
                else if (content.Type == "video" && content.Url != null && content.Poster != null)
                {
                    videoUrlsList.Add(new VideoUrls
                    {
                        VideoUrl = content.Url,
                        // Widest poster is used as the thumbnail; null when there are no posters.
                        VideoThumbUrl = content.Poster.OrderBy(x => x.Width).LastOrDefault()?.Url
                    });
                }
            }

            if (contentPhotos.Count > 0)
            {
                UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, contentPhotos);
            }
        }

        if (videosPending)
        {
            if (!string.IsNullOrEmpty(post.Video_url))
            {
                videoUrlsList.Add(new VideoUrls { VideoUrl = post.Video_url, VideoThumbUrl = post.Thumbnail_url });
            }

            // Static string.Equals tolerates a null Video_type, which the instance call did not.
            if (post.Player != null
                && post.Player.Length > 0
                && string.Equals(post.Video_type, "instagram", StringComparison.OrdinalIgnoreCase))
            {
                Player largestPlayer = post.Player.OrderBy(x => x.Width).Last();

                // LoadHtml rejects null input, so skip players without an embed code.
                if (!string.IsNullOrEmpty(largestPlayer.Embed_code))
                {
                    HtmlDocument playerHtmlDoc = new HtmlDocument();
                    playerHtmlDoc.LoadHtml(largestPlayer.Embed_code);

                    // The attribute indexer yields null for a missing attribute, hence the "?.".
                    HtmlNode blockquoteNode = playerHtmlDoc.DocumentNode
                        .Descendants("blockquote")
                        .FirstOrDefault(x => !string.IsNullOrEmpty(x.Attributes["data-instgrm-permalink"]?.Value));

                    if (blockquoteNode != null)
                    {
                        string url = blockquoteNode.Attributes["data-instgrm-permalink"].Value;
                        VideoUrls instagramVideo = await GetInstagramVideo(url);
                        if (instagramVideo != null)
                        {
                            videoUrlsList.Add(instagramVideo);
                        }
                    }
                }
            }
        }

        if (!string.IsNullOrEmpty(post.Body))
        {
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(post.Body);

            if (photosPending)
            {
                List<Photo> bodyPhotos = ExctractPhotosFromHtml(htmlDoc);
                if (bodyPhotos.Count > 0)
                {
                    photosToDownloadMessage = UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, bodyPhotos);
                }
            }

            if (videosPending)
            {
                videoUrlsList.AddRange(GetVideoUrls(htmlDoc, log));
            }
        }

        if (photosToDownloadMessage != null)
        {
            queueAdapter.SendPhotosToDownload(photosToDownloadMessage);
            log.Info("PhotosToDownload message published");
        }

        if (videoUrlsList.Count > 0)
        {
            VideosToDownload videosToDownload = new VideosToDownload(post) { VideoUrls = videoUrlsList.ToArray() };
            queueAdapter.SendVideosToDownload(videosToDownload);
            log.Info("VideosToDownload message published");
        }
    }
}
/// <summary>
/// Queue-triggered entry point: for every entry in the message's <c>VideoUrls</c> it calls
/// <c>BlobAdapter.HandleVideo</c>, inserts a video index row, and finally marks the post's
/// videos as downloaded and inserts a reverse-post entry when at least one video succeeded.
/// </summary>
/// <param name="myQueueItem">JSON text of a <c>VideosToDownload</c> message.</param>
/// <param name="log">Trace writer that receives progress and error messages.</param>
/// <returns>A task that completes when the message has been handled.</returns>
/// <exception cref="ArgumentException">
/// The message does not deserialize to a payload carrying <c>IndexInfo</c>.
/// </exception>
public static async Task Run([QueueTrigger(Constants.VideosToDownloadQueueName, Connection = "AzureWebJobsStorage")] string myQueueItem, TraceWriter log)
{
    Startup.Init();

    log.Info($"C# Queue trigger function processed: {myQueueItem}");

    VideosToDownload videosToDownload = JsonConvert.DeserializeObject<VideosToDownload>(myQueueItem);
    if (videosToDownload == null || videosToDownload.IndexInfo == null)
    {
        // Fail with a diagnosable error instead of a NullReferenceException further down.
        throw new ArgumentException(
            "Queue message does not contain a VideosToDownload payload with IndexInfo: " + myQueueItem,
            nameof(myQueueItem));
    }

    if (videosToDownload.VideoUrls == null)
    {
        log.Warning("VideosToDownload message contains no video URLs, nothing to download");
        return;
    }

    BlobAdapter blobAdapter = new BlobAdapter();
    blobAdapter.Init();

    VideoIndexTableAdapter videoIndexTableAdapter = new VideoIndexTableAdapter();
    videoIndexTableAdapter.Init();

    PostsTableAdapter postsTableAdapter = new PostsTableAdapter();
    postsTableAdapter.Init(log);

    ReversePostsTableAdapter reversePostsTableAdapter = new ReversePostsTableAdapter();
    reversePostsTableAdapter.Init(log);

    // NOTE(review): sourceBlog is computed but not read anywhere below. The call is kept
    // because SanitizeSourceBlog is defined elsewhere - confirm whether it should be passed
    // to HandleVideo or can be removed.
    string sourceBlog = string.IsNullOrEmpty(videosToDownload.SourceBlog)
        ? videosToDownload.IndexInfo.BlogName
        : videosToDownload.SourceBlog;
    sourceBlog = SanityHelper.SanitizeSourceBlog(sourceBlog);

    string blogname = videosToDownload.IndexInfo.BlogName;
    string id = videosToDownload.IndexInfo.PostId;
    DateTime date = videosToDownload.IndexInfo.PostDate;

    List<Video> videos = new List<Video>();

    foreach (VideoUrls videoUrls in videosToDownload.VideoUrls)
    {
        try
        {
            Video blobVideo = await blobAdapter.HandleVideo(videoUrls, blogname, log);
            videos.Add(blobVideo);

            videoIndexTableAdapter.InsertVideoIndex(
                blogname,
                id,
                date,
                blobVideo,
                videosToDownload.VideoType,
                blobVideo.Bytes,
                videosToDownload.Duration);

            log.Info("Video successfully downloaded: " + videoUrls.VideoUrl);
        }
        catch (HttpRequestException ex)
        {
            // HTTP failures are recorded on the post and the loop moves on to the next video.
            log.Warning("HTTP Error while downloading video " + videoUrls.VideoUrl + " - " + ex.Message);
            postsTableAdapter.MarkWithVideoDownloadError(blogname, id, ex.Message);
        }
        catch (Exception ex)
        {
            // Any other failure aborts the invocation; include the URL so the log identifies the video.
            log.Error("Error while downloading video " + videoUrls.VideoUrl, ex);
            throw;
        }
    }

    if (videos.Count > 0)
    {
        postsTableAdapter.MarkVideosAsDownloaded(blogname, id, videos.ToArray());

        ReversePostEntity reversePost = new ReversePostEntity(blogname, id, videosToDownload.PostType, date, videosToDownload.Body, videosToDownload.Title)
        {
            Videos = JsonConvert.SerializeObject(videos)
        };
        reversePostsTableAdapter.InsertPost(reversePost);
    }
}