/// <summary>
/// HTTP-triggered function (route <c>analyze-from-likes/{blogname}</c>) that loads the like-index
/// entries of a blog and sends a <c>PhotoToAnalyze</c> queue message for every photo of a liked
/// post for which <c>GetImageAnalysis</c> returns null.
/// </summary>
/// <param name="req">
/// Incoming request. If the <c>after</c> query parameter is present and parses as a <c>long</c>,
/// likes are loaded through <c>GetNewerThan</c>; otherwise through <c>GetAll</c>.
/// </param>
/// <param name="blogname">Blog name bound from the route; also written to each queued message.</param>
/// <param name="log">Trace writer that receives progress and warning messages.</param>
/// <returns>A 200 OK response reporting the number of likes loaded and messages sent.</returns>
public static HttpResponseMessage Run(
    [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyze-from-likes/{blogname}")] HttpRequestMessage req,
    string blogname,
    TraceWriter log)
{
    Startup.Init();

    var likeIndex = new LikeIndexTableAdapter();
    likeIndex.Init();

    var postsTable = new PostsTableAdapter();
    postsTable.Init(log);

    var analysisTable = new ImageAnalysisTableAdapter();
    analysisTable.Init();

    var analyzeQueue = new PhotoToAnalyzeQueueAdapter();
    analyzeQueue.Init();

    string storageRoot = ConfigurationManager.AppSettings["BlobBaseUrl"];

    // Value of the first "after" query parameter (case-insensitive key), or null when absent.
    string afterRaw = req.GetQueryNameValuePairs()
        .Where(pair => pair.Key.Equals("after", StringComparison.OrdinalIgnoreCase))
        .Select(pair => pair.Value)
        .FirstOrDefault();

    long afterTimestamp = 0;
    bool hasTimestamp = !string.IsNullOrEmpty(afterRaw) && long.TryParse(afterRaw, out afterTimestamp);

    List<LikeIndexEntity> likes;
    if (!hasTimestamp)
    {
        likes = likeIndex.GetAll(blogname);
    }
    else
    {
        log.Info($"Getting likes newer than timestamp {afterTimestamp}");
        likes = likeIndex.GetNewerThan(blogname, afterTimestamp);
    }

    log.Info($"Loaded {likes.Count} posts");

    int sentMessages = 0;
    foreach (LikeIndexEntity like in likes)
    {
        // A like without both parts of the post key cannot be looked up.
        bool hasPostKey = like.LikedBlogName != null && like.LikedPostId != null;
        if (!hasPostKey)
        {
            continue;
        }

        PostEntity likedPost = postsTable.GetPost(like.LikedBlogName, like.LikedPostId);
        if (likedPost == null)
        {
            log.Warning($"Post {like.LikedBlogName}/{like.LikedPostId} not found, skipping");
            continue;
        }

        if (string.IsNullOrEmpty(likedPost.PhotoBlobUrls))
        {
            continue;
        }

        List<Photo> storedPhotos = JsonConvert.DeserializeObject<List<Photo>>(likedPost.PhotoBlobUrls);
        foreach (Photo storedPhoto in storedPhotos)
        {
            // The size with the highest Nominal value is the one submitted for analysis.
            PhotoSize largest = storedPhoto.Sizes.OrderByDescending(size => size.Nominal).FirstOrDefault();
            if (largest == null)
            {
                continue;
            }

            string fileName = storedPhoto.Name + "_" + largest.Nominal + "." + storedPhoto.Extension;
            string url = storageRoot + "/" + largest.Container + "/" + fileName;

            if (analysisTable.GetImageAnalysis(url) == null)
            {
                analyzeQueue.Send(new PhotoToAnalyze { Blog = blogname, PostDate = likedPost.Date, Url = url });
                log.Info($"Published PhotoToAnalyze message with URL {url}");
                sentMessages++;
            }
            else
            {
                log.Info($"Image {url} already analyzed");
            }
        }
    }

    string summary = $"Processed {likes.Count} posts, sent {sentMessages} messages";
    return req.CreateResponse(HttpStatusCode.OK, summary);
}
/// <summary>
/// HTTP-triggered function (route <c>analyzerandomphotos</c>) that shuffles the blogs returned by
/// <c>GetBlogStats</c>, keeps at most 50 of them, and for each blog walks its posts in descending
/// <c>NoteCount</c> order, sending a <c>PhotoToAnalyze</c> message for every photo for which
/// <c>GetImageAnalysis</c> returns null.
/// </summary>
/// <remarks>
/// The per-blog limit of 10 is checked after all photos of a post have been handled, so the
/// number of messages sent for one blog can exceed 10.
/// </remarks>
/// <param name="req">Incoming request; only used to create the response.</param>
/// <param name="log">Trace writer that receives progress and warning messages.</param>
/// <returns>A 200 OK response reporting how many photos were queued in total.</returns>
public static HttpResponseMessage Run(
    [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyzerandomphotos")] HttpRequestMessage req,
    TraceWriter log)
{
    Startup.Init();

    var postsTable = new PostsTableAdapter();
    postsTable.Init(log);

    var analysisTable = new ImageAnalysisTableAdapter();
    analysisTable.Init();

    var analyzeQueue = new PhotoToAnalyzeQueueAdapter();
    analyzeQueue.Init();

    string storageRoot = ConfigurationManager.AppSettings["BlobBaseUrl"];

    const int blogsLimit = 50;
    const int photosInBlogLimit = 10;

    var blogInfoTable = new BlogInfoTableAdapter();
    blogInfoTable.Init();

    List<BlogStats> blogStats = blogInfoTable.GetBlogStats();
    log.Info($"Got {blogStats.Count} blogs to index");

    // Shuffle the list in place, then keep only the first blogsLimit entries.
    var rng = new Random();
    blogStats.Shuffle(rng);
    List<BlogStats> selectedBlogs = blogStats.Take(blogsLimit).ToList();

    int queuedTotal = 0;
    foreach (string blogname in selectedBlogs.Select(stats => stats.RowKey))
    {
        int queuedForBlog = 0;

        List<PostEntity> postsByNotes = postsTable.GetPostNoteCounts(blogname)
            .OrderByDescending(entry => entry.NoteCount)
            .ToList();
        log.Info($"Got note counts for {postsByNotes.Count} posts in blog {blogname}");

        foreach (PostEntity noteEntry in postsByNotes)
        {
            // Fetch the post by the note-count row's key to read its PhotoBlobUrls and Date.
            PostEntity fullPost = postsTable.GetPost(blogname, noteEntry.RowKey);
            if (fullPost == null)
            {
                log.Warning($"Post {blogname}/{noteEntry.RowKey} not found, skipping");
                continue;
            }

            if (string.IsNullOrEmpty(fullPost.PhotoBlobUrls))
            {
                continue;
            }

            List<Photo> storedPhotos = JsonConvert.DeserializeObject<List<Photo>>(fullPost.PhotoBlobUrls);
            foreach (Photo storedPhoto in storedPhotos)
            {
                // The size with the highest Nominal value is the one submitted for analysis.
                PhotoSize largest = storedPhoto.Sizes.OrderByDescending(size => size.Nominal).FirstOrDefault();
                if (largest == null)
                {
                    continue;
                }

                string fileName = storedPhoto.Name + "_" + largest.Nominal + "." + storedPhoto.Extension;
                string url = storageRoot + "/" + largest.Container + "/" + fileName;

                if (analysisTable.GetImageAnalysis(url) == null)
                {
                    analyzeQueue.Send(new PhotoToAnalyze { Blog = blogname, PostDate = fullPost.Date, Url = url });
                    log.Info($"Published PhotoToAnalyze message with URL {url}");
                    queuedForBlog++;
                    queuedTotal++;
                }
                else
                {
                    log.Info($"Image {url} already analyzed");
                }
            }

            // Checked once per post, after all of its photos have been handled.
            if (queuedForBlog >= photosInBlogLimit)
            {
                break;
            }
        }
    }

    string summary = $"Will analyze {queuedTotal} new photos";
    return req.CreateResponse(HttpStatusCode.OK, summary);
}
/// <summary>
/// Inserts each post into the posts table, optionally records a like-index entry, and publishes
/// <c>PhotosToDownload</c> / <c>VideosToDownload</c> queue messages for the media found in the
/// post's photos, content blocks, video fields, Instagram player embed and HTML body.
/// </summary>
/// <param name="posts">
/// Posts to process, in order. Processing stops at the first post for which <c>InsertPost</c>
/// returns false; the remaining posts are not handled.
/// </param>
/// <param name="log">Trace writer that receives progress messages.</param>
/// <param name="likerBlogname">
/// When non-null and the post has a <c>Liked_Timestamp</c>, a like-index entry is inserted
/// under this blog name.
/// </param>
/// <returns>A task that completes once all posts have been processed.</returns>
public async Task ProcessPosts(IEnumerable<Post> posts, TraceWriter log, string likerBlogname = null)
{
    foreach (Post post in posts)
    {
        SanitizePostPhotos(post); // sometimes post.Photos has Alt_sizes with length 0, needs to be sanitized

        // Read the stored copy BEFORE inserting: its download levels decide below what gets queued.
        PostEntity postEntityInTable = postsTableAdapter.GetPost(post.Blog_name, post.Id.ToString());
        PostEntity postEntityFromTumblr = new PostEntity(post);

        if (!postsTableAdapter.InsertPost(postEntityFromTumblr))
        {
            break;
        }

        if (likerBlogname != null && post.Liked_Timestamp.HasValue)
        {
            likeIndexTableAdapter.InsertLikeIndex(likerBlogname, post.Liked_Timestamp.ToString(), post.Blog_name, post.Id.ToString(), post.Reblog_key);
        }

        log.Info("Post " + post.Blog_name + "/" + post.Id + " inserted to table");

        // Media is still pending when there is no stored copy, no recorded level, or a level below the maximum.
        bool picsPending = postEntityInTable == null
            || postEntityInTable.PicsDownloadLevel == null
            || postEntityInTable.PicsDownloadLevel < Constants.MaxPicsDownloadLevel;
        bool videosPending = postEntityInTable == null
            || postEntityInTable.VideosDownloadLevel == null
            || postEntityInTable.VideosDownloadLevel < Constants.MaxVideosDownloadLevel;

        PhotosToDownload photosToDownloadMessage = null;
        if (postEntityFromTumblr.PhotosJson != null)
        {
            if (picsPending)
            {
                photosToDownloadMessage = new PhotosToDownload(post) { Photos = post.Photos };
            }
            else
            {
                log.Info("Photos already downloaded");
            }
        }

        List<VideoUrls> videoUrlsList = new List<VideoUrls>();

        // Content blocks: images become photos, videos with a URL and a poster become video entries.
        // NOTE(review): unlike the other sources, this branch is not gated on the download levels — confirm intended.
        if (post.Content != null && post.Content.Length > 0)
        {
            List<Photo> photos = new List<Photo>(post.Content.Length);
            foreach (Content content in post.Content)
            {
                if (content.Type == "image")
                {
                    photos.Add(ConvertContentToPhoto(content));
                }
                else if (content.Type == "video" && content.Url != null && content.Poster != null)
                {
                    videoUrlsList.Add(new VideoUrls
                    {
                        VideoUrl = content.Url,
                        // Widest poster, or null when the poster list is empty.
                        VideoThumbUrl = content.Poster.OrderBy(x => x.Width).LastOrDefault()?.Url
                    });
                }
            }

            if (photos.Count > 0)
            {
                UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, photos);
            }
        }

        if (videosPending)
        {
            if (!string.IsNullOrEmpty(post.Video_url))
            {
                videoUrlsList.Add(new VideoUrls { VideoUrl = post.Video_url, VideoThumbUrl = post.Thumbnail_url });
            }

            // Static string.Equals is null-safe: a post with players but no Video_type must not throw.
            bool isInstagram = post.Player != null
                && post.Player.Length > 0
                && string.Equals(post.Video_type, "instagram", StringComparison.OrdinalIgnoreCase);

            if (isInstagram)
            {
                Player largestPlayer = post.Player.OrderBy(x => x.Width).Last();

                // LoadHtml rejects null, so a player without embed code is skipped.
                if (!string.IsNullOrEmpty(largestPlayer.Embed_code))
                {
                    HtmlDocument playerHtmlDoc = new HtmlDocument();
                    playerHtmlDoc.LoadHtml(largestPlayer.Embed_code);

                    // The attribute indexer yields null for a missing attribute, hence "?.".
                    HtmlNode blockquoteNode = playerHtmlDoc.DocumentNode.Descendants("blockquote")
                        .FirstOrDefault(x => !string.IsNullOrEmpty(x.Attributes["data-instgrm-permalink"]?.Value));

                    if (blockquoteNode != null)
                    {
                        string url = blockquoteNode.Attributes["data-instgrm-permalink"].Value;
                        VideoUrls instagramVideo = await GetInstagramVideo(url);
                        if (instagramVideo != null)
                        {
                            videoUrlsList.Add(instagramVideo);
                        }
                    }
                }
            }
        }

        if (!string.IsNullOrEmpty(post.Body))
        {
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(post.Body);

            if (picsPending)
            {
                List<Photo> photos = ExctractPhotosFromHtml(htmlDoc);
                if (photos.Count > 0)
                {
                    photosToDownloadMessage = UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, photos);
                }
            }

            if (videosPending)
            {
                List<VideoUrls> videoUrlsListFromBody = GetVideoUrls(htmlDoc, log);
                videoUrlsList.AddRange(videoUrlsListFromBody);
            }
        }

        if (photosToDownloadMessage != null)
        {
            queueAdapter.SendPhotosToDownload(photosToDownloadMessage);
            log.Info("PhotosToDownload message published");
        }

        if (videoUrlsList.Count > 0)
        {
            VideosToDownload videosToDownload = new VideosToDownload(post) { VideoUrls = videoUrlsList.ToArray() };
            queueAdapter.SendVideosToDownload(videosToDownload);
            log.Info("VideosToDownload message published");
        }
    }
}