Esempio n. 1
0
        /// <summary>
        /// HTTP-triggered function: walks the like index of <paramref name="blogname"/> and sends a
        /// <c>PhotoToAnalyze</c> queue message for every photo of a liked post that has no stored image analysis.
        /// </summary>
        /// <param name="req">Incoming request; an optional <c>after</c> query parameter (parsed as a long) restricts
        /// the run to likes returned by <c>GetNewerThan</c> for that timestamp.</param>
        /// <param name="blogname">Blog name taken from the route; used to load the likes and stamped on every message.</param>
        /// <param name="log">Trace writer for progress messages.</param>
        /// <returns>A 200 response summarising how many likes were loaded and how many messages were sent.</returns>
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyze-from-likes/{blogname}")]
                                              HttpRequestMessage req, string blogname, TraceWriter log)
        {
            Startup.Init();

            var likeIndex = new LikeIndexTableAdapter();
            likeIndex.Init();

            var postsTable = new PostsTableAdapter();
            postsTable.Init(log);

            var analysisTable = new ImageAnalysisTableAdapter();
            analysisTable.Init();

            var analyzeQueue = new PhotoToAnalyzeQueueAdapter();
            analyzeQueue.Init();

            string blobBaseUrl = ConfigurationManager.AppSettings["BlobBaseUrl"];

            // KeyValuePair is a struct, so a missing "after" parameter yields a default pair whose Value is null.
            var afterPair = req.GetQueryNameValuePairs()
                               .FirstOrDefault(pair => pair.Key.Equals("after", StringComparison.OrdinalIgnoreCase));
            string afterRaw = afterPair.Value;

            List<LikeIndexEntity> likes;
            if (string.IsNullOrEmpty(afterRaw) || !long.TryParse(afterRaw, out long newerThan))
            {
                // No usable timestamp supplied: fall back to the full like index.
                likes = likeIndex.GetAll(blogname);
            }
            else
            {
                log.Info($"Getting likes newer than timestamp {newerThan}");
                likes = likeIndex.GetNewerThan(blogname, newerThan);
            }

            log.Info($"Loaded {likes.Count} posts");

            int sentMessages = 0;

            foreach (LikeIndexEntity like in likes)
            {
                bool pointsToPost = like.LikedBlogName != null && like.LikedPostId != null;
                if (!pointsToPost)
                {
                    continue;
                }

                PostEntity likedPost = postsTable.GetPost(like.LikedBlogName, like.LikedPostId);
                if (likedPost == null)
                {
                    log.Warning($"Post {like.LikedBlogName}/{like.LikedPostId} not found, skipping");
                    continue;
                }

                if (string.IsNullOrEmpty(likedPost.PhotoBlobUrls))
                {
                    continue;
                }

                List<Photo> storedPhotos = JsonConvert.DeserializeObject<List<Photo>>(likedPost.PhotoBlobUrls);

                foreach (Photo photo in storedPhotos)
                {
                    // The size with the highest Nominal value is the one submitted for analysis.
                    PhotoSize largest = photo.Sizes.OrderByDescending(size => size.Nominal).FirstOrDefault();
                    if (largest == null)
                    {
                        continue;
                    }

                    string url = $"{blobBaseUrl}/{largest.Container}/{photo.Name}_{largest.Nominal}.{photo.Extension}";

                    if (analysisTable.GetImageAnalysis(url) != null)
                    {
                        log.Info($"Image {url} already analyzed");
                    }
                    else
                    {
                        analyzeQueue.Send(new PhotoToAnalyze
                        {
                            Blog     = blogname,
                            PostDate = likedPost.Date,
                            Url      = url
                        });
                        log.Info($"Published PhotoToAnalyze message with URL {url}");
                        sentMessages++;
                    }
                }
            }

            string summary = $"Processed {likes.Count} posts, sent {sentMessages} messages";
            return req.CreateResponse(HttpStatusCode.OK, summary);
        }
Esempio n. 2
0
        /// <summary>
        /// HTTP-triggered function: shuffles the known blogs, keeps at most 50 of them and, per blog, walks its posts
        /// in descending <c>NoteCount</c> order, sending a <c>PhotoToAnalyze</c> queue message for every photo that
        /// has no stored image analysis.
        /// </summary>
        /// <remarks>
        /// The per-blog limit of 10 is checked after each post, so a blog can exceed it by the remaining
        /// photos of the post that crossed the threshold.
        /// </remarks>
        /// <param name="req">Incoming request; only used to build the response.</param>
        /// <param name="log">Trace writer for progress messages.</param>
        /// <returns>A 200 response stating how many messages were sent.</returns>
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyzerandomphotos")]
                                              HttpRequestMessage req, TraceWriter log)
        {
            const int blogsLimit        = 50;
            const int photosInBlogLimit = 10;

            Startup.Init();

            var postsTable = new PostsTableAdapter();
            postsTable.Init(log);

            var analysisTable = new ImageAnalysisTableAdapter();
            analysisTable.Init();

            var analyzeQueue = new PhotoToAnalyzeQueueAdapter();
            analyzeQueue.Init();

            string blobBaseUrl = ConfigurationManager.AppSettings["BlobBaseUrl"];

            var blogInfoTable = new BlogInfoTableAdapter();
            blogInfoTable.Init();

            List<BlogStats> allBlogs = blogInfoTable.GetBlogStats();
            log.Info($"Got {allBlogs.Count} blogs to index");

            // Shuffle first, then cut to the limit.
            var rng = new Random();
            allBlogs.Shuffle(rng);
            List<BlogStats> selectedBlogs = allBlogs.Take(blogsLimit).ToList();

            int queuedTotal = 0;

            foreach (BlogStats blog in selectedBlogs)
            {
                string blogname  = blog.RowKey;
                int queuedInBlog = 0;

                List<PostEntity> postsByNotes = postsTable.GetPostNoteCounts(blogname)
                                                          .OrderByDescending(p => p.NoteCount)
                                                          .ToList();
                log.Info($"Got note counts for {postsByNotes.Count} posts in blog {blogname}");

                foreach (PostEntity noteCountRow in postsByNotes)
                {
                    // The note-count row only supplies the key here; the post is fetched again via GetPost.
                    PostEntity fullPost = postsTable.GetPost(blogname, noteCountRow.RowKey);
                    if (fullPost == null)
                    {
                        log.Warning($"Post {blogname}/{noteCountRow.RowKey} not found, skipping");
                        continue;
                    }

                    if (string.IsNullOrEmpty(fullPost.PhotoBlobUrls))
                    {
                        continue;
                    }

                    List<Photo> storedPhotos = JsonConvert.DeserializeObject<List<Photo>>(fullPost.PhotoBlobUrls);

                    foreach (Photo photo in storedPhotos)
                    {
                        // The size with the highest Nominal value is the one submitted for analysis.
                        PhotoSize largest = photo.Sizes.OrderByDescending(size => size.Nominal).FirstOrDefault();
                        if (largest == null)
                        {
                            continue;
                        }

                        string url = $"{blobBaseUrl}/{largest.Container}/{photo.Name}_{largest.Nominal}.{photo.Extension}";

                        if (analysisTable.GetImageAnalysis(url) != null)
                        {
                            log.Info($"Image {url} already analyzed");
                        }
                        else
                        {
                            analyzeQueue.Send(new PhotoToAnalyze
                            {
                                Blog     = blogname,
                                PostDate = fullPost.Date,
                                Url      = url
                            });
                            log.Info($"Published PhotoToAnalyze message with URL {url}");
                            queuedInBlog++;
                            queuedTotal++;
                        }
                    }

                    if (queuedInBlog >= photosInBlogLimit)
                    {
                        break;
                    }
                }
            }

            string summary = $"Will analyze {queuedTotal} new photos";
            return req.CreateResponse(HttpStatusCode.OK, summary);
        }
Esempio n. 3
0
        /// <summary>
        /// Inserts each post into the posts table and publishes download messages for the photos and videos
        /// found in it.
        /// </summary>
        /// <remarks>
        /// Photos come from the post's photo JSON, from content blocks of type "image" and from
        /// <c>ExctractPhotosFromHtml</c> on the body. Videos come from content blocks of type "video",
        /// from <c>Video_url</c>, from Instagram embeds in <c>Player</c> and from <c>GetVideoUrls</c> on the body.
        /// The loop stops at the first post whose <c>InsertPost</c> call returns false; later posts are not processed.
        /// </remarks>
        /// <param name="posts">Posts to process.</param>
        /// <param name="log">Trace writer for progress messages.</param>
        /// <param name="likerBlogname">When not null, a like-index row is written for every post that has a liked timestamp.</param>
        /// <returns>A task that completes once the loop has finished or stopped early.</returns>
        public async Task ProcessPosts(IEnumerable <Post> posts, TraceWriter log, string likerBlogname = null)
        {
            foreach (Post post in posts)
            {
                SanitizePostPhotos(post); // sometimes post.Photos has Alt_sizes with length 0, needs to be sanitized

                string postId = post.Id.ToString();

                // Looked up before the insert below; its download levels decide what still has to be queued.
                PostEntity postEntityInTable = postsTableAdapter.GetPost(post.Blog_name, postId);

                PostEntity postEntityFromTumblr = new PostEntity(post);

                if (!postsTableAdapter.InsertPost(postEntityFromTumblr))
                {
                    // A failed insert aborts the whole run, not just this post.
                    break;
                }

                if (likerBlogname != null && post.Liked_Timestamp.HasValue)
                {
                    likeIndexTableAdapter.InsertLikeIndex(likerBlogname, post.Liked_Timestamp.ToString(), post.Blog_name, postId, post.Reblog_key);
                }

                log.Info("Post " + post.Blog_name + "/" + post.Id + " inserted to table");

                // True when the post is new, or its stored download level is unset or below the maximum.
                bool photosPending = postEntityInTable == null || postEntityInTable.PicsDownloadLevel == null ||
                                     postEntityInTable.PicsDownloadLevel < Constants.MaxPicsDownloadLevel;
                bool videosPending = postEntityInTable == null || postEntityInTable.VideosDownloadLevel == null ||
                                     postEntityInTable.VideosDownloadLevel < Constants.MaxVideosDownloadLevel;

                PhotosToDownload photosToDownloadMessage = null;

                if (postEntityFromTumblr.PhotosJson != null)
                {
                    if (photosPending)
                    {
                        photosToDownloadMessage = new PhotosToDownload(post)
                        {
                            Photos = post.Photos
                        };
                    }
                    else
                    {
                        log.Info("Photos already downloaded");
                    }
                }

                List<VideoUrls> videoUrlsList = new List<VideoUrls>();

                if (post.Content != null && post.Content.Length > 0)
                {
                    List<Photo> photos = new List<Photo>(post.Content.Length);

                    foreach (Content content in post.Content)
                    {
                        if (content.Type == "image")
                        {
                            photos.Add(ConvertContentToPhoto(content));
                        }
                        else if (content.Type == "video" && content.Url != null && content.Poster != null)
                        {
                            videoUrlsList.Add(new VideoUrls
                            {
                                VideoUrl      = content.Url,
                                // Widest poster image is used as the thumbnail; null when there are no posters.
                                VideoThumbUrl = content.Poster.OrderBy(x => x.Width).LastOrDefault()?.Url
                            });
                        }
                    }

                    if (photos.Count > 0)
                    {
                        // NOTE(review): the return value is discarded here but assigned in the body branch below;
                        // presumably the ref parameter carries the result - confirm against UpdatePhotosToDownloadMessage.
                        UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, photos);
                    }
                }

                if (videosPending)
                {
                    if (!string.IsNullOrEmpty(post.Video_url))
                    {
                        videoUrlsList.Add(new VideoUrls
                        {
                            VideoUrl      = post.Video_url,
                            VideoThumbUrl = post.Thumbnail_url
                        });
                    }

                    // Static string.Equals tolerates a null Video_type; the instance call would throw.
                    if (post.Player != null && post.Player.Length > 0 &&
                        string.Equals(post.Video_type, "instagram", StringComparison.OrdinalIgnoreCase))
                    {
                        Player largestPlayer = post.Player.OrderBy(x => x.Width).Last();

                        // LoadHtml rejects null, and an empty embed code has no blockquote to find anyway.
                        if (!string.IsNullOrEmpty(largestPlayer.Embed_code))
                        {
                            HtmlDocument playerHtmlDoc = new HtmlDocument();
                            playerHtmlDoc.LoadHtml(largestPlayer.Embed_code);

                            // The attribute indexer returns null for blockquotes without the attribute, hence "?.".
                            string permalink = playerHtmlDoc.DocumentNode.Descendants("blockquote")
                                               .Select(x => x.Attributes["data-instgrm-permalink"]?.Value)
                                               .FirstOrDefault(value => !string.IsNullOrEmpty(value));

                            if (permalink != null)
                            {
                                VideoUrls instagramVideo = await GetInstagramVideo(permalink);

                                if (instagramVideo != null)
                                {
                                    videoUrlsList.Add(instagramVideo);
                                }
                            }
                        }
                    }
                }

                if (!string.IsNullOrEmpty(post.Body))
                {
                    HtmlDocument htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(post.Body);

                    if (photosPending)
                    {
                        List<Photo> photos = ExctractPhotosFromHtml(htmlDoc);

                        if (photos.Count > 0)
                        {
                            photosToDownloadMessage = UpdatePhotosToDownloadMessage(ref photosToDownloadMessage, post, photos);
                        }
                    }

                    if (videosPending)
                    {
                        List<VideoUrls> videoUrlsListFromBody = GetVideoUrls(htmlDoc, log);
                        videoUrlsList.AddRange(videoUrlsListFromBody);
                    }
                }

                if (photosToDownloadMessage != null)
                {
                    queueAdapter.SendPhotosToDownload(photosToDownloadMessage);
                    log.Info("PhotosToDownload message published");
                }

                if (videoUrlsList.Count > 0)
                {
                    VideosToDownload videosToDownload = new VideosToDownload(post)
                    {
                        VideoUrls = videoUrlsList.ToArray()
                    };

                    queueAdapter.SendVideosToDownload(videosToDownload);
                    log.Info("VideosToDownload message published");
                }
            }
        }