Exemple #1
0
        public static async Task Run([QueueTrigger(Constants.PostsToProcessQueueName, Connection = "AzureWebJobsStorage")] string myQueueItem, TraceWriter log)
        {
            Startup.Init();

            PostsToProcess postsToProcess = JsonConvert.DeserializeObject <PostsToProcess>(myQueueItem);

            PostProcessor postProcessor = new PostProcessor();

            postProcessor.Init(log);

            try
            {
                await postProcessor.ProcessPosts(postsToProcess.Posts, log, postsToProcess.LikerBlogname);
            }
            catch (Exception ex)
            {
                log.Error("Error", ex);
                throw;
            }
        }
Exemple #2
0
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "getfrequencies")]
                                              HttpRequestMessage req, TraceWriter log)
        {
            Startup.Init();

            ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();

            imageAnalysisTableAdapter.Init();

            TokenAllocationTableAdapter tokenAllocationTableAdapter = new TokenAllocationTableAdapter();

            tokenAllocationTableAdapter.Init();

            List <ImageAnalysisEntity> analyses = imageAnalysisTableAdapter.GetAllCanonical();

            Dictionary <string, int> digramFrequencies = new Dictionary <string, int>();
            Dictionary <string, int> labelFrequencies  = new Dictionary <string, int>();

            int processedCount = 0;

            foreach (ImageAnalysisEntity entity in analyses)
            {
                ImageAnalysis canonicalAnalysis = JsonConvert.DeserializeObject <ImageAnalysis>(entity.CanonicalJson);

                UpdateCounts(StringTokenizer.GetDigrams(canonicalAnalysis.TokenizedText), digramFrequencies);
                UpdateCounts(canonicalAnalysis.Labels.Keys.Distinct(), labelFrequencies);
                processedCount++;
                if (processedCount % 100 == 0)
                {
                    log.Info($"Processed frequencies for {processedCount} image analyses");
                }
            }

            log.Info($"Inserting {digramFrequencies.Count} digrams");
            tokenAllocationTableAdapter.InsertFrequencies(TokenAllocationTableAdapter.PartitionDigram, digramFrequencies);

            log.Info($"Inserting {labelFrequencies.Count} labels");
            tokenAllocationTableAdapter.InsertFrequencies(TokenAllocationTableAdapter.PartitionLabel, labelFrequencies);

            return(req.CreateResponse(HttpStatusCode.OK, $"Processed {analyses.Count} analyses"));
        }
Exemple #3
0
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "initindexing")]
                                              HttpRequestMessage req, TraceWriter log)
        {
            Startup.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            BlogToIndexQueueAdapter blogToIndexQueueAdapter = new BlogToIndexQueueAdapter();

            blogToIndexQueueAdapter.Init();

            List <string> partitions = postsTableAdapter.GetAllPartitions();

            log.Info($"Got {partitions.Count} blogs to index");

            //Random random = new Random();
            //partitions.Shuffle(random);
            //partitions = partitions.Take(10).ToList();

            int index = 0;

            foreach (string blogname in partitions)
            {
                blogToIndexQueueAdapter.Send(new BlogToIndex {
                    Blogname = blogname
                });
                index++;
                if (index % 100 == 0)
                {
                    log.Info($"Queued {index} blogs for indexing");
                }
            }

            return(req.CreateResponse(HttpStatusCode.OK, "Queued " + partitions.Count + " blogs for indexing"));
        }
Exemple #4
0
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "redo-canonical")] HttpRequestMessage req, TraceWriter log)
        {
            Startup.Init();

            ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();

            imageAnalysisTableAdapter.Init();

            List <ImageAnalysisEntity> analyses = imageAnalysisTableAdapter.GetAll();

            foreach (ImageAnalysisEntity entity in analyses)
            {
                Response    visionResponse = JsonConvert.DeserializeObject <Response>(entity.GoogleVisionApiJson);
                Analysis    msAnalysis     = JsonConvert.DeserializeObject <Analysis>(entity.MsAnalysisJson);
                List <Face> msFaces        = JsonConvert.DeserializeObject <List <Face> >(entity.MsCognitiveFaceDetectJson);

                ImageAnalysis canonicalImageAnalysis = new ImageAnalysis(visionResponse, msAnalysis, msFaces);

                entity.CanonicalJson = JsonConvert.SerializeObject(canonicalImageAnalysis, JsonUtils.AnalysisSerializerSettings);
                imageAnalysisTableAdapter.UpdateImageAnalysis(entity);
            }

            return(req.CreateResponse(HttpStatusCode.OK, $"Updated {analyses.Count} analyses"));
        }
Exemple #5
0
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyzerandomphotos")]
                                              HttpRequestMessage req, TraceWriter log)
        {
            Startup.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();

            imageAnalysisTableAdapter.Init();

            PhotoToAnalyzeQueueAdapter photoToAnalyzeQueueAdapter = new PhotoToAnalyzeQueueAdapter();

            photoToAnalyzeQueueAdapter.Init();

            string blobBaseUrl = ConfigurationManager.AppSettings["BlobBaseUrl"];

            int blogsLimit        = 50;
            int photosInBlogLimit = 10;

            BlogInfoTableAdapter blogInfoTableAdapter = new BlogInfoTableAdapter();

            blogInfoTableAdapter.Init();

            List <BlogStats> blogStats = blogInfoTableAdapter.GetBlogStats();

            log.Info($"Got {blogStats.Count} blogs to index");

            Random random = new Random();

            blogStats.Shuffle(random);
            blogStats = blogStats.Take(blogsLimit).ToList();

            int totalCount = 0;

            foreach (string blogname in blogStats.Select(x => x.RowKey))
            {
                int analyzedInBlogCount      = 0;
                List <PostEntity> noteCounts = postsTableAdapter.GetPostNoteCounts(blogname).OrderByDescending(x => x.NoteCount).ToList();
                log.Info($"Got note counts for {noteCounts.Count} posts in blog {blogname}");
                foreach (PostEntity noteCountPost in noteCounts)
                {
                    PostEntity postEntity = postsTableAdapter.GetPost(blogname, noteCountPost.RowKey);

                    if (postEntity == null)
                    {
                        log.Warning($"Post {blogname}/{noteCountPost.RowKey} not found, skipping");
                        continue;
                    }

                    if (string.IsNullOrEmpty(postEntity.PhotoBlobUrls))
                    {
                        continue;
                    }

                    List <Photo> sitePhotos = JsonConvert.DeserializeObject <List <Photo> >(postEntity.PhotoBlobUrls);

                    foreach (Photo photo in sitePhotos)
                    {
                        List <PhotoSize> sortedSizes = photo.Sizes.OrderByDescending(x => x.Nominal).ToList();

                        PhotoSize original = sortedSizes.FirstOrDefault();
                        if (original == null)
                        {
                            continue;
                        }

                        string url = blobBaseUrl + "/" + original.Container + "/" + photo.Name + "_" + original.Nominal + "." + photo.Extension;

                        if (imageAnalysisTableAdapter.GetImageAnalysis(url) != null)
                        {
                            log.Info($"Image {url} already analyzed");
                            continue;
                        }

                        PhotoToAnalyze message = new PhotoToAnalyze
                        {
                            Blog     = blogname,
                            PostDate = postEntity.Date,
                            Url      = url
                        };
                        photoToAnalyzeQueueAdapter.Send(message);
                        log.Info($"Published PhotoToAnalyze message with URL {url}");
                        analyzedInBlogCount++;
                        totalCount++;
                    }

                    if (analyzedInBlogCount >= photosInBlogLimit)
                    {
                        break;
                    }
                }
            }

            return(req.CreateResponse(HttpStatusCode.OK, $"Will analyze {totalCount} new photos"));
        }
Exemple #6
0
        public static HttpResponseMessage Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "analyze-from-likes/{blogname}")]
                                              HttpRequestMessage req, string blogname, TraceWriter log)
        {
            Startup.Init();

            LikeIndexTableAdapter likeIndexTableAdapter = new LikeIndexTableAdapter();

            likeIndexTableAdapter.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();

            imageAnalysisTableAdapter.Init();

            PhotoToAnalyzeQueueAdapter photoToAnalyzeQueueAdapter = new PhotoToAnalyzeQueueAdapter();

            photoToAnalyzeQueueAdapter.Init();

            string blobBaseUrl = ConfigurationManager.AppSettings["BlobBaseUrl"];

            string afterParam = req.GetQueryNameValuePairs().FirstOrDefault(q => q.Key.Equals("after", StringComparison.OrdinalIgnoreCase)).Value;

            List <LikeIndexEntity> likes;

            if (!string.IsNullOrEmpty(afterParam) && long.TryParse(afterParam, out long afterTimestamp))
            {
                log.Info($"Getting likes newer than timestamp {afterTimestamp}");
                likes = likeIndexTableAdapter.GetNewerThan(blogname, afterTimestamp);
            }
            else
            {
                likes = likeIndexTableAdapter.GetAll(blogname);
            }

            log.Info($"Loaded {likes.Count} posts");

            int messageCount = 0;

            foreach (LikeIndexEntity like in likes)
            {
                if (like.LikedBlogName == null || like.LikedPostId == null)
                {
                    continue;
                }

                PostEntity postEntity = postsTableAdapter.GetPost(like.LikedBlogName, like.LikedPostId);

                if (postEntity == null)
                {
                    log.Warning($"Post {like.LikedBlogName}/{like.LikedPostId} not found, skipping");
                    continue;
                }

                if (string.IsNullOrEmpty(postEntity.PhotoBlobUrls))
                {
                    continue;
                }

                List <Photo> sitePhotos = JsonConvert.DeserializeObject <List <Photo> >(postEntity.PhotoBlobUrls);

                foreach (Photo photo in sitePhotos)
                {
                    List <PhotoSize> sortedSizes = photo.Sizes.OrderByDescending(x => x.Nominal).ToList();

                    PhotoSize original = sortedSizes.FirstOrDefault();
                    if (original == null)
                    {
                        continue;
                    }

                    string url = blobBaseUrl + "/" + original.Container + "/" + photo.Name + "_" + original.Nominal + "." + photo.Extension;

                    if (imageAnalysisTableAdapter.GetImageAnalysis(url) != null)
                    {
                        log.Info($"Image {url} already analyzed");
                        continue;
                    }

                    PhotoToAnalyze message = new PhotoToAnalyze
                    {
                        Blog     = blogname,
                        PostDate = postEntity.Date,
                        Url      = url
                    };
                    photoToAnalyzeQueueAdapter.Send(message);
                    log.Info($"Published PhotoToAnalyze message with URL {url}");
                    messageCount++;
                }
            }

            return(req.CreateResponse(HttpStatusCode.OK, $"Processed {likes.Count} posts, sent {messageCount} messages"));
        }
Exemple #7
0
        public static async Task Run([QueueTrigger(Constants.BlogToIndexQueueName, Connection = "AzureWebJobsStorage")]
                                     string myQueueItem, TraceWriter log)
        {
            Startup.Init();

            BlogToIndex blogToIndex = JsonConvert.DeserializeObject <BlogToIndex>(myQueueItem);

            PhotoIndexTableAdapter photoIndexTableAdapter = new PhotoIndexTableAdapter();

            photoIndexTableAdapter.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            ReversePostsTableAdapter reversePostsTableAdapter = new ReversePostsTableAdapter();

            reversePostsTableAdapter.Init(log);

            PostToGetQueueAdapter postToGetQueueAdapter = new PostToGetQueueAdapter();

            postToGetQueueAdapter.Init();

            BlogInfoTableAdapter blogInfoTableAdapter = new BlogInfoTableAdapter();

            blogInfoTableAdapter.Init();

            MediaToDownloadQueueAdapter mediaToDownloadQueueAdapter = new MediaToDownloadQueueAdapter();

            mediaToDownloadQueueAdapter.Init(log);

            List <PhotoIndexEntity> photoIndexEntities = photoIndexTableAdapter.GetAll(blogToIndex.Blogname);

            log.Info("Loaded " + photoIndexEntities.Count + " photo index entities");

            BlogEntity blogEntity = await blogInfoTableAdapter.GetBlog(blogToIndex.Blogname);

            Dictionary <string, List <Model.Site.Photo> > photosByBlogById = CreatePhotosByBlogById(photoIndexEntities);
            BlogStats blogStats = CreateBlogStatsFromPhotos(photoIndexEntities, blogToIndex.Blogname);

            blogStats.UpdateFromBlogEntity(blogEntity);

            List <PostEntity> postEntities = postsTableAdapter.GetAll(blogToIndex.Blogname);

            UpdateBlogStatsFromPosts(blogStats, postEntities);
            UpdateMonthIndex(blogToIndex.Blogname, postEntities, blogInfoTableAdapter);

            log.Info("Loaded " + postEntities.Count + " post entities");

            foreach (PostEntity postEntity in postEntities)
            {
                if (!string.IsNullOrEmpty(postEntity.PhotoBlobUrls))
                {
                    try
                    {
                        Model.Site.Photo[] photos = JsonConvert.DeserializeObject <Model.Site.Photo[]>(postEntity.PhotoBlobUrls);

                        if (photos.Any(x => !x.Name.Contains("_")))
                        {
                            SendToReprocessing(postEntity.PartitionKey, mediaToDownloadQueueAdapter, log, postEntity);
                        }
                    }
                    catch (Exception e)
                    {
                        log.Error("Error: " + e.Message);
                        throw;
                    }
                }
            }

            blogStats.DisplayablePosts = InsertReversePosts(blogToIndex.Blogname, photosByBlogById, postEntities, reversePostsTableAdapter,
                                                            postsTableAdapter, photoIndexTableAdapter, mediaToDownloadQueueAdapter, log);

            blogInfoTableAdapter.InsertBlobStats(blogStats);
        }
Exemple #8
0
        public static async Task Run([QueueTrigger(Constants.VideosToDownloadQueueName, Connection = "AzureWebJobsStorage")] string myQueueItem, TraceWriter log)
        {
            Startup.Init();

            log.Info($"C# Queue trigger function processed: {myQueueItem}");

            VideosToDownload videosToDownload = JsonConvert.DeserializeObject <VideosToDownload>(myQueueItem);

            BlobAdapter blobAdapter = new BlobAdapter();

            blobAdapter.Init();

            VideoIndexTableAdapter videoIndexTableAdapter = new VideoIndexTableAdapter();

            videoIndexTableAdapter.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            ReversePostsTableAdapter reversePostsTableAdapter = new ReversePostsTableAdapter();

            reversePostsTableAdapter.Init(log);

            string sourceBlog = string.IsNullOrEmpty(videosToDownload.SourceBlog)
                ? videosToDownload.IndexInfo.BlogName
                : videosToDownload.SourceBlog;

            sourceBlog = SanityHelper.SanitizeSourceBlog(sourceBlog);

            using (HttpClient httpClient = new HttpClient())
            {
                httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("video/*"));
                httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("image/*"));

                List <Video> videos = new List <Video>();

                string   blogname = videosToDownload.IndexInfo.BlogName;
                string   id       = videosToDownload.IndexInfo.PostId;
                DateTime date     = videosToDownload.IndexInfo.PostDate;

                foreach (VideoUrls videoUrls in videosToDownload.VideoUrls)
                {
                    try
                    {
                        Video blobVideo = await blobAdapter.HandleVideo(videoUrls, videosToDownload.IndexInfo.BlogName, log);

                        videos.Add(blobVideo);

                        videoIndexTableAdapter.InsertVideoIndex(blogname, id, date, blobVideo, videosToDownload.VideoType, blobVideo.Bytes, videosToDownload.Duration);

                        log.Info("Video successfully downloaded: " + videoUrls.VideoUrl);
                    }
                    catch (HttpRequestException ex)
                    {
                        log.Warning("HTTP Error while downloading video " + videoUrls.VideoUrl + " - " + ex.Message);
                        postsTableAdapter.MarkWithVideoDownloadError(blogname, id, ex.Message);
                    }
                    catch (Exception ex)
                    {
                        log.Error("Error while downloading video ", ex);
                        throw;
                    }
                }

                if (videos.Count > 0)
                {
                    postsTableAdapter.MarkVideosAsDownloaded(videosToDownload.IndexInfo.BlogName, videosToDownload.IndexInfo.PostId, videos.ToArray());

                    ReversePostEntity reversePost = new ReversePostEntity(blogname, id, videosToDownload.PostType, date, videosToDownload.Body, videosToDownload.Title)
                    {
                        Videos = JsonConvert.SerializeObject(videos)
                    };
                    reversePostsTableAdapter.InsertPost(reversePost);
                }
            }
        }
Exemple #9
0
        public static async Task Run([TimerTrigger("0 25 * * * *")] TimerInfo myTimer, TraceWriter log)
        {
            Startup.Init();

            log.Info($"C# Timer trigger function executed at: {DateTime.Now}");

            BlogToFetchQueueAdapter blogToFetchQueueAdapter = new BlogToFetchQueueAdapter();

            blogToFetchQueueAdapter.Init();

            BlogInfoTableAdapter blogInfoTableAdapter = new BlogInfoTableAdapter();

            blogInfoTableAdapter.Init();

            PostsGetter postsGetter = new PostsGetter();

            Stopwatch stopwatch = Stopwatch.StartNew();

            bool success; // basically means that can the message be deleted, or if it needs to be left in queue to be resumed later

            do
            {
                //TODO: error handling, if there is error from e.g. postsGetter
                CloudQueueMessage message = await blogToFetchQueueAdapter.GetNextMessage();

                if (message == null)
                {
                    return;
                }

                BlogToFetch blogToFetch = JsonConvert.DeserializeObject <BlogToFetch>(message.AsString);

                BlogEntity blogEntity = await blogInfoTableAdapter.GetBlog(blogToFetch.Blogname);

                long timeoutLeft = 270 - stopwatch.ElapsedMilliseconds / 1000;
                if (timeoutLeft < 10)
                {
                    return;
                }

                success = false;

                GetPostsResult result = null;
                if (blogToFetch.NewerThan.HasValue)
                {
                    result = await postsGetter.GetNewerPosts(log, blogToFetch.Blogname, blogToFetch.NewerThan.Value, timeoutLeft);

                    if (result.Success)
                    {
                        success = true;
                    }
                }

                int offset = 0;
                if (blogEntity?.FetchedUntilOffset != null && !blogToFetch.UpdateNpf)
                {
                    offset = blogEntity.FetchedUntilOffset.Value;
                }

                if (result != null)
                {
                    offset += (int)result.TotalReceived;
                }

                if (blogEntity != null && (!blogEntity.FetchedUntilOffset.HasValue || blogEntity.FetchedUntilOffset.Value < Constants.MaxPostsToFetch))
                {
                    result = await postsGetter.GetPosts(log, blogToFetch.Blogname, offset, timeoutSeconds : timeoutLeft, updateNpf : blogToFetch.UpdateNpf);

                    if (result.Success)
                    {
                        success = true;
                    }
                }
                else
                {
                    success = true; // enough fetched already, message can be deleted
                }

                if (success)
                {
                    await blogToFetchQueueAdapter.DeleteMessage(message);
                }
            } while (success);
        }
Exemple #10
0
        public static async Task <HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "getblogsfromlikes/{blogname}")] HttpRequestMessage req,
                                                           string blogname, TraceWriter log)
        {
            Startup.Init();

            LikeIndexTableAdapter likeIndexTableAdapter = new LikeIndexTableAdapter();

            likeIndexTableAdapter.Init();

            BlogInfoTableAdapter blogInfoTableAdapter = new BlogInfoTableAdapter();

            blogInfoTableAdapter.Init();

            PostsTableAdapter postsTableAdapter = new PostsTableAdapter();

            postsTableAdapter.Init(log);

            BlogToFetchQueueAdapter blogToFetchQueueAdapter = new BlogToFetchQueueAdapter();

            blogToFetchQueueAdapter.Init();

            List <LikeIndexEntity>            entities    = likeIndexTableAdapter.GetAll(blogname);
            ILookup <string, LikeIndexEntity> likesByBlog = entities.ToLookup(e => e.LikedBlogName);
            List <BlogStatsRow> stats = likesByBlog.Select(gr => new BlogStatsRow {
                Blogname = gr.Key, LikedPostCount = gr.Count()
            }).OrderByDescending(x => x.LikedPostCount).ToList();

            string apiKey = ConfigurationManager.AppSettings["TumblrApiKey"];

            List <BlogStatsRow> toDownload = new List <BlogStatsRow>();

            using (HttpClient httpClient = new HttpClient())
            {
                foreach (BlogStatsRow blogStatsRow in stats)
                {
                    if (blogStatsRow.LikedPostCount < 10)
                    {
                        continue;
                    }

                    string url = "https://api.tumblr.com/v2/blog/" + blogStatsRow.Blogname + "/info?api_key=" + apiKey;
                    //log.Info("Making request to: " + url);
                    HttpResponseMessage response = await httpClient.GetAsync(url);

                    if (response.IsSuccessStatusCode)
                    {
                        TumblrResponse <BlogInfo> tumblrResponse = await response.Content.ReadAsAsync <TumblrResponse <BlogInfo> >();

                        Blog       blog       = tumblrResponse.Response.Blog;
                        BlogEntity blogEntity = await blogInfoTableAdapter.GetBlog(blogStatsRow.Blogname);

                        blogStatsRow.HadPostCount   = postsTableAdapter.GetPostCount(blogStatsRow.Blogname);
                        blogStatsRow.TotalPostCount = blog.Posts;
                        long difference = blog.Posts - blogStatsRow.HadPostCount;
                        bool fetch      = false;
                        long?newerThan  = null;

                        if (blogEntity != null && blogEntity.Updated < blog.Updated)
                        {
                            log.Info("Blog " + blogStatsRow.Blogname + " to be downloaded, has new posts");
                            fetch     = true;
                            newerThan = blogEntity.Updated;
                        }
                        else if (blogStatsRow.HadPostCount > Constants.MaxPostsToFetch)
                        {
                            log.Info("Already fetched " + blogStatsRow.HadPostCount + " posts from blog " + blogStatsRow.Blogname);
                        }
                        else if (difference > 5)
                        {
                            log.Info("Blog " + blogStatsRow.Blogname + " to be downloaded, missing " + difference + " posts");
                            fetch = true;
                        }
                        else
                        {
                            log.Info("Blog " + blogStatsRow.Blogname + " already downloaded (difference " + difference + ")");
                        }

                        if (fetch)
                        {
                            blogToFetchQueueAdapter.SendBlogToFetch(new BlogToFetch
                            {
                                Blogname       = blog.Name,
                                TotalPostCount = blog.Posts,
                                NewerThan      = newerThan
                            });
                            toDownload.Add(blogStatsRow);
                        }

                        blogEntity = new BlogEntity(blog);
                        blogInfoTableAdapter.InsertBlog(blogEntity);
                    }
                }
            }

            return(req.CreateResponse(HttpStatusCode.OK, "Got " + toDownload.Count + " blogs to fetch"));
        }
Exemple #11
0
        public static async Task Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "processimage")]
                                     HttpRequestMessage req, TraceWriter log)
        {
            Startup.Init();

            ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();

            imageAnalysisTableAdapter.Init();

            const string imageUrl = "https://tumblrpics.blob.core.windows.net/orig-66/c48440825ac6eab1af6c4de39bbc59d6_ph8zkhbeVA1tf8706_1280.jpg";

            using (HttpClient httpClient = new HttpClient())
            {
                Stopwatch stopwatch = Stopwatch.StartNew();

                string apiKey = ConfigurationManager.AppSettings["GoogleApiKey"];

                string url = "https://vision.googleapis.com/v1/images:annotate?key=" + apiKey;

                VisionApiRequest request = VisionApiRequest.CreateFromImageUris(imageUrl);

                string requestJson = JsonConvert.SerializeObject(request, JsonUtils.GoogleSerializerSettings);

                StringContent stringContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

                HttpResponseMessage response = await httpClient.PostAsync(url, stringContent);

                HttpContent responseContent            = response.Content;
                string      googleVisionResponseString = await responseContent.ReadAsStringAsync();

                VisionApiResponse visionApiResponse =
                    JsonConvert.DeserializeObject <VisionApiResponse>(googleVisionResponseString, JsonUtils.GoogleSerializerSettings);

                string faceApiKey = ConfigurationManager.AppSettings["FaceApiKey"];
                httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", faceApiKey);
                stringContent = new StringContent($"{{\"url\":\"{imageUrl}\"}}", Encoding.UTF8, "application/json");
                response      = await httpClient.PostAsync(
                    "https://northeurope.api.cognitive.microsoft.com/face/v1.0/detect?returnFaceId=true&returnFaceLandmarks=false&returnFaceAttributes=" +
                    "age,gender,headPose,smile,facialHair,glasses,emotion,hair,makeup,occlusion,accessories,blur,exposure,noise",
                    stringContent);

                responseContent = response.Content;

                string msDetectResponseString = await responseContent.ReadAsStringAsync();

                List <Face> msFaces = JsonConvert.DeserializeObject <List <Face> >(msDetectResponseString);

                string visionApiKey = ConfigurationManager.AppSettings["ComputerVisionApiKey"];
                httpClient.DefaultRequestHeaders.Remove("Ocp-Apim-Subscription-Key");
                httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", visionApiKey);
                stringContent = new StringContent($"{{\"url\":\"{imageUrl}\"}}", Encoding.UTF8, "application/json");
                response      = await httpClient.PostAsync(
                    "https://northeurope.api.cognitive.microsoft.com/vision/v2.0/analyze?visualFeatures=Description,ImageType,Adult,Categories,Tags,Objects,Color&language=en",
                    stringContent);

                responseContent = response.Content;

                string msAnalyzeResponseString = await responseContent.ReadAsStringAsync();

                Analysis msAnalysis = JsonConvert.DeserializeObject <Analysis>(msAnalyzeResponseString);

                if (visionApiResponse.Responses.Count == 1 && msFaces.Count > 0 && msAnalysis != null)
                {
                    ImageAnalysis canonicalImageAnalysis = new ImageAnalysis(visionApiResponse.Responses[0], msAnalysis, msFaces);
                }
            }
        }
Exemple #12
0
        public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = "getlikes/{blogname}")]
            HttpRequestMessage req, string blogname, TraceWriter log)
        {
            Startup.Init();

            blogname = blogname.ToLower().Replace(".tumblr.com", "");

            PostsToProcessQueueAdapter postsToProcessQueueAdapter = new PostsToProcessQueueAdapter();
            postsToProcessQueueAdapter.Init(log);

            LikeIndexTableAdapter likeIndexTableAdapter = new LikeIndexTableAdapter();
            likeIndexTableAdapter.Init();

            long newestLikedTimestamp = likeIndexTableAdapter.GetNewestLikedTimestamp(blogname);

            if (newestLikedTimestamp > 0)
            {
                log.Info($"Getting likes newer than timestamp {newestLikedTimestamp}");
            }

            Likes likes = null;
            long totalCount = 0;
            using (HttpClient httpClient = new HttpClient())
            {
                DateTime beforeTime = DateTime.UtcNow;
                do
                {
                    string apiKey = ConfigurationManager.AppSettings["TumblrApiKey"];
                    string url;
                    if (likes == null)
                    {
                        long timestamp = FunctionUtilities.GetUnixTime(beforeTime);
                        url = "https://api.tumblr.com/v2/blog/" + blogname + "/likes?before=" + timestamp + "&api_key=" + apiKey;
                    }
                    else
                    {
                        url = "https://api.tumblr.com" + likes._links.Next.Href + "&api_key=" + apiKey;
                    }

                    log.Info("Making request to: " + url);
                    HttpResponseMessage response = await httpClient.GetAsync(url);
                    if (response.IsSuccessStatusCode)
                    {
                        TumblrResponse<Likes> tumblrResponse = await response.Content.ReadAsAsync<TumblrResponse<Likes>>();
                        likes = tumblrResponse.Response;

                        if (newestLikedTimestamp > 0)
                        {
                            List<Post> newerPosts = likes.Liked_posts.Where(x => x.Liked_Timestamp > newestLikedTimestamp).ToList();
                            if (newerPosts.Count < likes.Liked_posts.Count)
                            {
                                log.Info($"Reached Liked_Timestamp of {newestLikedTimestamp} which has already been fetched, finishing");
                                postsToProcessQueueAdapter.SendPostsToProcess(newerPosts, blogname);

                                totalCount += newerPosts.Count;
                                break;
                            }
                        }

                        totalCount += likes.Liked_posts.Count;

                        postsToProcessQueueAdapter.SendPostsToProcess(likes.Liked_posts, blogname);
                    }
                    else
                    {
                        log.Info("Got response: " + response.ReasonPhrase + " " + response.StatusCode);
                        break;
                    }
                } while (likes._links != null && likes._links.Next != null && !string.IsNullOrEmpty(likes._links.Next.Href));
            }

            log.Info("C# HTTP trigger function processed a request.");

            // Fetching the name from the path parameter in the request URL
            return req.CreateResponse(HttpStatusCode.OK, "Got " + totalCount + " posts");
        }
Exemple #13
0
        public static async Task Run([TimerTrigger("0 15 * * * *")] TimerInfo myTimer, TraceWriter log)
        {
            Startup.Init();

            PhotoToAnalyzeQueueAdapter photoToAnalyzeQueueAdapter = new PhotoToAnalyzeQueueAdapter();

            photoToAnalyzeQueueAdapter.Init();
            CloudQueueMessage message        = null;
            PhotoToAnalyze    photoToAnalyze = null;

            try
            {
                ImageAnalysisTableAdapter imageAnalysisTableAdapter = new ImageAnalysisTableAdapter();
                imageAnalysisTableAdapter.Init();

                BlogInfoTableAdapter blogInfoTableAdapter = new BlogInfoTableAdapter();
                blogInfoTableAdapter.Init();

                int processedCount = 0;

                do
                {
                    message = await photoToAnalyzeQueueAdapter.GetNextMessage();

                    if (message == null)
                    {
                        return;
                    }

                    photoToAnalyze = JsonConvert.DeserializeObject <PhotoToAnalyze>(message.AsString);

                    if (imageAnalysisTableAdapter.GetImageAnalysis(photoToAnalyze.Url) != null)
                    {
                        log.Info($"Image {photoToAnalyze.Url} already analyzed, aborting");
                        await UpdateBlogInfo(photoToAnalyze, blogInfoTableAdapter);

                        continue;
                    }

                    string failure = await ImageAnalyzer.AnalyzePhoto(log, photoToAnalyze, imageAnalysisTableAdapter);

                    if (string.IsNullOrEmpty(failure))
                    {
                        await UpdateBlogInfo(photoToAnalyze, blogInfoTableAdapter);
                    }
                    else
                    {
                        photoToAnalyze.Error = failure;
                        await photoToAnalyzeQueueAdapter.SendToPoisonQueue(photoToAnalyze);

                        log.Info($"Message for {photoToAnalyze.Url} stored to poison queue");
                    }

                    processedCount++;

                    await photoToAnalyzeQueueAdapter.DeleteMessage(message);
                } while (processedCount < 10);
            }
            catch (Exception ex)
            {
                log.Error("Error in ProcessPhotosToAnalyze: " + ex.Message, ex);

                if (message != null && photoToAnalyze != null)
                {
                    photoToAnalyze.Error      = ex.Message;
                    photoToAnalyze.StackTrace = ex.StackTrace.Substring(0, Math.Max(36000, ex.StackTrace.Length));
                    await photoToAnalyzeQueueAdapter.SendToPoisonQueue(photoToAnalyze);

                    log.Info($"Message for {photoToAnalyze.Url} stored to poison queue");
                    await photoToAnalyzeQueueAdapter.DeleteMessage(message);

                    log.Info($"Failed message deleted successfully from main queue");
                    message = null;
                }
            }
        }
Exemple #14
0
        public static async Task Run([QueueTrigger(Constants.PhotosToDownloadQueueName, Connection = "AzureWebJobsStorage")]
                                     string myQueueItem, TraceWriter log)
        {
            Startup.Init();

            string requestUrl = null;

            try
            {
                PhotosToDownload photosToDownload = JsonConvert.DeserializeObject <PhotosToDownload>(myQueueItem);

                BlobAdapter blobAdapter = new BlobAdapter();
                blobAdapter.Init();

                PhotoIndexTableAdapter photoIndexTableAdapter = new PhotoIndexTableAdapter();
                photoIndexTableAdapter.Init();

                PostsTableAdapter postsTableAdapter = new PostsTableAdapter();
                postsTableAdapter.Init(log);

                ReversePostsTableAdapter reversePostsTableAdapter = new ReversePostsTableAdapter();
                reversePostsTableAdapter.Init(log);

                List <Photo> sitePhotos = new List <Photo>();

                string   blogname = photosToDownload.IndexInfo.BlogName;
                string   id       = photosToDownload.IndexInfo.PostId;
                DateTime date     = photosToDownload.IndexInfo.PostDate;

                string sourceBlog = string.IsNullOrEmpty(photosToDownload.SourceBlog)
                    ? photosToDownload.IndexInfo.BlogName
                    : photosToDownload.SourceBlog;
                sourceBlog = SanityHelper.SanitizeSourceBlog(sourceBlog);

                using (HttpClient httpClient = new HttpClient())
                {
                    httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("image/*"));

                    foreach (Model.Tumblr.Photo photo in photosToDownload.Photos)
                    {
                        bool  isOriginal = true;
                        Photo sitePhoto  = null;

                        foreach (AltSize altSize in photo.Alt_sizes)
                        {
                            PhotoUrlHelper urlHelper = PhotoUrlHelper.ParseTumblr(altSize.Url);

                            if (isOriginal || urlHelper != null && DownloadSizes.Contains(urlHelper.Size))
                            {
                                if (sitePhoto == null)
                                {
                                    sitePhoto = new Photo
                                    {
                                        Name      = urlHelper.Container + "_" + urlHelper.Name,
                                        Extension = urlHelper.Extension,
                                        Sizes     = new PhotoSize[0]
                                    }
                                }
                                ;

                                PhotoUrlIndexEntity urlIndexEntity = photoIndexTableAdapter.GetPhotoUrlndex(sourceBlog, altSize.Url);
                                if (urlIndexEntity != null)                                         // photo already downloaded
                                {
                                    AddSizeToSitePhoto(sitePhoto, urlIndexEntity.BlobUrl, altSize); // need this to produce correct sitePhotos
                                    isOriginal = false;
                                }
                                else // photo not downloaded
                                {
                                    requestUrl = altSize.Url;
                                    byte[] photoBytes = await httpClient.GetByteArrayAsync(altSize.Url);

                                    if (photoBytes.Length > 0)
                                    {
                                        Uri blobUri = await blobAdapter.UploadPhotoBlob(urlHelper, photoBytes, isOriginal);

                                        AddSizeToSitePhoto(sitePhoto, blobUri.ToString(), altSize);

                                        photoIndexTableAdapter.InsertPhotoIndex(blogname, id, date, SanityHelper.SanitizeSourceBlog(photosToDownload.SourceBlog),
                                                                                blobUri.ToString(), urlHelper.Name, urlHelper.Size,
                                                                                altSize.Width, altSize.Height, altSize.Url);
                                        isOriginal = false;
                                        log.Info("Downloaded photo from: " + altSize.Url);
                                    }
                                }
                            }
                        }

                        if (sitePhoto?.Sizes.Length > 0)
                        {
                            sitePhotos.Add(sitePhoto);
                        }
                    }
                }

                string modifiedBody = BodyUrlModifier.ModifyUrls(sourceBlog, photosToDownload.Body, photoIndexTableAdapter, sitePhotos, out List <Model.Tumblr.Photo> extractedPhotos);

                if (extractedPhotos != null)
                {
                    log.Warning("Trying to modify body in ProcessPhotosToDownload but some images were not possible to replace");
                }

                postsTableAdapter.MarkPhotosAsDownloaded(photosToDownload.IndexInfo.BlogName, photosToDownload.IndexInfo.PostId, sitePhotos, modifiedBody);

                ReversePostEntity reversePost = new ReversePostEntity(photosToDownload.IndexInfo.BlogName, photosToDownload.IndexInfo.PostId,
                                                                      photosToDownload.PostType, photosToDownload.IndexInfo.PostDate, modifiedBody, photosToDownload.Title)
                {
                    Photos = JsonConvert.SerializeObject(sitePhotos)
                };
                reversePostsTableAdapter.InsertPost(reversePost);
            }
            catch (Exception ex)
            {
                if (ex is HttpRequestException httpRequestException && httpRequestException.Message.Contains("403") && httpRequestException.Message.Contains("Forbidden"))
                {
                    log.Warning("HTTP request was forbidden to URL: " + requestUrl);
                }