/// <summary>
/// Finds the photos belonging to a stored post and hands them to <c>SendPhotosToDownload</c>.
/// Photos are taken from <c>PhotosJson</c> when it is present; otherwise they are extracted
/// from the HTML in <c>Body</c>. A warning is logged when neither source yields any photo.
/// </summary>
/// <param name="blogname">Blog name, used only in the warning messages.</param>
/// <param name="mediaToDownloadQueueAdapter">Adapter forwarded unchanged to <c>SendPhotosToDownload</c>.</param>
/// <param name="log">Receives the warnings for posts that have no usable photo data.</param>
/// <param name="postEntity">Stored post whose <c>PhotosJson</c>, <c>Body</c> and <c>RowKey</c> are read.</param>
private static void SendToReprocessing(string blogname, MediaToDownloadQueueAdapter mediaToDownloadQueueAdapter, TraceWriter log, PostEntity postEntity)
{
    // Preferred source: the serialized photo array.
    if (!string.IsNullOrEmpty(postEntity.PhotosJson))
    {
        SendPhotosToDownload(mediaToDownloadQueueAdapter, postEntity, JsonConvert.DeserializeObject<Photo[]>(postEntity.PhotosJson));
        return;
    }

    if (string.IsNullOrEmpty(postEntity.Body))
    {
        log.Warning($"Post {blogname}/{postEntity.RowKey} has obsolete data and is missing PhotosJson");
        return;
    }

    // Only JSON-decode a body that is actually a JSON string literal (starts with a double
    // quote), the same way ModifyUrls treats bodies. Feeding raw HTML to the JSON parser
    // would throw instead of letting the photos be extracted.
    // Body is known to be non-empty here, so indexing the first character is safe.
    string html = postEntity.Body;
    if (html[0] == '"')
    {
        html = JsonConvert.DeserializeObject<string>(html);
    }

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(html);

    List<Photo> photosFromHtml = PostProcessor.ExctractPhotosFromHtml(htmlDoc);
    if (photosFromHtml.Count > 0)
    {
        SendPhotosToDownload(mediaToDownloadQueueAdapter, postEntity, photosFromHtml.ToArray());
    }
    else
    {
        log.Warning($"Post {blogname}/{postEntity.RowKey} has obsolete data and is missing PhotosJson and Body with photos");
    }
}
/// <summary>
/// Rewrites the <c>src</c> attribute of every <c>img</c> element in <paramref name="body"/>
/// to the URL returned by <c>TryToGetMappedUrl</c> and returns the resulting HTML.
/// </summary>
/// <param name="sourceBlog">Blog name forwarded to <c>TryToGetMappedUrl</c>.</param>
/// <param name="body">
/// Post body: either raw HTML, or a JSON string literal (starting with a double quote)
/// that contains the HTML.
/// </param>
/// <param name="photoIndexTableAdapter">Adapter forwarded to <c>TryToGetMappedUrl</c>.</param>
/// <param name="sitePhotos">Photo list forwarded to <c>TryToGetMappedUrl</c>.</param>
/// <param name="extractedPhotos">
/// Stays <c>null</c> unless at least one image URL could not be mapped; in that case it
/// receives the result of <c>PostProcessor.ExctractPhotosFromHtml</c>, called on the
/// document after the successful rewrites have been applied.
/// </param>
/// <returns>The serialized HTML document, or <c>null</c> when <paramref name="body"/> is null or empty.</returns>
public static string ModifyUrls(string sourceBlog, string body, PhotoIndexTableAdapter photoIndexTableAdapter, List<Photo> sitePhotos, out List<Model.Tumblr.Photo> extractedPhotos)
{
    extractedPhotos = null;
    if (string.IsNullOrEmpty(body))
    {
        return null;
    }

    // body is non-empty here, so an ordinal check of the first character is safe and
    // avoids the culture-sensitive string comparison.
    string decodedBody = body[0] == '"'
        ? JsonConvert.DeserializeObject<string>(body)
        : body;

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(decodedBody);

    bool hasPhotosNotFound = false;
    foreach (HtmlNode imgNode in htmlDoc.DocumentNode.Descendants("img").ToList())
    {
        // The attribute indexer returns null when the attribute is absent; an img element
        // without src has no URL to map, so skip it instead of dereferencing null.
        var srcAttribute = imgNode.Attributes["src"];
        if (srcAttribute == null)
        {
            continue;
        }

        string mappedUrl = TryToGetMappedUrl(srcAttribute.Value, sitePhotos, sourceBlog, photoIndexTableAdapter);
        if (mappedUrl != null)
        {
            srcAttribute.Value = mappedUrl;
        }
        else
        {
            hasPhotosNotFound = true;
        }
    }

    if (hasPhotosNotFound)
    {
        extractedPhotos = PostProcessor.ExctractPhotosFromHtml(htmlDoc);
    }

    using (StringWriter sw = new StringWriter())
    {
        htmlDoc.Save(sw);
        return sw.ToString();
    }
}
/// <summary>
/// Queue-triggered entry point. Deserializes the queue message into a
/// <c>PostsToProcess</c> and passes its posts and liker blog name to
/// <c>PostProcessor.ProcessPosts</c>.
/// </summary>
/// <param name="myQueueItem">JSON text of the queue message.</param>
/// <param name="log">Trace writer given to the processor and used to report failures.</param>
/// <returns>A task that completes when processing has finished.</returns>
/// <remarks>
/// Any exception thrown by <c>ProcessPosts</c> is logged with the message "Error" and then
/// rethrown unchanged, so it still propagates to the caller.
/// </remarks>
public static async Task Run([QueueTrigger(Constants.PostsToProcessQueueName, Connection = "AzureWebJobsStorage")] string myQueueItem, TraceWriter log)
{
    Startup.Init();

    var batch = JsonConvert.DeserializeObject<PostsToProcess>(myQueueItem);

    var processor = new PostProcessor();
    processor.Init(log);

    try
    {
        await processor.ProcessPosts(batch.Posts, log, batch.LikerBlogname);
    }
    catch (Exception failure)
    {
        // Record the failure, then let it continue up the stack with its original trace.
        log.Error("Error", failure);
        throw;
    }
}