/// <summary>
/// Downloads <paramref name="uri"/> and tries to produce a thumbnail from the response: from the resource
/// itself when the response is an image, or from the parsed document when the response is HTML.
/// </summary>
/// <param name="uri">The URL to request.</param>
/// <returns>
/// The thumbnail that was found, or <c>null</c> when the request is unsuccessful, times out, fails with a
/// network error, has a content type that is neither image nor HTML, or yields no thumbnail.
/// </returns>
public async Task<ThumbnailResponse> FindImageAsync(Uri uri)
{
    try
    {
        HttpResponseMessage response = await Policies.HttpPolicy.ExecuteAsync(
            _ => client.GetAsync(uri),
            new Dictionary<string, object> { { "uri", uri } });
        using (response)
        {
            if (!response.IsSuccessStatusCode)
            {
                Log.FindThumbnailServerError(uri, response);
                return null;
            }

            // Media types are case-insensitive, so "IMAGE/PNG" or "Text/HTML" must match as well.
            string mediaType = response.Content.Headers.ContentType?.MediaType ?? "";
            if (mediaType.IndexOf("image", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // The URL itself is the image; it acts as its own referrer.
                var iu = new ImageUrl { Uri = uri, Kind = "Direct" };
                ThumbnailResponse result = await FetchThumbnailAsync(iu, uri, skipChecks: true);
                if (result != null)
                {
                    ThumbnailLog.LogThumbnail(uri, uri, iu.Kind, result, "Best");
                }

                return result;
            }

            if (mediaType.IndexOf("html", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                using (Stream stream = await response.Content.ReadAsStreamAsync())
                {
                    var parser = new HtmlParser();
                    IHtmlDocument document = await parser.ParseDocumentAsync(stream);
                    return await FindThumbnailInSoupAsync("item_document", uri, document);
                }
            }
        }
    }
    catch (TaskCanceledException)
    {
        Log.FindThumbnailTimeout(uri);
    }
    catch (HttpRequestException hre)
    {
        Log.FindThumbnailNetworkError(uri, hre);
    }
    catch (WebException we)
    {
        Log.FindThumbnailNetworkError(uri, we);
    }

    // Unsupported content type, or one of the failures logged above.
    return null;
}
/// <summary>
/// Writes one informational log entry describing the verdict reached for a candidate thumbnail image.
/// </summary>
/// <param name="referrer">The URL the image was found through.</param>
/// <param name="thumbnailUri">The URL of the image itself.</param>
/// <param name="kind">The kind label of the image URL (callers pass e.g. "Direct", "ImgTag").</param>
/// <param name="image">
/// The fetched image, or <c>null</c> when none is available; the dimension fields are then logged as null.
/// </param>
/// <param name="judgement">The verdict text to log (callers pass e.g. "Best", "Small", "Timeout").</param>
/// <param name="exception">An optional exception to attach to the log entry.</param>
public static void LogThumbnail(
    Uri referrer,
    Uri thumbnailUri,
    string kind,
    ThumbnailResponse image,
    string judgement,
    Exception exception = null)
{
    int? width = null;
    int? height = null;
    float? aspectRatio = null;
    float? area = null;
    if (image != null)
    {
        width = image.OriginalWidth;
        height = image.OriginalHeight;

        float longSide = Math.Max(image.OriginalWidth, image.OriginalHeight);
        float shortSide = Math.Min(image.OriginalWidth, image.OriginalHeight);
        if (shortSide != 0)
        {
            // A zero-sized side would make the ratio Infinity or NaN; leave it unset in that case.
            aspectRatio = longSide / shortSide;
        }

        // Multiply as float: the int product wraps silently for very large images.
        area = (float)image.OriginalWidth * image.OriginalHeight;
    }

    Logger.Information(
        exception,
        "{Referrer}: Image {Thumbnail} ({Kind}): {Judgement} ({Width}x{Height} / Ratio: {AspectRatio} / Area: {Area})",
        referrer.AbsoluteUri,
        thumbnailUri.AbsoluteUri,
        kind,
        judgement,
        width,
        height,
        aspectRatio,
        area);
}
/// <summary>
/// Resolves the thumbnail for <paramref name="imageUrl"/>, consulting the image cache before calling the
/// thumbnail service, and caching whatever outcome (success or error text) results.
/// </summary>
/// <param name="imageUrl">The candidate image and its kind label.</param>
/// <param name="referrer">The URL the image was found through; used for logging and passed to the service.</param>
/// <param name="skipChecks">Forwarded to the thumbnail service as its <c>skipChecks</c> argument.</param>
/// <returns>
/// The thumbnail, or <c>null</c> when an error is cached for the URL, the service reports an error, or the
/// request times out or fails with a network error.
/// </returns>
async Task<ThumbnailResponse> FetchThumbnailAsync(ImageUrl imageUrl, Uri referrer, bool skipChecks = false)
{
    try
    {
        var policyContext = new Dictionary<string, object> { { "uri", imageUrl.Uri } };

        // N.B.: The cache lookup lives inside the policy delegate on purpose: another caller may have
        // succeeded or failed for this URL while we were waiting on a retry, so the cache is consulted
        // again on every attempt.
        return await Policies.HttpPolicy.ExecuteAsync(
            async _ =>
            {
                object cached = imageCache.Get(imageUrl.Uri.AbsoluteUri);

                var cachedThumbnail = cached as ThumbnailResponse;
                if (cachedThumbnail != null)
                {
                    Log.ThumbnailSuccessCacheHit(referrer, imageUrl.Uri);
                    return cachedThumbnail;
                }

                // A cached string is the error text recorded by an earlier failure.
                if (cached is string)
                {
                    Log.ThumbnailErrorCacheHit(referrer, imageUrl.Uri, cached);
                    return null;
                }

                ThumbnailResponse fetched = await this.thumbnailServiceClient.GetThumbnail(
                    imageUrl.Uri,
                    skipChecks: skipChecks,
                    referrer: referrer);

                if (fetched.Error == null)
                {
                    return CacheSuccess(imageUrl, fetched);
                }

                ThumbnailLog.LogThumbnail(referrer, imageUrl.Uri, imageUrl.Kind, null, fetched.ErrorString());
                CacheError(imageUrl, fetched.ErrorString());
                return null;
            },
            policyContext);
    }
    catch (TaskCanceledException timeout)
    {
        ThumbnailLog.LogThumbnail(referrer, imageUrl.Uri, imageUrl.Kind, null, "Timeout");
        CacheError(imageUrl, timeout.Message);
        return null;
    }
    catch (Exception networkError) when (networkError is HttpRequestException || networkError is WebException)
    {
        ThumbnailLog.LogThumbnail(referrer, imageUrl.Uri, imageUrl.Kind, null, "NetworkError", networkError);
        CacheError(imageUrl, networkError.Message);
        return null;
    }
}
/// <summary>
/// Finds a thumbnail for a feed item, trying in order: the thumbnail the item already declares, images in
/// the item's content / description / summary markup, and finally the page the item links to.
/// </summary>
/// <param name="baseUri">The base URL used to resolve the item's relative links.</param>
/// <param name="item">The item to find a thumbnail for.</param>
/// <returns>
/// A copy of <paramref name="item"/> carrying the thumbnail that was found, or <paramref name="item"/>
/// itself when none was found.
/// </returns>
async Task<Item> GetItemThumbnailAsync(Uri baseUri, Item item)
{
    Uri itemLink = SyndicationUtil.Rebase(item.Link, baseUri);
    ThumbnailResponse found = null;

    // Step 1: the item may already declare a thumbnail.
    if (item.Thumbnail != null)
    {
        Uri referrer = itemLink ?? baseUri;
        Uri embeddedUrl = MakeThumbnailUrl(referrer, item.Thumbnail.Url);
        if (embeddedUrl != null)
        {
            var embedded = new ImageUrl { Kind = "EmbeddedThumb", Uri = embeddedUrl };
            found = await FetchThumbnailAsync(embedded, referrer, skipChecks: true);
            if (found != null)
            {
                ThumbnailLog.LogThumbnail(referrer, embeddedUrl, "EmbeddedThumb", found, "Best");
            }
        }
    }

    // Step 2: look through the item's own markup, stopping at the first fragment that yields an image.
    string[] soupNames = { "content", "description", "summary" };
    XElement[] soupElements = { item.Content, item.Description, item.Summary };
    for (int index = 0; index < soupElements.Length; index++)
    {
        if (found != null)
        {
            break;
        }

        XElement element = soupElements[index];
        if (element == null)
        {
            continue;
        }

        Uri soupBase = SyndicationUtil.TryParseAbsoluteUrl(element.BaseUri, baseUri) ?? itemLink ?? baseUri;
        found = await FindThumbnailInSoupAsync(soupNames[index], soupBase, SoupFromElement(element));
    }

    // Step 3: fall back to fetching the linked page itself.
    if (found == null && itemLink != null)
    {
        found = await FindImageAsync(itemLink);
    }

    if (found == null)
    {
        return item;
    }

    var thumbnail = new Thumbnail(found.ThumbnailUrl, found.ThumbnailWidth, found.ThumbnailHeight);
    return item.With(thumbnail: thumbnail);
}
/// <summary>
/// Stores a successfully fetched thumbnail in the image cache, keyed by the image's absolute URL, for
/// <c>SuccessCacheLifetime</c>.
/// </summary>
/// <param name="imageUrl">The image whose URL is the cache key.</param>
/// <param name="image">The thumbnail to cache.</param>
/// <returns>The value returned by the cache's <c>Set</c> call.</returns>
static ThumbnailResponse CacheSuccess(ImageUrl imageUrl, ThumbnailResponse image)
{
    string cacheKey = imageUrl.Uri.AbsoluteUri;
    return imageCache.Set(cacheKey, image, SuccessCacheLifetime);
}
/// <summary>
/// Picks a thumbnail out of an HTML document. Well-known metadata images (Open Graph, Twitter, link-rel,
/// known-good) are preferred; otherwise every distinct <c>img</c> source is fetched and the largest
/// acceptable one wins.
/// </summary>
/// <param name="soup">A label for the markup being searched; used only for logging.</param>
/// <param name="baseUrl">The URL used to resolve image URLs; also used as the referrer.</param>
/// <param name="document">The parsed document to search.</param>
/// <returns>The chosen thumbnail, or <c>null</c> when no acceptable image was found.</returns>
async Task<ThumbnailResponse> FindThumbnailInSoupAsync(string soup, Uri baseUrl, IHtmlDocument document)
{
    // These get preferential treatment; if one exists we use it and never scan the rest of the document.
    // (Note that they also still have to pass the URL filter.)
    ImageUrl easyUri = SyndicationUtil.ConcatSequence(
        ExtractOpenGraphImageUrls(baseUrl, document),
        ExtractTwitterImageUrls(baseUrl, document),
        ExtractLinkRelImageUrls(baseUrl, document),
        ExtractKnownGoodnessImageUrls(baseUrl, document)
    ).FirstOrDefault();
    if (easyUri != null)
    {
        ThumbnailResponse easyResponse = await FetchThumbnailAsync(easyUri, baseUrl, skipChecks: true);
        if (easyResponse != null)
        {
            // Only a successful fetch is "Best"; FetchThumbnailAsync has already logged any failure.
            ThumbnailLog.LogThumbnail(baseUrl, easyUri.Uri, easyUri.Kind, easyResponse, "Best");
        }

        // A failed fetch of the preferred candidate returns null here; there is no fallback to <img> tags.
        return easyResponse;
    }

    IEnumerable<Uri> distinctSrc =
        (from element in document.GetElementsByTagName("img")
         let src = MakeThumbnailUrl(baseUrl, element.Attributes["src"]?.Value)
         where src != null
         select src).Distinct();

    ImageUrl[] imageUrls =
        (from src in distinctSrc
         select new ImageUrl { Uri = src, Kind = "ImgTag" }).ToArray();

    // Fetch every candidate concurrently and time the whole batch.
    Stopwatch loadTimer = Stopwatch.StartNew();
    Log.BeginGetThumbsFromSoup(soup, baseUrl, imageUrls.Length);
    var potentialThumbnails = new Task<ThumbnailResponse>[imageUrls.Length];
    for (int i = 0; i < potentialThumbnails.Length; i++)
    {
        potentialThumbnails[i] = FetchThumbnailAsync(imageUrls[i], baseUrl);
    }

    ThumbnailResponse[] images = await Task.WhenAll(potentialThumbnails);
    Log.EndGetThumbsFromSoup(soup, baseUrl, imageUrls.Length, loadTimer);

    ImageUrl bestImageUrl = null;
    ThumbnailResponse bestImage = null;
    float bestArea = 0;
    for (int i = 0; i < images.Length; i++)
    {
        ImageUrl imageUrl = imageUrls[i];
        ThumbnailResponse image = images[i];
        if (image == null)
        {
            continue; // It was invalid.
        }

        int width = image.OriginalWidth;
        int height = image.OriginalHeight;

        // Multiply as float: the int product wraps for very large images, which would make a huge image
        // look negative-sized and get it rejected as "Small".
        float area = (float)width * height;
        if (area < 5000)
        {
            ThumbnailLog.LogThumbnail(baseUrl, imageUrl.Uri, imageUrl.Kind, image, "Small");
            CacheError(imageUrl, "Too Small");
            continue;
        }

        // Both sides are non-zero here, otherwise the area check above would have rejected the image.
        float ratio = (float)Math.Max(width, height) / (float)Math.Min(width, height);
        if (ratio > 2.25f)
        {
            ThumbnailLog.LogThumbnail(baseUrl, imageUrl.Uri, imageUrl.Kind, image, "Oblong");
            CacheError(imageUrl, "Too Oblong");
            continue;
        }

        if (imageUrl.Uri.AbsolutePath.Contains("sprite"))
        {
            area /= 10; // Penalize images named "sprite"
        }

        if (area > bestArea)
        {
            if (bestImageUrl != null)
            {
                // NOTE(review): the dethroned candidate gets an error cached, while candidates that lose
                // in the else-branch below do not. Preserved as-is; confirm which is intended.
                CacheError(bestImageUrl, "Not the best");
                ThumbnailLog.LogThumbnail(baseUrl, bestImageUrl.Uri, bestImageUrl.Kind, bestImage, "NotBest");
            }

            bestArea = area;
            bestImage = image;
            bestImageUrl = imageUrl;
        }
        else
        {
            ThumbnailLog.LogThumbnail(baseUrl, imageUrl.Uri, imageUrl.Kind, image, "NotBest");
        }
    }

    if (bestImage != null)
    {
        ThumbnailLog.LogThumbnail(baseUrl, bestImageUrl.Uri, bestImageUrl.Kind, bestImage, "Best");
        Log.FoundThumbnail(soup, baseUrl, bestImageUrl.Uri, bestImageUrl.Kind);
    }
    else
    {
        Log.NoThumbnailFound(soup, baseUrl);
    }

    return bestImage;
}