/// <summary>
/// Decides whether <paramref name="feedItem"/> must be dropped according to the
/// category filters configured on <paramref name="feedCfg"/>.
/// </summary>
/// <remarks>
/// Filters are walked in their configured order. The first filter whose category is
/// present on the item decides the outcome: an <c>Exclude</c> filter yields
/// <c>true</c>, an <c>Include</c> filter yields <c>false</c>. An item without
/// categories never matches any filter.
/// </remarks>
/// <param name="feedCfg">Feed configuration providing <c>CategoryFilters</c>.</param>
/// <param name="feedItem">Feed item whose <c>Categories</c> are tested.</param>
/// <returns>
/// <c>true</c> if the item should be excluded; <c>false</c> if it matched an include
/// filter first, or matched no filter at all.
/// </returns>
/// <exception cref="NotImplementedException">
/// A filter with a mode other than <c>Exclude</c> or <c>Include</c> was reached before
/// any filter matched.
/// </exception>
public static bool ShouldExcludeCategory(this FeedCfg feedCfg, FeedItem feedItem)
{
    foreach (var filter in feedCfg.CategoryFilters)
    {
        var mode = filter.Mode;

        // The mode is validated before the category is looked at, so an unknown mode
        // fails even when the item would not have matched this filter.
        if (mode != Models.FilterMode.Exclude && mode != Models.FilterMode.Include)
        {
            throw new NotImplementedException($"No such FilterMode {filter.Mode}");
        }

        var itemHasCategory = feedItem.Categories?.Contains(filter.Category) ?? false;

        if (itemHasCategory)
        {
            // First matching filter wins.
            return mode == Models.FilterMode.Exclude;
        }
    }

    return false;
}
/// <summary>
/// Reads the feed described by <paramref name="feedCfg"/>, downloads the contents of
/// its items and returns them with the new items sorted by publishing date, most
/// recent first.
/// </summary>
/// <param name="feedCfg">
/// Configuration of the feed to read. Its <c>PendingRefreshDate</c> is set to the time
/// the read started, but only when the feed returned at least one item.
/// </param>
/// <returns>
/// The populated feed data, or <c>null</c> when the feed has no items, when the
/// content download yields nothing, or when any exception occurs (logged as a warning).
/// </returns>
private async Task<FeedData> DownloadFeedAsync(FeedCfg feedCfg)
{
    try
    {
        // Captured before the request is issued so the recorded refresh time never
        // post-dates the moment the feed was actually read.
        var refreshStartedAt = DateTime.Now;
        var feedUrl = feedCfg.InterpolateUrl();
        var feed = await FeedReader.ReadAsync(feedUrl).ConfigureAwait(false);

        var hasItems = feed?.Items != null && feed.Items.Count > 0;

        if (!hasItems)
        {
            return null;
        }

        feedCfg.PendingRefreshDate = refreshStartedAt;

        var downloaded = await DownloadFeedContentsAsync(new FeedData(feedCfg, feed)).ConfigureAwait(false);

        if (downloaded != null)
        {
            // Items without a publishing date sort last.
            downloaded.NewItems = downloaded.NewItems
                                            .OrderByDescending(item => item.PublishingDate ?? DateTime.MinValue)
                                            .ToList();
        }

        return downloaded;
    }
    catch (Exception ex)
    {
        LogTo.Warning(ex, "Exception while reading feed {Name}", feedCfg.Name);

        return null;
    }
}
/// <summary>
/// Builds the effective feed URL by passing <c>SourceUrl</c> through
/// <c>Interpolate</c> with three named values: <c>now</c> (current local time),
/// <c>lastPubDate</c> and <c>lastRefreshDate</c> (both taken from
/// <paramref name="feedCfg"/>).
/// </summary>
/// <param name="feedCfg">Feed configuration supplying the URL and the date values.</param>
/// <returns>
/// The string produced by <c>Interpolate</c>.
/// NOTE(review): the placeholder syntax is defined by <c>Interpolate</c>, which is not
/// visible here — confirm against its implementation.
/// </returns>
public static string InterpolateUrl(this FeedCfg feedCfg)
    => feedCfg.SourceUrl.Interpolate(
        ("now", DateTime.Now),
        ("lastPubDate", feedCfg.LastPubDate),
        ("lastRefreshDate", feedCfg.LastRefreshDate));
/// <summary>
/// Filters a single feed item and, if it passes, fetches and processes its content.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>When the item has a link, look up the matching website configuration and rewrite the link through <c>MakeLink</c>.</item>
/// <item>Reject items by publishing date (when <c>UsePubDate</c> is set), by already-known id (when <c>UseGuid</c> is set) and by category filters.</item>
/// <item>Download the linked page, or fall back to the inline content / description when there is no link.</item>
/// <item>Run the content through <c>ProcessContent</c> and wrap the item.</item>
/// </list>
/// The item is mutated in place (<c>Link</c> and <c>Content</c>).
/// </remarks>
/// <param name="feedCfg">Configuration of the feed the item belongs to.</param>
/// <param name="feedItem">The item to filter and download.</param>
/// <returns>
/// The wrapped item, or <c>null</c> when the item is filtered out, has no usable
/// content, or an exception occurred (logged).
/// </returns>
private async Task<FeedItemExt> DownloadFeedContentAsync(FeedCfg feedCfg, FeedItem feedItem)
{
    WebsiteCfg siteCfg = null;

    try
    {
        if (feedItem.Link != null)
        {
            siteCfg = WebsitesConfig.FindConfig(feedItem.Link);
            feedItem.Link = feedItem.MakeLink(siteCfg);
        }

        // Publishing date: an item without a parsable date cannot be compared and is skipped.
        if (feedCfg.UsePubDate)
        {
            if (feedItem.PublishingDate == null)
            {
                LogTo.Warning("Date missing, or unknown format for feed {Name}, item title '{Title}', raw date '{PublishingDateString}'",
                              feedCfg.Name,
                              feedItem.Title,
                              feedItem.PublishingDateString);

                return null;
            }

            if (feedItem.PublishingDate <= feedCfg.LastPubDate)
            {
                return null;
            }
        }

        // Guid: skip entries that were already recorded for this feed.
        var alreadyKnown = feedCfg.UseGuid && feedCfg.EntriesGuid.Contains(feedItem.Id);

        if (alreadyKnown)
        {
            return null;
        }

        // Categories
        if (feedCfg.ShouldExcludeCategory(feedItem))
        {
            return null;
        }

        // Content: inline when there is no link, downloaded otherwise.
        if (feedItem.Link == null)
        {
            feedItem.Content ??= feedItem.Description;
        }
        else
        {
            // A site-specific request is preferred; a dedicated client is only created
            // when the site configuration carries a cookie.
            var request = siteCfg?.CreateRequest(
                              feedItem.Link,
                              string.IsNullOrWhiteSpace(siteCfg.Cookie) ? null : new FlurlClient())
                          ?? feedItem.Link.CreateRequest();

            var body = await request.GetStringAsync().ConfigureAwait(false);

            feedItem.Content = body;

            if (body == null)
            {
                LogTo.Warning("Failed to download content for feed {Name}, item title '{Title}', link '{Link}'.",
                              feedCfg.Name,
                              feedItem.Title,
                              feedItem.Link);
            }
        }

        if (string.IsNullOrWhiteSpace(feedItem.Content))
        {
            return null;
        }

        // NOTE(review): siteCfg is null here whenever the item has no link; ProcessContent
        // is invoked on it regardless — confirm it tolerates a null configuration.
        feedItem.Content = await siteCfg.ProcessContent(feedItem.Content, feedItem.Link).ConfigureAwait(false);

        return new FeedItemExt(feedItem, siteCfg);
    }
    catch (UriFormatException ex)
    {
        LogTo.Warning(ex,
                      "Invalid content URI in feed {Name}, item title '{Title}', link '{Link}'.",
                      feedCfg.Name,
                      feedItem.Title,
                      feedItem.Link);
    }
    catch (FlurlHttpException ex)
    {
        LogTo.Warning(ex,
                      "Failed to download content for feed {Name}, item title '{Title}', link '{Link}'.",
                      feedCfg.Name,
                      feedItem.Title,
                      feedItem.Link);
    }
    catch (Exception ex)
    {
        LogTo.Error(ex,
                    "Exception while downloading content for feed {Name}, item title '{Title}', link '{Link}'",
                    feedCfg.Name,
                    feedItem.Title,
                    feedItem.Link);
    }

    return null;
}
/// <summary>
/// Returns the item's link with the feed's <c>LinkParameter</c> appended.
/// </summary>
/// <param name="feedItem">Item whose <c>Link</c> is used as the base.</param>
/// <param name="feedCfg">Feed configuration providing the optional <c>LinkParameter</c>.</param>
/// <returns>
/// The concatenation of both values; a <c>null</c> part contributes nothing, so the
/// result is never <c>null</c>.
/// </returns>
public static string MakeLink(this FeedItem feedItem, FeedCfg feedCfg)
    => string.Concat(feedItem.Link, feedCfg.LinkParameter);
/// <summary>
/// Filters a single feed item and, if it passes, downloads and post-processes its
/// content while holding one slot of <paramref name="throttler"/>.
/// </summary>
/// <remarks>
/// Steps, in order: reject by publishing date (when <c>UsePubDate</c> is set), by
/// already-known id (when <c>UseGuid</c> is set) and by category filters; download the
/// linked page or fall back to inline content / description; apply the feed's content
/// filters; make links absolute when the item has a link. The item's <c>Content</c> is
/// mutated in place.
/// </remarks>
/// <param name="feedCfg">Configuration of the feed the item belongs to.</param>
/// <param name="feedItem">The item to filter and download.</param>
/// <param name="throttler">Semaphore limiting the number of concurrent downloads.</param>
/// <param name="client">HTTP client used to fetch the linked page.</param>
/// <returns>
/// The wrapped item, or <c>null</c> when the item is filtered out, has no usable
/// content, or an exception occurred (logged as an error).
/// </returns>
private static async Task<FeedItemExt> DownloadFeedContent(FeedCfg feedCfg,
                                                           FeedItem feedItem,
                                                           SemaphoreSlim throttler,
                                                           HttpClient client)
{
    // Tracks whether the slot was really obtained: releasing after a failed wait would
    // over-release the semaphore (or throw SemaphoreFullException).
    var slotAcquired = false;

    try
    {
        await throttler.WaitAsync();
        slotAcquired = true;

        // Publishing date: an item without a parsable date cannot be compared and is skipped.
        if (feedCfg.UsePubDate)
        {
            if (feedItem.PublishingDate == null)
            {
                LogTo.Warning(
                    $"Date missing, or unknown format for feed {feedCfg.Name}, item title '{feedItem.Title}', raw date '{feedItem.PublishingDateString}'");

                return null;
            }

            if (feedItem.PublishingDate <= feedCfg.LastPubDate)
            {
                return null;
            }
        }

        // Guid: skip entries that were already recorded for this feed.
        if (feedCfg.UseGuid && feedCfg.EntriesGuid.Contains(feedItem.Id))
        {
            return null;
        }

        // Categories
        if (feedCfg.ShouldExcludeCategory(feedItem))
        {
            return null;
        }

        // Effective link (with the feed's link parameter), computed once; null when the
        // item carries no link.
        var link = feedItem.Link != null ? feedItem.MakeLink(feedCfg) : null;

        // Content: downloaded when there is a link, inline otherwise.
        if (link != null)
        {
            // Request and response own native resources and are disposed when this
            // block is left.
            using var request = new HttpRequestMessage(HttpMethod.Get, link);
            using var response = await client.SendAsync(request);

            if (response != null && response.IsSuccessStatusCode)
            {
                feedItem.Content = await response.Content.ReadAsStringAsync();
            }
            else
            {
                feedItem.Content = null;

                LogTo.Warning(
                    $"Failed to download content for feed {feedCfg.Name}, item title '{feedItem.Title}', link '{link}'. HTTP Status code : {response?.StatusCode}");
            }
        }
        else
        {
            feedItem.Content ??= feedItem.Description;
        }

        if (string.IsNullOrWhiteSpace(feedItem.Content))
        {
            return null;
        }

        // Every filter runs on the same unfiltered content; non-blank results are
        // joined with CRLF.
        if (feedCfg.Filters.Any())
        {
            feedItem.Content = string.Join(
                "\r\n",
                feedCfg.Filters
                       .Select(f => f.Filter(feedItem.Content))
                       .Where(s => !string.IsNullOrWhiteSpace(s)));
        }

        if (link != null)
        {
            feedItem.Content = HtmlUtils.EnsureAbsoluteLinks(feedItem.Content, new Uri(link));
        }

        return new FeedItemExt(feedItem);
    }
    catch (Exception ex)
    {
        LogTo.Error(ex,
                    $"Exception while downloading content for feed {feedCfg.Name}, item title '{feedItem.Title}', link '{feedItem.MakeLink(feedCfg)}'");
    }
    finally
    {
        if (slotAcquired)
        {
            throttler.Release();
        }
    }

    return null;
}
/// <summary>
/// Creates a feed data holder from a feed configuration and the feed read for it.
/// </summary>
/// <param name="feedCfg">Value stored in <c>FeedCfg</c>.</param>
/// <param name="feed">Value stored in <c>Feed</c>.</param>
public FeedData(FeedCfg feedCfg, Feed feed)
{
    (FeedCfg, Feed) = (feedCfg, feed);
}