/// <summary>
/// Handles the feed control's item-data-bound event. Only regular and alternating rows are
/// processed: known error descriptions are forwarded to <c>DisplayErrorMessage</c>, and the
/// "no log entries" description hides the row's <c>imgCategory</c> image when it exists.
/// </summary>
/// <param name="sender">Source of the event (not used).</param>
/// <param name="e">Event data exposing the item being bound.</param>
protected void ucRssFeed_ItemDataBound(object sender, RssFeedItemEventArgs e)
{
    // Any other row type is ignored.
    if ((e.Item.ItemType != RssFeedItemType.Item) && (e.Item.ItemType != RssFeedItemType.AlternatingItem))
    {
        return;
    }

    RssFeedItem boundItem = e.Item;

    switch (boundItem.DataItem.Description)
    {
        case RSSConst.NO_UPDATES:
        {
            DisplayErrorMessage(RSSConst.RSSFeedType.NO_UPDATES);
            break;
        }

        case RSSConst.INCORRECT_FORMAT:
        {
            DisplayErrorMessage(RSSConst.RSSFeedType.INCORRECT_FORMAT);
            break;
        }

        case RSSConst.ORG_CONN_STR_NOT_FOUND:
        {
            DisplayErrorMessage(RSSConst.RSSFeedType.ORG_CONN_STR_NOT_FOUND);
            break;
        }

        case RSSConst.QUERY_STRING_PARAMETERS_NOT_DEFINED:
        {
            DisplayErrorMessage(RSSConst.RSSFeedType.QUERY_STRING_PARAMETERS_NOT_DEFINED);
            break;
        }

        case RSSConst.NO_LOG_ENTRIES:
        {
            // Nothing to report for this row; just hide its category image.
            Image categoryImage = (Image)boundItem.FindControl("imgCategory");
            if (categoryImage != null)
            {
                categoryImage.Visible = false;
            }

            break;
        }
    }
}
/// <summary>
/// Builds the shared test fixtures: items 1-5 are added to <c>preFeed</c>, and items 1-4
/// plus item 6 are added to <c>postFeed</c>.
/// </summary>
public SimpleCraigslistListenerTest()
{
    // Only the items that are actually placed into a feed are created; the previously
    // declared items 7-10 were never referenced and have been removed.
    RssFeedItem item1 = new RssFeedItem() { Title = "test item 1", Description = "first item", Link = "phoenix.craigslist.org/1", PublishDate = new DateTime(2012, 1, 1) };
    RssFeedItem item2 = new RssFeedItem() { Title = "test item 2", Description = "second item", Link = "phoenix.craigslist.org/2", PublishDate = new DateTime(2012, 2, 1) };
    RssFeedItem item3 = new RssFeedItem() { Title = "test item 3", Description = "third item", Link = "phoenix.craigslist.org/3", PublishDate = new DateTime(2012, 3, 1) };
    RssFeedItem item4 = new RssFeedItem() { Title = "test item 4", Description = "fourth item", Link = "phoenix.craigslist.org/4", PublishDate = new DateTime(2012, 4, 1) };
    RssFeedItem item5 = new RssFeedItem() { Title = "test item 5", Description = "fifth item", Link = "phoenix.craigslist.org/5", PublishDate = new DateTime(2012, 5, 1) };
    RssFeedItem item6 = new RssFeedItem() { Title = "test item 6", Description = "6 item", Link = "phoenix.craigslist.org/6", PublishDate = new DateTime(2012, 6, 1) };

    // Feed as it looked before the update.
    preFeed.Add(item1);
    preFeed.Add(item2);
    preFeed.Add(item3);
    preFeed.Add(item4);
    preFeed.Add(item5);

    // Feed after the update: item 5 is gone and item 6 is new.
    postFeed.Add(item1);
    postFeed.Add(item2);
    postFeed.Add(item3);
    postFeed.Add(item4);
    postFeed.Add(item6);
}
/// <summary>
/// Parses a downloaded feed item and saves it through the crawler repository.
/// </summary>
/// <param name="feed">Feed whose <c>DatabaseRetentionDays</c> is passed to the repository.</param>
/// <param name="item">Feed item to parse and save.</param>
/// <returns>
/// <c>true</c> when the item was parsed and saved; <c>false</c> when the URL is invalid,
/// the URL looks like a home page, or parsing/saving threw.
/// </returns>
private bool TryParseAndSave(RssFeed feed, RssFeedItem item)
{
    string candidateUrl = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;

    // The Uri constructor throws on a null or relative URL, which would escape this Try*
    // method; validate instead and report the item as not processed.
    Uri uri;
    if (!Uri.TryCreate(candidateUrl, UriKind.Absolute, out uri))
    {
        Log.Error("PARSE_ERROR: UrlHash '{urlHash}':'{url}' is not a valid absolute URL", item.FeedAttributes.UrlHash, candidateUrl);
        return false;
    }

    if (uri.AbsolutePath == "/" && string.IsNullOrEmpty(uri.Query))
    {
        Log.Information("URI '{uri}' detected as a home page rather than an article, skipping parse operation", uri);
        return false;
    }

    try
    {
        // Parse the downloaded file as dictated by the site parsing definitions
        _articleParser.Parse(item);
    }
    catch (Exception ex)
    {
        Log.Error(ex, "PARSE_ERROR: UrlHash '{urlHash}':'{url}'", item.FeedAttributes.UrlHash, item.FeedAttributes.Url);
        return false;
    }

    try
    {
        _crawlerRepository.SaveDocument<RssFeedItem>(_crawlerCollectionName, item, feed.DatabaseRetentionDays, "", null, "");
        return true;
    }
    catch (Exception ex)
    {
        Log.Error(ex, "SAVE_ERROR: UrlHash '{urlHash}':'{url}'", item.FeedAttributes.UrlHash, item.FeedAttributes.Url);
        return false;
    }
}
/// <summary>
/// Opens the selected feed item's link via <c>Device.OpenUri</c>.
/// </summary>
/// <param name="item">The selected item; nothing happens when it is null.</param>
private void OnItemSelected(RssFeedItem item)
{
    if (item == null)
    {
        return;
    }

    Device.OpenUri(item.Link);
}
/// <summary>
/// Downloads the RSS feed at <c>RssFeedUri</c>, loads it as XML and converts every
/// <c>item</c> element into an <c>RssFeedItem</c>.
/// </summary>
/// <returns>The items that parsed successfully; unparseable items are logged and skipped.</returns>
private async Task<IEnumerable<RssFeedItem>> GetItemsFromFeed()
{
    // Fetch the raw feed
    var response = await RequestWithCookiesAndRetryAsync(RssFeedUri);

    // Load the response body as an XML document
    var document = new XmlDocument();
    document.LoadXml(response.ContentString);

    // Convert each <item> element
    var parsedItems = new List<RssFeedItem>();
    foreach (XmlNode itemNode in document.GetElementsByTagName("item"))
    {
        if (!RssFeedItem.TryParse(itemNode, out RssFeedItem parsed))
        {
            logger.Warn($"Could not parse {DisplayName} RSS item '{itemNode.InnerText}'");
            continue;
        }

        parsedItems.Add(parsed);
    }

    return parsedItems;
}
/// <summary>
/// Sends a notification mail for a hard-coded sample item through
/// <c>Utilities.SendMail</c>. The subject carries the item's link and the HTML body links
/// to the item and shows its description.
/// </summary>
public void SendEmail()
{
    var sampleItem = new RssFeedItem
    {
        Title = "test item 6",
        Description = "6 item",
        Link = "phoenix.craigslist.org/6",
        PublishDate = new DateTime(2012, 06, 01)
    };

    string subject = string.Concat("Craigslist notification, a new post detected - ", sampleItem.Link);

    string body = string.Concat(
        "A new item has been detected!! <br><br> <a href=\"",
        sampleItem.Link,
        "\">",
        sampleItem.Title,
        "</a><br>",
        sampleItem.Description);

    PK.SimpleCraigslistListener.Utilities.Utilities.SendMail(subject, body);
}
/// <summary>
/// Checks that the first element of <c>postFeed.Except(preFeed)</c> has the link,
/// description and publish date of item 6.
/// </summary>
public void CompareRssFeedItem()
{
    // Expected difference between the two feeds
    var expected = new RssFeedItem
    {
        Title = "test item 6",
        Description = "6 item",
        Link = "phoenix.craigslist.org/6",
        PublishDate = new DateTime(2012, 06, 01)
    };

    // Items present in postFeed but not in preFeed
    List<RssFeedItem> difference = postFeed.Except(preFeed).ToList();
    RssFeedItem actual = difference.First();

    Assert.AreEqual(expected.Link, actual.Link);
    Assert.AreEqual(expected.Description, actual.Description);
    Assert.AreEqual(expected.PublishDate, actual.PublishDate);
}
/// <summary>
/// An item element whose description is empty must produce an <c>RssFeedItem</c> with an
/// empty description.
/// </summary>
public void RemovingHtmlCanHandleEmptyDescription()
{
    // Arrange: take the first item of the sample feed and blank its description
    var itemElement = XDocumentFactory.CreateSampleFeed().Descendants("item").First();
    itemElement.Element("description").Value = string.Empty;

    // Act
    var parsedItem = new RssFeedItem(itemElement);

    // Assert
    Assert.AreEqual(string.Empty, parsedItem.Description);
}
/// <summary>
/// An item element whose description contains markup must produce an <c>RssFeedItem</c>
/// whose description has the tags removed.
/// </summary>
public void CanRemoveHtmlFromDescription()
{
    // Arrange: take the first item of the sample feed and give it an HTML description
    var itemElement = XDocumentFactory.CreateSampleFeed().Descendants("item").First();
    itemElement.Element("description").Value = "some element <b>no html</b>";

    // Act
    var parsedItem = new RssFeedItem(itemElement);

    // Assert
    Assert.AreEqual("some element no html", parsedItem.Description);
}
/// <summary>
/// Reads the downloaded file for a feed item and fills in its metadata: Open Graph and HTML
/// attributes, host name, site name, and the article text stored under the
/// <c>ParserResult</c> HTML attribute.
/// </summary>
/// <param name="item">Feed item whose <c>FeedAttributes.FileName</c> points at the downloaded file.</param>
public void Parse(RssFeedItem item)
{
    string fileName = item.FeedAttributes.FileName;

    // Without a downloaded file there is nothing to extract metadata from
    if (!File.Exists(fileName))
    {
        Log.Debug("No file to parse, skipping metadata values for '{url}'", item.FeedAttributes.Url);
        return;
    }

    // Images and PDFs carry no og tags, so they are not parsed
    bool isBinaryFile = fileName.EndsWith(".png")
        || fileName.EndsWith(".jpg")
        || fileName.EndsWith(".gif")
        || fileName.EndsWith(".pdf");
    if (isBinaryFile)
    {
        Log.Information("Binary file detected, skipping metadata values for '{url}'", item.FeedAttributes.Url);
        return;
    }

    Log.Debug("Parsing meta tags from file '{fileName}'", fileName);

    var doc = new HtmlDocument();
    doc.Load(fileName);

    // Collect the meta data found in the raw HTML document
    item.OpenGraphAttributes.Add(ParseOpenGraphAttributes(doc));
    item.HtmlAttributes.Add(ParseHtmlAttributes(doc));
    item.HostName = GetHostName(item);
    item.SiteName = GetSiteName(item);

    // Look up the parsing definition registered for this site name
    var definition = _definitionFactory.Get(item.SiteName);

    using (LogContext.PushProperty("siteName", item.SiteName))
    {
        // The matched route decides which named parser and which selectors to use
        (string parserName, string articleSelector, string paragraphSelector) =
            GetRouteMatchedTagParser(definition, GetRouteOnly(item));

        // Resolve the named parser from the container
        var tagParser = _container.ResolveNamed<ITagParser>(parserName);
        tagParser.Initialize(doc.Text, item);

        // Extract the article text between the parser's pre- and post-processing hooks
        tagParser.PreParse();
        item.HtmlAttributes.Add("ParserResult", tagParser.ParseTagsBySelector(articleSelector, paragraphSelector));
        tagParser.PostParse();
    }
}
/// <summary>
/// Picks the best available URL for a feed item: the Open Graph <c>og:url</c> when it starts
/// with "http", otherwise the crawled <c>Url</c> HTML attribute, otherwise the URL recorded
/// in the feed attributes.
/// </summary>
/// <param name="item">Feed item to inspect.</param>
/// <returns>The selected URL.</returns>
protected virtual string GetCanonicalUrl(RssFeedItem item)
{
    // og:url is usually the best reference, but some sites publish it without a protocol
    string openGraphUrl = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

    // When og:url is not a full URL, use the URL that was crawled (which may be null)
    string candidate = openGraphUrl.StartsWith("http")
        ? openGraphUrl
        : item.HtmlAttributes.GetValueOrDefault("Url");

    // Final fallback: the URL detected in the feed
    return candidate ?? item.FeedAttributes.Url;
}
/// <summary>
/// Builds release info from a parsed RSS feed item.
/// </summary>
/// <param name="feedItem">Feed item supplying title, quality, size, description, link and publish date.</param>
/// <remarks>
/// <c>MagnetLink</c> and <c>PublishDate</c> are only assigned when the corresponding feed
/// value parses successfully.
/// </remarks>
public EraiRawsReleaseInfo(RssFeedItem feedItem)
{
    Title = StripTitle(feedItem.Title);
    Quality = feedItem.Quality;
    Size = ReleaseInfo.GetBytes(feedItem.Size);
    DetailsLink = ParseDetailsLink(feedItem.Description);

    if (Uri.TryCreate(feedItem.Link, UriKind.Absolute, out Uri magnetUri))
    {
        MagnetLink = magnetUri;
    }

    // Feed dates are machine-generated, so parse them with the invariant culture rather
    // than whatever culture the host process happens to run under.
    if (DateTimeOffset.TryParse(
            feedItem.PublishDate,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out DateTimeOffset publishDate))
    {
        PublishDate = publishDate;
    }
}
/// <summary>
/// Builds release info from a parsed RSS feed item. Quality and title both come from the
/// feed item's title via <c>SplitQualityAndTitle</c>.
/// </summary>
/// <param name="feedItem">Feed item supplying title, link and publish date.</param>
/// <remarks>
/// <c>Link</c> and <c>PublishDate</c> are only assigned when the corresponding feed value
/// parses successfully.
/// </remarks>
public EraiRawsReleaseInfo(RssFeedItem feedItem)
{
    var splitTitle = SplitQualityAndTitle(feedItem.Title);
    Quality = splitTitle.quality;
    Title = splitTitle.title;

    if (Uri.TryCreate(feedItem.Link, UriKind.Absolute, out Uri magnetUri))
    {
        Link = magnetUri;
    }

    // Feed dates are machine-generated, so parse them with the invariant culture rather
    // than whatever culture the host process happens to run under.
    if (DateTimeOffset.TryParse(
            feedItem.PublishDate,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out DateTimeOffset publishDate))
    {
        PublishDate = publishDate;
    }
}
/// <summary>
/// Updates the name, feed URL and site URL of an existing feed entry.
/// </summary>
/// <param name="id">Key of the entry to update.</param>
/// <param name="item">Request body carrying the new values.</param>
/// <returns>
/// 404 when no entry has the given id, 400 when the request body is missing,
/// otherwise 204 after the changes are saved.
/// </returns>
public IActionResult Put(long id, [FromBody] RssFeedItem item)
{
    var feed = _context.RssFeedItems.Find(id);
    if (feed == null)
    {
        return NotFound();
    }

    // A missing or unreadable body binds to null; reject it instead of failing with a
    // NullReferenceException below.
    if (item == null)
    {
        return BadRequest();
    }

    feed.Name = item.Name;
    feed.FeedUrl = item.FeedUrl;
    feed.SiteUrl = item.SiteUrl;

    _context.RssFeedItems.Update(feed);
    _context.SaveChanges();

    return NoContent();
}
/// <summary>
/// Attempts to build an <c>RssFeedItem</c> from an RSS item node.
/// </summary>
/// <param name="rssItem">Node whose <c>title</c>, <c>link</c> and <c>pubDate</c> children are read.</param>
/// <param name="item">The parsed item, or null when any of the three values is missing or blank.</param>
/// <returns><c>true</c> when all three values are present; otherwise <c>false</c>.</returns>
public static bool TryParse(XmlNode rssItem, out RssFeedItem item)
{
    string title = rssItem.SelectSingleNode("title")?.InnerText;
    string link = rssItem.SelectSingleNode("link")?.InnerText;
    string publishDate = rssItem.SelectSingleNode("pubDate")?.InnerText;

    // Every field is mandatory; a single blank value fails the parse
    bool hasAllFields = !string.IsNullOrWhiteSpace(title)
        && !string.IsNullOrWhiteSpace(link)
        && !string.IsNullOrWhiteSpace(publishDate);

    item = hasAllFields ? new RssFeedItem(title, link, publishDate) : null;
    return hasAllFields;
}
/// <summary>
/// Determines the lower-cased host name for a feed item. The URL is taken from the Open
/// Graph <c>og:url</c>, then the crawled <c>Url</c> HTML attribute or the feed URL, and is
/// finally passed through <c>_webUtils.RepairUrl</c> when it still does not start with "http".
/// </summary>
/// <param name="item">Feed item to inspect.</param>
/// <returns>The host component of the resolved URL, lower-cased.</returns>
private string GetHostName(RssFeedItem item)
{
    string url = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

    // URLs are identifiers, not linguistic text: compare ordinally so the check does not
    // depend on the current culture.
    if (!url.StartsWith("http", StringComparison.Ordinal))
    {
        url = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;
    }

    if (!url.StartsWith("http", StringComparison.Ordinal))
    {
        url = _webUtils.RepairUrl(url, item.FeedAttributes.Url);
    }

    Uri uri = new Uri(url);

    // Invariant casing keeps host names stable under cultures with special casing rules
    return uri.GetComponents(UriComponents.Host, UriFormat.Unescaped).ToLowerInvariant();
}
/// <summary>
/// Builds an <c>RssFeedItem</c> from an RSS item node, including the namespaced
/// <c>erai:size</c> and <c>erai:res</c> children.
/// </summary>
/// <param name="nsm">Namespace manager used to resolve the <c>erai</c> prefix.</param>
/// <param name="rssItem">The item node to read.</param>
/// <param name="item">The constructed item; it is always assigned, even when invalid.</param>
/// <returns>The result of <c>item.IsValid()</c>.</returns>
public static bool TryParse(XmlNamespaceManager nsm, XmlNode rssItem, out RssFeedItem item)
{
    // Missing children simply leave the corresponding property null
    item = new RssFeedItem
    {
        Title = rssItem.SelectSingleNode("title")?.InnerText,
        Link = rssItem.SelectSingleNode("link")?.InnerText,
        PublishDate = rssItem.SelectSingleNode("pubDate")?.InnerText,
        Size = rssItem.SelectSingleNode("erai:size", nsm)?.InnerText,
        Description = rssItem.SelectSingleNode("description")?.InnerText,
        Quality = rssItem.SelectSingleNode("erai:res", nsm)?.InnerText
    };

    return item.IsValid();
}
/// <summary>
/// Downloads the RSS feed at <c>RssFeedUri</c>, loads it as XML and converts every
/// <c>item</c> element into an <c>RssFeedItem</c>, resolving the <c>erai</c> namespace.
/// </summary>
/// <returns>The items that parsed successfully; unparseable items are logged and skipped.</returns>
private async Task<IEnumerable<RssFeedItem>> GetItemsFromFeed()
{
    // Fetch the raw feed
    var response = await RequestWithCookiesAndRetryAsync(RssFeedUri);
    if (response.IsRedirect)
    {
        await FollowIfRedirect(response);
    }

    // Load the response body as an XML document
    var document = new XmlDocument();
    document.LoadXml(response.ContentString);

    // Register the feed's custom namespace so erai:* elements can be selected
    var namespaces = new XmlNamespaceManager(document.NameTable);
    namespaces.AddNamespace("erai", "https://www.erai-raws.info/rss-page/");

    // Convert each <item> element
    var parsedItems = new List<RssFeedItem>();
    foreach (XmlNode itemNode in document.GetElementsByTagName("item"))
    {
        if (!RssFeedItem.TryParse(namespaces, itemNode, out RssFeedItem parsed))
        {
            logger.Warn($"Could not parse {DisplayName} RSS item '{itemNode.OuterXml}'");
            continue;
        }

        parsedItems.Add(parsed);
    }

    return parsedItems;
}
/// <summary>
/// Fills an export item with the extended metadata of a crawled article: URL, article text,
/// subtitle, image URL, site name and host name, followed by a few site-specific fix-ups.
/// </summary>
/// <param name="exportFeedItem">Export item to populate.</param>
/// <param name="item">Crawled feed item supplying Open Graph and HTML attributes.</param>
/// <param name="hostName">Host name stored on the export item and used as the site-name fallback.</param>
protected virtual void SetExtendedArticleMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
{
    // Prefer the Open Graph URL, which is provided with almost every article
    exportFeedItem.Url = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

    // Make sure the Url is complete
    if (!exportFeedItem.Url.StartsWith("http"))
    {
        exportFeedItem.Url = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;
    }

    exportFeedItem.ArticleText = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
    exportFeedItem.Subtitle = item.OpenGraphAttributes.GetValueOrDefault("og:title");
    exportFeedItem.ImageUrl = item.OpenGraphAttributes.GetValueOrDefault("og:image");

    // Invariant casing so the literal site-name comparisons below are culture-independent
    exportFeedItem.SiteName = item.OpenGraphAttributes.GetValueOrDefault("og:site_name")?.ToLowerInvariant() ?? "";
    exportFeedItem.HostName = hostName;

    // The fallback URL above may be null; use an empty string for the substring checks
    string url = exportFeedItem.Url ?? "";

    // Fixup apnews on populist press links which sometimes report incorrectly
    if (string.IsNullOrWhiteSpace(exportFeedItem.SiteName) ||
        (exportFeedItem.SiteName == "ap news" && url.Contains("populist.press")))
    {
        exportFeedItem.SiteName = exportFeedItem.HostName;
    }

    // Fixup news.trust.org imageUrl links which have an embedded redirect
    if (string.IsNullOrWhiteSpace(exportFeedItem.ImageUrl) ||
        (exportFeedItem.SiteName == "news.trust.org" && url.Contains("news.trust.org")))
    {
        exportFeedItem.ImageUrl = null;
    }

    // Remove the protocol portion if there is one, i.e. 'https://'
    if (exportFeedItem.SiteName.IndexOf('/') > 0)
    {
        exportFeedItem.SiteName = exportFeedItem.SiteName.Substring(exportFeedItem.SiteName.LastIndexOf('/') + 1);
    }
}
/// <summary>
/// Adds the feed item to the saved-article list unless an article with the same title is
/// already present, then writes the list to file.
/// </summary>
/// <param name="savedArticleList">The saved article list.</param>
/// <param name="savedFeedItem">The feed item to save; its first link is stored with the title.</param>
/// <remarks>
/// When the title already exists, an informational message box is shown and nothing is
/// added. The list is written to file in either case.
/// </remarks>
public void AddArticle(ObservableCollection<SavedArticle> savedArticleList, RssFeedItem savedFeedItem)
{
    string title = savedFeedItem.Item.Title.Text;

    // Any() is already false for an empty list, so no separate emptiness check is needed
    if (savedArticleList.Any(c => c.Title == title))
    {
        MessageBox.Show(Resources.ERROR_SAVING_ARTICLE_MESSAGE, Resources.MESSAGEBOX_ALERT, MessageBoxButton.OK, MessageBoxImage.Information);
    }
    else
    {
        savedArticleList.Add(new SavedArticle(title, savedFeedItem.Item.Links[0].Uri));
    }

    _saveUtility.SaveArticlesToFile(savedArticleList);
}
/// <summary>
/// Loads an RSS 1.0 (RDF) feed from the given URL and converts each <c>rss:item</c> into an
/// <c>RssFeedItem</c>.
/// </summary>
/// <param name="url">Location of the feed, passed to <c>XmlTextReader</c>.</param>
/// <returns>
/// One entry per item. Missing title, description or link become empty strings; a missing
/// or unparseable <c>dc:date</c> becomes <c>DateTime.MinValue</c>.
/// </returns>
public static List<RssFeedItem> GetItemFeed(string url)
{
    List<RssFeedItem> feed = new List<RssFeedItem>();

    XmlDocument doc = new XmlDocument();
    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
    nsmgr.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    nsmgr.AddNamespace("rss", "http://purl.org/rss/1.0/");
    nsmgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

    // Dispose the reader so the underlying stream is released even when loading fails
    using (XmlTextReader reader = new XmlTextReader(url))
    {
        doc.Load(reader);
    }

    foreach (XmlNode node in doc.SelectNodes("/rdf:RDF//rss:item", nsmgr))
    {
        XmlNode titleNode = node.SelectSingleNode("rss:title", nsmgr);
        XmlNode descNode = node.SelectSingleNode("rss:description", nsmgr);
        XmlNode linkNode = node.SelectSingleNode("rss:link", nsmgr);
        XmlNode dateNode = node.SelectSingleNode("dc:date", nsmgr);

        string date = dateNode == null ? "" : dateNode.InnerText;

        // DateTime.Parse threw on the empty string used for a missing date, which defeated
        // the missing-node handling; fall back to DateTime.MinValue instead.
        DateTime publishDate;
        if (!DateTime.TryParse(
                date,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out publishDate))
        {
            publishDate = DateTime.MinValue;
        }

        RssFeedItem item = new RssFeedItem()
        {
            Description = descNode == null ? "" : descNode.InnerText,
            Title = titleNode == null ? "" : titleNode.InnerText,
            Link = linkNode == null ? "" : linkNode.InnerText,
            PublishDate = publishDate
        };

        feed.Add(item);
    }

    return feed;
}
/// <summary>
/// Reads an RSS <c>item</c> from the reader and returns it as a feed item.
/// </summary>
/// <remarks>
/// The reader is first advanced to the first descendant <c>item</c> element. Every element
/// encountered afterwards is matched by name against the known RSS item children; any other
/// element is handed to <c>UnhandledElement</c>. The loop runs until <c>reader.Read()</c>
/// returns false, and the reader is closed before returning.
/// </remarks>
/// <param name="reader">The reader to consume; it is closed by this method.</param>
/// <returns>The populated feed item.</returns>
public IFeedItem ParseItem(XmlReader reader)
{
    IRssFeedItem item = new RssFeedItem();
    reader.ReadToDescendant("item");

    // When true, the next loop iteration inspects the current node without calling Read()
    // first. NOTE(review): presumably because the content-reading calls below already
    // leave the reader on the following node - confirm against XmlReader semantics.
    bool readContent = false;
    while (readContent || reader.Read())
    {
        readContent = false;
        if (reader.NodeType == XmlNodeType.Element)
        {
            readContent = true;
            switch (reader.Name)
            {
                case "author":
                    item.Author = reader.ReadElementContentAsString();
                    break;

                case "category":
                    // Complex elements are converted from their own subtree reader
                    using (XmlReader subReader = reader.ReadSubtree())
                    {
                        item.Category = ConvertToIRssCategory(subReader);
                    }
                    // For an empty element, let the loop call Read() again
                    if (reader.IsEmptyElement)
                    {
                        readContent = false;
                    }
                    break;

                case "comments":
                    item.Comments = CachedPropertiesProvider.ConvertToUri(reader.ReadElementContentAsString());
                    break;

                case "description":
                    item.Description = reader.ReadElementContentAsString();
                    break;

                case "enclosure":
                    using (XmlReader subReader = reader.ReadSubtree())
                    {
                        item.Enclosure = ConvertToIRssEnclosure(subReader);
                    }
                    if (reader.IsEmptyElement)
                    {
                        readContent = false;
                    }
                    break;

                case "guid":
                    using (XmlReader subReader = reader.ReadSubtree())
                    {
                        item.Guid = ConvertToIRssGuid(subReader);
                    }
                    if (reader.IsEmptyElement)
                    {
                        readContent = false;
                    }
                    break;

                case "link":
                    item.Link = CachedPropertiesProvider.ConvertToUri(reader.ReadElementContentAsString());
                    break;

                case "pubDate":
                    item.PublicationDate = CachedPropertiesProvider.ConvertToTzDateTime(reader.ReadElementContentAsString());
                    break;

                case "source":
                    using (XmlReader subReader = reader.ReadSubtree())
                    {
                        item.Source = ConvertToIRssSource(subReader);
                    }
                    if (reader.IsEmptyElement)
                    {
                        readContent = false;
                    }
                    break;

                case "title":
                    item.Title = reader.ReadElementContentAsString();
                    break;

                default:
                    // Elements outside the set above are delegated
                    UnhandledElement(item, reader);
                    break;
            }
        }
    }

    reader.Close();
    return(item);
}
/// <summary>
/// Stores the source HTML and the feed item this parser will work on.
/// </summary>
/// <param name="sourceHtml">Raw HTML of the document.</param>
/// <param name="item">Feed item associated with the document.</param>
public void Initialize(string sourceHtml, RssFeedItem item)
{
    _item = item;
    _sourceHtml = sourceHtml;
}
/// <summary>
/// Fills an export item for a graphic file: the feed URL becomes the image URL, and the
/// item's host name is used as both host name and site name.
/// </summary>
/// <param name="item">Crawled feed item.</param>
/// <param name="exportFeedItem">Export item to populate.</param>
protected virtual void SetGraphicMetaData(RssFeedItem item, ExportFeedItem exportFeedItem)
{
    // The feed URL itself points at the graphic
    exportFeedItem.ImageUrl = item.FeedAttributes.Url;

    var hostName = item.HostName;
    exportFeedItem.HostName = hostName;
    exportFeedItem.SiteName = hostName;
}
/// <summary>
/// Fills an export item with video metadata. Subtitle, image URL, site name and description
/// come from the Open Graph tags; the video URL and dimensions come from a site-specific
/// source (rumble ld+json, bitchute parser result, or Open Graph video tags).
/// </summary>
/// <param name="exportFeedItem">Export item to populate.</param>
/// <param name="item">Crawled feed item supplying Open Graph and HTML attributes.</param>
/// <param name="hostName">Host name stored on the export item and used as the site-name fallback.</param>
protected virtual void SetVideoMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
{
    // Extract the meta data from the Open Graph tags
    exportFeedItem.Subtitle = item.OpenGraphAttributes.GetValueOrDefault("og:title") ?? "";
    exportFeedItem.ImageUrl = item.OpenGraphAttributes.GetValueOrDefault("og:image") ?? "";
    exportFeedItem.SiteName = item.OpenGraphAttributes.GetValueOrDefault("og:site_name")?.ToLowerInvariant() ?? "";
    exportFeedItem.HostName = hostName;
    var description = item.OpenGraphAttributes.GetValueOrDefault("og:description") ?? "";

    if (string.IsNullOrWhiteSpace(exportFeedItem.SiteName))
    {
        exportFeedItem.SiteName = hostName;
    }

    if (item.SiteName == "rumble")
    {
        var text = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
        if (!text.StartsWith("<", StringComparison.Ordinal))
        {
            Log.Debug("EXPORT: Processing rumble.com ld+json metadata");

            // application/ld+json parser result. Deserializing an empty document yields
            // null, so guard before enumerating.
            var list = JsonConvert.DeserializeObject<List<JsonLdRumbleValues>>(text);
            if (list != null)
            {
                foreach (var value in list)
                {
                    if (value == null || string.IsNullOrWhiteSpace(value.embedUrl))
                    {
                        continue;
                    }

                    // Use the first entry that carries an embed URL
                    exportFeedItem.VideoUrl = value.embedUrl;
                    exportFeedItem.VideoHeight = int.TryParse(Convert.ToString(value.height), out int height) ? height : 0;
                    exportFeedItem.VideoWidth = int.TryParse(Convert.ToString(value.width), out int width) ? width : 0;
                    break;
                }
            }
        }
    }
    else if (item.SiteName == "bitchute")
    {
        Log.Information("EXPORT: Processing bitchute.com metadata");

        // Bitchute logic is a little convoluted, they don't provide much metadata: the
        // video URL is the first https:// link in the parser result, up to the next quote.
        var result = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
        int start = result.IndexOf("https://", StringComparison.Ordinal);
        int end = start < 0 ? -1 : result.IndexOf('\"', start);

        if (end > start)
        {
            exportFeedItem.VideoUrl = result.Substring(start, end - start);
            exportFeedItem.VideoHeight = 1080;
            exportFeedItem.VideoWidth = 1920;
        }
        else
        {
            // No link (or no closing quote) used to raise an out-of-range exception;
            // report it and leave the video fields empty instead.
            Log.Warning("EXPORT: No bitchute.com video URL found in parser result");
            exportFeedItem.VideoUrl = "";
            exportFeedItem.VideoHeight = 0;
            exportFeedItem.VideoWidth = 0;
        }
    }
    else
    {
        Log.Debug("EXPORT: Processing open graph video metadata");

        // Sites that provide video metadata via open graph tags
        exportFeedItem.VideoUrl = item.OpenGraphAttributes.GetValueOrDefault("og:video:secure_url") ??
                                  item.OpenGraphAttributes.GetValueOrDefault("og:video:url") ??
                                  item.OpenGraphAttributes.GetValueOrDefault("og:video") ??
                                  item.OpenGraphAttributes.GetValueOrDefault("og:x:video") ?? "";
        exportFeedItem.VideoHeight = int.TryParse(item.OpenGraphAttributes.GetValueOrDefault("og:video:height") ??
                                                  item.OpenGraphAttributes.GetValueOrDefault("og:x:video:height") ??
                                                  item.OpenGraphAttributes.GetValueOrDefault("og:image:height"), out int height) ? height : 0;
        exportFeedItem.VideoWidth = int.TryParse(item.OpenGraphAttributes.GetValueOrDefault("og:video:width") ??
                                                 item.OpenGraphAttributes.GetValueOrDefault("og:x:video:width") ??
                                                 item.OpenGraphAttributes.GetValueOrDefault("og:image:width"), out int width) ? width : 0;

        // A non-empty parser result replaces the Open Graph description
        string parserResult = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
        if (!string.IsNullOrEmpty(parserResult))
        {
            description = parserResult;
        }
    }

    using (LogContext.PushProperty("hostName", hostName))
    {
        Log.Information("Video URL: '{url}' ({height}x{width})", exportFeedItem.VideoUrl, exportFeedItem.VideoHeight, exportFeedItem.VideoWidth);
    }

    // There's no article text for most video sites, so just use the meta description
    exportFeedItem.ArticleText = $"<p>{description}</p>";
}
/// <summary>
/// Tells whether two feed items are the same item by comparing their <c>Guid</c> values.
/// </summary>
/// <param name="left">First item.</param>
/// <param name="right">Second item.</param>
/// <returns><c>true</c> when both items have equal <c>Guid</c> values.</returns>
public static bool IsSameFeedItem(this RssFeedItem left, RssFeedItem right)
{
    var leftGuid = left.Guid;
    var rightGuid = right.Guid;
    return leftGuid == rightGuid;
}
/// <summary>
/// Fills an export item with minimal metadata: a fixed placeholder article text, and the
/// host name as both host name and site name.
/// </summary>
/// <param name="exportFeedItem">Export item to populate.</param>
/// <param name="item">Crawled feed item (not read by this implementation).</param>
/// <param name="hostName">Host name to store on the export item.</param>
protected virtual void SetBasicArticleMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
{
    exportFeedItem.ArticleText = "<p>Unable to crawl article content. Click the link below to view in your browser.</p>";
    exportFeedItem.HostName = hostName;
    exportFeedItem.SiteName = hostName;
}
/// <summary>
/// Creates the item in the container, using the item's <c>Id</c> as the partition key.
/// </summary>
/// <param name="item">Item to create.</param>
public async Task AddItemAsync(RssFeedItem item)
{
    await _container.CreateItemAsync<RssFeedItem>(
        item,
        new PartitionKey(item.Id));
}
/// <summary>
/// Upserts the item into the container, using the supplied id as the partition key.
/// </summary>
/// <param name="id">Value used to build the partition key.</param>
/// <param name="item">Item to upsert.</param>
public async Task UpdateItemAsync(string id, RssFeedItem item)
{
    await _container.UpsertItemAsync<RssFeedItem>(
        item,
        new PartitionKey(id));
}
/// <summary>
/// Shows the given feed item in this cell by copying its title and description into the
/// <c>Title</c> and <c>Description</c> text controls.
/// </summary>
/// <param name="item">Feed item to display.</param>
public void UpdateCell(RssFeedItem item)
{
    var titleText = item.Title;
    Title.Text = titleText;

    var descriptionText = item.Description;
    Description.Text = descriptionText;
}
/// <summary>
/// Creates a new feed entry from the request body.
/// </summary>
/// <param name="item">Request body describing the entry to add.</param>
/// <returns>
/// 400 when the request body is missing; otherwise 201 pointing at the "Get" route for the
/// new entry's id.
/// </returns>
public IActionResult Post([FromBody] RssFeedItem item)
{
    // A missing or unreadable body binds to null; reject it instead of letting the add
    // fail with an exception.
    if (item == null)
    {
        return BadRequest();
    }

    _context.RssFeedItems.Add(item);
    _context.SaveChanges();

    return CreatedAtRoute("Get", new { id = item.Id }, item);
}
/// <summary>
/// Converts a crawled feed item into an export item, choosing the graphic, video, basic or
/// extended metadata path and applying the matching template to the article text.
/// </summary>
/// <param name="item">Crawled feed item.</param>
/// <param name="feed">Feed the item belongs to; passed to the template step.</param>
/// <returns>The populated export item.</returns>
public ExportFeedItem FormatItem(RssFeedItem item, RssFeed feed)
{
    // UrlHash is a hash of the feed source rather than of the final target URL. That avoids
    // re-crawling through link shorteners such as bit.ly and t.co: once a source hash has
    // been crawled there is no need to crawl it again. The hash therefore does not truly
    // reflect the target URL, which is acceptable because feeds overlap anyway.
    var exportFeedItem = new ExportFeedItem
    {
        Id = Guid.NewGuid().ToString(),
        FeedId = item.FeedAttributes.FeedId,
        Url = GetCanonicalUrl(item),
        UrlHash = item.FeedAttributes.UrlHash,
        DateAdded = item.FeedAttributes.DateAdded,
        LinkLocation = item.FeedAttributes.LinkLocation,
        Title = item.FeedAttributes.Title
    };

    string hostName = new Uri(exportFeedItem.Url)
        .GetComponents(UriComponents.Host, UriFormat.Unescaped)
        .ToLower();

    // Graphic and PDF downloads get their own template
    var fileName = item.FeedAttributes.FileName ?? "";
    bool isBinaryFile = fileName.EndsWith(".png")
        || fileName.EndsWith(".jpg")
        || fileName.EndsWith(".gif")
        || fileName.EndsWith(".pdf");
    if (isBinaryFile)
    {
        SetGraphicMetaData(item, exportFeedItem);
        exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.GraphicTemplate);
        return exportFeedItem;
    }

    var openGraph = item.OpenGraphAttributes;
    string videoUrl = openGraph.GetValueOrDefault("og:video:secure_url")
        ?? openGraph.GetValueOrDefault("og:video:url")
        ?? openGraph.GetValueOrDefault("og:video")
        ?? openGraph.GetValueOrDefault("og:x:video")
        ?? "";

    // Some sites do not provide OpenGraph video type tags, so infer the type for those
    string videoType = openGraph.GetValueOrDefault("og:video:type")
        ?? openGraph.GetValueOrDefault("og:x:video:type");
    if (videoType == null)
    {
        if (videoUrl.EndsWith(".mp4") || item.SiteName == "bitchute")
        {
            videoType = "video/mp4";
        }
        else if (videoUrl.Contains("youtube.com") || item.SiteName == "rumble")
        {
            videoType = "text/html";
        }
        else
        {
            videoType = "";
        }
    }

    bool hasVideoSource = videoUrl.Length > 0 || item.SiteName == "rumble" || item.SiteName == "bitchute";
    bool isSupportedType = videoType == "text/html" || videoType == "video/mp4" || videoType == "application/x-mpegURL";

    if (hasVideoSource && isSupportedType)
    {
        Log.Debug("Applying video metadata values for '{hostname}'", hostName);
        SetVideoMetaData(exportFeedItem, item, hostName);

        if (exportFeedItem.VideoHeight <= 0)
        {
            // Without a usable video height the item is rendered with the extended template
            exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.ExtendedTemplate);
        }
        else if (videoType == "video/mp4" || videoType == "application/x-mpegURL")
        {
            exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.Mp4VideoTemplate);
        }
        else
        {
            exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.HtmlVideoTemplate);
        }

        return exportFeedItem;
    }

    var parserResult = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
    if (string.IsNullOrEmpty(parserResult))
    {
        // Nothing was parsed for this article, so show minimal basic meta data
        Log.Debug("No parsed result, applying basic metadata values for '{hostname}'", hostName);
        SetBasicArticleMetaData(exportFeedItem, item, hostName);
        exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.BasicTemplate);
        return exportFeedItem;
    }

    Log.Debug("Applying extended metadata values for '{hostname}'", hostName);
    SetExtendedArticleMetaData(exportFeedItem, item, hostName);
    exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.ExtendedTemplate);
    return exportFeedItem;
}
/// <summary>
/// Saves a feed item by handing it, together with the current saved-article list, to the
/// saved feed item manager's <c>AddArticle</c>.
/// </summary>
/// <param name="savedArticleList">The saved article list.</param>
/// <param name="feedItem">The feed item to save.</param>
public void SaveArticles(ObservableCollection<SavedArticle> savedArticleList, RssFeedItem feedItem)
{
    var manager = _savedFeedItemManager;
    manager.AddArticle(savedArticleList, feedItem);
}
/// <summary>
/// Delivers a feed item to the user. When e-mail notification is enabled the item is mailed
/// as HTML; otherwise it is sent as a Markdown chat message, with Reuters links getting a
/// text-only layout that includes a shortened description.
/// </summary>
/// <param name="user">Recipient; its <c>ChatId</c> addresses the chat message.</param>
/// <param name="item">Feed item to deliver.</param>
/// <param name="isMuted">When true, the chat message is sent with notifications disabled.</param>
private async Task SendRssItem(UserDetails user, RssFeedItem item, bool isMuted)
{
    const string ReutersLink = "http://feeds.reuters.com/";

    if (Configuration.USE_EMAIL_NOTIFICATION)
    {
        // Shorten well-known feed hosts to friendlier names for the mail subject
        var origin = item.Origin
            .Replace("meduza.io", "meduza")
            .Replace("feeds.reuters.com", "reuters")
            .Replace("feeds.feedburner.com", "feedburner")
            .Replace("www.rte.ie", "RTE")
            .Replace("www.xkcd.com", "xkcd");

        var message = $@" <html> <header></header> <body> <a href=""{item.Link}""><h2>{item.Title}</h2></a> <b>Publication date: {item.PublicationDate:yyyy-MMM-dd HH:mm}</b> <br/> <br/> {item.Description} <br/> <br/> <a href=""{item.Link}"">Read More</a> </body> </html> ";

        await MailClient.SendMail(
            Configuration.NOTIFICATION_EMAIL_DST,
            $"{origin}: {item.Title}",
            body: message,
            isHtmlFormatted: true);
    }
    else if (!item.Link.ToLowerInvariant().Contains(ReutersLink))
    {
        // Regular links: send the title as a link and let the client render a preview
        var message = $"[{item.Title}]({item.Link})";

        await API.SendMessage(
            user.ChatId.ToString(),
            message,
            disable_notification: isMuted ? (bool?)true : null,
            disable_web_page_preview: false,
            parse_mode: "Markdown");
    }
    else
    {
        // Fallback mode for Reuters: no web preview, so include up to 200 characters of
        // the description. A missing description is treated as empty.
        var description = item.Description ?? string.Empty;
        var text = description.Length > 200 ? description.Substring(0, 200) : description;
        var message = $"*{item.Title}*\n\n_{text}_\n\n[Read More]({item.Link})";

        await API.SendMessage(
            user.ChatId.ToString(),
            message,
            disable_notification: isMuted ? (bool?)true : null,
            disable_web_page_preview: true,
            parse_mode: "Markdown");
    }
}