/// <summary>
/// Formats a fully populated RSS 1.0 feed and checks that serializing the
/// resulting document as indented UTF-8 XML yields non-empty text.
/// </summary>
public void FormatSampleFeed()
{
    // arrange: build each part of the feed separately, then assemble it
    var textInput = new Rss10TextInput
    {
        Name = "TextInput",
        Title = "My text input",
        Description = "Description of my text input",
        Link = "https://example.org/my-text-input",
        About = "https://example.org/my-text-input",
    };

    var image = new Rss10Image
    {
        Title = "My channel image",
        Link = "https://example.org/channel",
        Url = "https://example.org/image.png",
        About = "https://example.org/image.png",
    };

    var article = new Rss10Item
    {
        Title = "My awesome article",
        Description = "My awesome article description...",
        Link = "https://example.org/article",
        About = "https://example.org/article",
    };

    var channel = new Rss10Channel
    {
        Title = "This is my feed title",
        Description = "Description of my feed",
        TextInput = textInput,
        About = "https://example.org/channel",
        Link = "https://example.org/channel",
        Image = image,
        Items = new List<Rss10Item> { article },
    };

    var feed = new Rss10Feed { Channel = channel };

    // action
    var formatted = Rss10FeedFormatter.TryFormatRss10Feed(feed, out var document);
    Assert.True(formatted);

    // assert: the document serializes to some XML text
    var utf8 = Encoding.UTF8;
    var settings = new XmlWriterSettings();
    settings.Encoding = utf8;
    settings.Indent = true;

    using (var buffer = new MemoryStream())
    using (var textWriter = new StreamWriter(buffer, utf8))
    using (var writer = XmlWriter.Create(textWriter, settings))
    {
        document.WriteTo(writer);
        writer.Flush();

        var xml = utf8.GetString(buffer.ToArray());
        Assert.NotEmpty(xml);
    }
}
/// <summary>
/// Round-trip check: parses the sample document into an RSS 1.0 feed, formats
/// that feed back into a document, and asserts that both documents serialize
/// to the same indented XML text.
/// </summary>
/// <param name="embeddedDocument">Sample feed whose <c>XDocument</c> is the round-trip input.</param>
public void ParseAndFormat(SampleFeed embeddedDocument)
{
    // arrange
    var original = embeddedDocument.XDocument;
    var settings = new XmlWriterSettings { Indent = true };
    var originalXml = new StringBuilder();
    var roundTrippedXml = new StringBuilder();

    // action
    var parsed = Rss10FeedParser.TryParseRss10Feed(original, out var feed);
    Assert.True(parsed);

    var formatted = Rss10FeedFormatter.TryFormatRss10Feed(feed, out var roundTripped);
    Assert.True(formatted);

    // Serialize both documents with identical writer settings.
    using (var originalWriter = XmlWriter.Create(originalXml, settings))
    {
        original.WriteTo(originalWriter);
        originalWriter.Flush();
    }

    using (var roundTrippedWriter = XmlWriter.Create(roundTrippedXml, settings))
    {
        roundTripped.WriteTo(roundTrippedWriter);
        roundTrippedWriter.Flush();
    }

    // assert
    Assert.Equal(originalXml.ToString(), roundTrippedXml.ToString());
}
/// <summary>
/// Formats a feed whose channel and single item have no properties set and
/// checks that the resulting document still serializes to non-empty XML text.
/// </summary>
public void FormatSampleFeedEmpty()
{
    // arrange: a channel holding one completely empty item
    var channel = new Rss10Channel
    {
        Items = new List<Rss10Item> { new Rss10Item() },
    };
    var feed = new Rss10Feed { Channel = channel };

    // action
    var formatted = Rss10FeedFormatter.TryFormatRss10Feed(feed, out var document);
    Assert.True(formatted);

    // assert: the document serializes to some XML text
    var utf8 = Encoding.UTF8;
    var settings = new XmlWriterSettings();
    settings.Encoding = utf8;
    settings.Indent = true;

    using (var buffer = new MemoryStream())
    using (var textWriter = new StreamWriter(buffer, utf8))
    using (var writer = XmlWriter.Create(textWriter, settings))
    {
        document.WriteTo(writer);
        writer.Flush();

        var xml = utf8.GetString(buffer.ToArray());
        Assert.NotEmpty(xml);
    }
}
/// <summary>
/// Wraps the client's <c>OpenReadCompleted</c> event in an observable that
/// yields the feed parsed from the first completed read.
/// </summary>
/// <param name="client">Web client whose <c>OpenReadCompleted</c> event is observed.</param>
/// <returns>
/// An observable limited to one element (<c>Take(1)</c>). If the completed
/// read carries an error, that error is thrown from the projection.
/// </returns>
public static IObservable<SyndicationFeed> GetSydicationObservable(this WebClient client)
{
    var completedReads = Observable.FromEvent<OpenReadCompletedEventHandler, OpenReadCompletedEventArgs>(
        h => (sender, e) => h(e),
        h => client.OpenReadCompleted += h,
        h => client.OpenReadCompleted -= h);

    var feeds = completedReads.Select(e =>
    {
        if (e.Error != null)
        {
            throw e.Error;
        }

        // NOTE(review): `formatter` is declared outside this method and is
        // lazily created here without synchronization — confirm that is intended.
        if (formatter == null)
        {
            formatter = new Rss10FeedFormatter();
        }

        // Dispose the response stream and the reader once the feed has been
        // read; previously both were left open after parsing.
        using (var stream = e.Result)
        using (var reader = XmlReader.Create(stream))
        {
            formatter.ReadFrom(reader);
            return formatter.Feed;
        }
    });

    return feeds.Take(1);
}
/// <summary>
/// Reports whether the reader's content is a feed: first by attempting
/// <c>SyndicationFeed.Load</c>, and if that throws for any reason, by asking
/// an <c>Rss10FeedFormatter</c> whether it can read the same reader.
/// </summary>
/// <param name="xmlReader">Reader to probe; it is read from by this call.</param>
/// <returns>
/// <c>true</c> when the load succeeds; otherwise the result of the
/// RSS 1.0 formatter's <c>CanRead</c>.
/// </returns>
public static bool IsXmlFeed(this XmlReader xmlReader)
{
    bool isFeed;

    try
    {
        SyndicationFeed.Load(xmlReader);
        isFeed = true;
    }
    catch
    {
        // Any load failure falls back to the RSS 1.0 check on the same reader.
        isFeed = new Rss10FeedFormatter().CanRead(xmlReader);
    }

    return isFeed;
}
/// <summary>
/// Downloads the feed at <c>lFeed.Url</c>, parses it with
/// <c>SyndicationFeed.Load</c> (falling back to <c>Rss10FeedFormatter</c>),
/// and maps the result onto a new <see cref="Feed"/> with one
/// <see cref="Article"/> per usable item.
/// </summary>
/// <param name="lFeed">Feed whose <c>Url</c>, <c>SiteUrl</c> and <c>Public</c> values are read.</param>
/// <param name="timeout">Not used by this method.</param>
/// <returns>
/// The populated feed, or <c>null</c> when neither parser could read the
/// download or the parsed feed has no item collection.
/// </returns>
/// <remarks>
/// Items that throw while being mapped are logged through <c>LogService</c>
/// and skipped. Fetching the site page for the canonical link is not guarded,
/// so a failure of that download propagates to the caller.
/// </remarks>
public Feed GetRemoteFeed(Feed lFeed, int timeout = 0)
{
    string url = lFeed.Url;
    string siteUrl = lFeed.SiteUrl;
    var remoteFeed = new Feed { Url = url };

    SyndicationFeed rss = null;
    XmlReader reader = null;
    var encoding = GetEncoding(url, siteUrl);

    try
    {
        var str = string.Empty;
        using (var wc = new WebClient())
        {
            remoteFeed.Encoding = encoding;
            wc.Headers["User-Agent"] = "www.rssheap.com";
            wc.Encoding = encoding;
            str = wc.DownloadString(url);
        }

        str = str.Replace("media:thumbnail", "media"); //mashable fix

        // NOTE(review): DtdProcessing.Parse enables DTD processing on downloaded
        // content — confirm this is required for the feeds being consumed.
        reader = new MyXmlReader(new StringReader(str), new XmlReaderSettings
        {
            //MaxCharactersInDocument can be used to control the maximum amount of data
            //read from the reader and helps prevent OutOfMemoryException
            MaxCharactersInDocument = 1024 * 64 * 64 * 64,
            DtdProcessing = DtdProcessing.Parse
        });

        rss = SyndicationFeed.Load(reader);
    }
    catch
    {
        // Fallback: try the RSS 1.0 formatter on the same reader. Any failure
        // here (including a reader that was never created) yields null.
        try
        {
            var rss10ff = new Rss10FeedFormatter();
            rss10ff.ReadFrom(reader);
            rss = rss10ff.Feed;
        }
        catch
        {
            return null;
        }
    }

    if (rss == null || rss.Items == null)
    {
        return null;
    }

    string authorName = (rss.Authors != null && rss.Authors.Count > 0)
        ? rss.Authors.First().Name ?? rss.Authors.First().Email
        : string.Empty;
    remoteFeed.Author = authorName;

    string title = rss.Title != null ? rss.Title.Text : string.Empty;
    remoteFeed.Name = title;

    string description = rss.Description != null ? rss.Description.Text : string.Empty;
    remoteFeed.Description = description;

    //get the site url: each step below only runs while SiteUrl is still empty
    if (rss.Links != null)
    {
        var siteLink = rss.Links.FirstOrDefault(l => l.RelationshipType == "alternate");
        if (siteLink != null)
        {
            try
            {
                remoteFeed.SiteUrl = siteLink.Uri.AbsoluteUri;
            }
            catch { }
        }

        if (remoteFeed.SiteUrl.IsNullOrEmpty() && rss.Links.Count > 0)
        {
            try
            {
                remoteFeed.SiteUrl = rss.Links.First().Uri.AbsoluteUri;
            }
            catch { }
        }
    }

    if (remoteFeed.SiteUrl.IsNullOrEmpty())
    {
        try
        {
            remoteFeed.SiteUrl = new Uri(rss.Id).AbsoluteUri;
        }
        catch { }
    }

    if (remoteFeed.SiteUrl.IsNullOrEmpty() && !siteUrl.IsNullOrEmpty())
    {
        remoteFeed.SiteUrl = siteUrl;

        //find canonical
        var html = string.Empty;
        using (var wc = new WebClient())
        {
            wc.Encoding = encoding;
            html = wc.DownloadString(siteUrl);
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // SelectNodes returns null when the page has no <link> elements.
        var linkNodes = htmlDocument.DocumentNode.SelectNodes("//link");
        if (linkNodes != null)
        {
            // Compare the rel attribute as a string. The previous filter also
            // required GetAttributeValue(name, false) to be true, which is a
            // boolean conversion of the attribute value and never holds for
            // rel="canonical", so no canonical link was ever selected.
            var canonical = linkNodes
                .Where(s => s.GetAttributeValue("rel", string.Empty) == "canonical")
                .Select(s => s.GetAttributeValue("href", string.Empty))
                .FirstOrDefault(c => !c.IsNullOrEmpty());

            // A bare "/" canonical keeps the siteUrl assigned above.
            if (!canonical.IsNullOrEmpty() && canonical != "/")
            {
                remoteFeed.SiteUrl = canonical;
            }
        }
    }

    if (remoteFeed.SiteUrl.IsNullOrEmpty())
    {
        try
        {
            remoteFeed.SiteUrl = new Uri(url).GetLeftPart(UriPartial.Authority);
        }
        catch { }
    }

    foreach (var item in rss.Items)
    {
        try
        {
            var uri = item.Links[0].Uri;
            string itemUrl = string.Empty;
            if (uri.IsAbsoluteUri)
            {
                // Prefer the first link without a media type; otherwise use the first link.
                var plainLink = item.Links.FirstOrDefault(t => t.MediaType.IsNullOrEmpty());
                itemUrl = (plainLink ?? item.Links[0]).Uri.AbsoluteUri;
            }
            else
            {
                // Relative item links are resolved against the site url.
                itemUrl = new Uri(new Uri(remoteFeed.SiteUrl), uri.ToString()).AbsoluteUri;
            }

            if (itemUrl.IsNullOrEmpty())
            {
                continue;
            }

            Article feedItem = new Article();

            // Body precedence: content:encoded extension, then summary, then text content.
            string itemBody = item.Summary != null ? item.Summary.Text : string.Empty;
            if (item.ElementExtensions != null)
            {
                string bodyTemp = item.ElementExtensions
                    .ReadElementExtensions<string>("encoded", "http://purl.org/rss/1.0/modules/content/")
                    .FirstOrDefault();
                if (bodyTemp != null)
                {
                    itemBody = bodyTemp;
                }
            }

            if (itemBody.IsNullOrEmpty() && item.Content is TextSyndicationContent textContent)
            {
                itemBody = textContent.Text;
            }

            feedItem.Name = item.Title != null ? item.Title.Text : string.Empty;
            if (feedItem.Name.IsNullOrEmpty())
            {
                // No title: default to the url, or to the last path segment with
                // hyphens turned into spaces when that segment is hyphenated.
                feedItem.Name = itemUrl;
                if (itemUrl.IndexOf('/') > 0)
                {
                    var slug = itemUrl.Substring(itemUrl.LastIndexOf('/') + 1);

                    // Test for hyphens before replacing them; testing afterwards
                    // could never succeed, so the slug was never used.
                    if (slug.IndexOf('-') > 0)
                    {
                        feedItem.Name = slug.Replace("-", " ");
                    }
                }
            }

            if (!feedItem.Name.IsNullOrEmpty())
            {
                feedItem.Name = feedItem.Name.Trim();
            }

            // Every item that has authors overwrites the feed-level author.
            if (item.Authors.Count > 0)
            {
                remoteFeed.Author = item.Authors[0].Name ?? item.Authors[0].Email;
            }

            feedItem.Url = itemUrl;

            //if it is feedburner get the url from <feedburner:origLink>
            var elemExt = item.ElementExtensions.FirstOrDefault(e => e.OuterName == "origLink");
            if (elemExt != null)
            {
                feedItem.Url = elemExt.GetObject<XElement>().Value;
            }

            feedItem.Body = itemBody;

            feedItem.Published = item.PublishDate.DateTime != DateTime.MinValue
                ? item.PublishDate.DateTime
                : item.LastUpdatedTime.DateTime;

            if (feedItem.Published == DateTime.MinValue)
            {
                // Best effort: first item extension whose name contains "date".
                try
                {
                    var date = item.ElementExtensions
                        .FirstOrDefault(e => e.OuterName.ToLower().Contains("date"))
                        .GetObject<XElement>().Value;
                    feedItem.Published = DateTime.Parse(date);
                }
                catch { }
            }

            if (feedItem.Published == DateTime.MinValue)
            {
                // Best effort: same lookup at feed level, then the feed's last-updated date.
                try
                {
                    var blogPubDate = rss.ElementExtensions
                        .FirstOrDefault(e => e.OuterName.ToLower().Contains("date"))
                        .GetObject<XElement>().Value;
                    feedItem.Published = DateTime.Parse(blogPubDate);
                }
                catch
                {
                    if (rss.LastUpdatedTime.Date > DateTime.MinValue)
                    {
                        feedItem.Published = rss.LastUpdatedTime.Date;
                    }
                }
            }

            if (remoteFeed.SiteUrl.Contains("echojs.com") && feedItem.Published == DateTime.MinValue)
            {
                feedItem.Published = DateTime.Now;
            }

            if (feedItem.Published == DateTime.MinValue && lFeed.Public)
            {
                feedItem.Published = DateTime.Now;
            }

            remoteFeed.Articles.Add(feedItem);
        }
        catch (Exception ex)
        {
            // A failing item is logged and skipped; the remaining items are still processed.
            new LogService().InsertError(ex.ToString(), "GetRemoteRss FeedUrl: " + url);
        }
    }

    return remoteFeed;
}
/// <summary>
/// Requests the document at <paramref name="url"/> and parses it as a feed.
/// The Atom 0.3 formatter is tried first, then the RSS 1.0 formatter;
/// anything else is handed to <c>CustomSyndicationFeed.Load</c>
/// (RSS 2.0 or Atom 1.0).
/// </summary>
/// <param name="url">Address of the feed to request.</param>
/// <param name="timeout">Value assigned to the request's <c>Timeout</c>.</param>
/// <param name="credentials">Optional credentials; when non-null they are set on the request and pre-authentication is enabled.</param>
/// <returns>
/// The parsed feed, or <c>null</c> when a <see cref="WebException"/>,
/// <see cref="SocketException"/> or <see cref="IOException"/> was logged
/// instead. An <see cref="XmlException"/> from the final loader is logged and rethrown.
/// </returns>
internal SyndicationFeed GetFeed(Uri url, int timeout, NetworkCredential credentials)
{
    // Picks the parser by probing the reader in a fixed order.
    SyndicationFeed ParseFeed(RssXmlReader feedReader)
    {
        if (Atom03FeedFormatter.CanReadFrom(feedReader))
        {
            var atom03 = new Atom03FeedFormatter();
            atom03.ReadFrom(feedReader);
            return atom03.Feed;
        }

        if (Rss10FeedFormatter.CanReadFrom(feedReader))
        {
            // RSS 1.0
            var rss10 = new Rss10FeedFormatter();
            rss10.ReadFrom(feedReader);
            return rss10.Feed;
        }

        // RSS 2.0 or Atom 1.0
        try
        {
            return CustomSyndicationFeed.Load(feedReader);
        }
        catch (XmlException ex)
        {
            logger.Error(ex);
            throw;
        }
    }

    var webRequest = (HttpWebRequest)WebRequest.Create(url);
    webRequest.UserAgent = Configuration.UserAgent;
    webRequest.Timeout = timeout;

    if (credentials != null)
    {
        webRequest.Credentials = credentials;
        webRequest.PreAuthenticate = true;
    }

    SyndicationFeed result = null;

    try
    {
        using (WebResponse webResponse = webRequest.GetResponse())
        {
            var sanitized = new XmlSanitizingStream(webResponse.GetResponseStream());
            var content = sanitized.ReadToEnd();

            using (var feedReader = new RssXmlReader(Flush(content)))
            {
                result = ParseFeed(feedReader);
            }
        }
    }
    catch (WebException ex)
    {
        logger.Error(ex);
    }
    catch (SocketException ex)
    {
        logger.Error(ex);
    }
    catch (IOException ex)
    {
        logger.Error(ex);
    }

    return result;
}