public void FormatSampleFeed()
        {
            // arrange: one fully populated channel with a text input, an image and a single item
            var textInput = new Rss10TextInput
            {
                Name        = "TextInput",
                Title       = "My text input",
                Description = "Description of my text input",
                Link        = "https://example.org/my-text-input",
                About       = "https://example.org/my-text-input",
            };

            var channelImage = new Rss10Image
            {
                Title = "My channel image",
                Link  = "https://example.org/channel",
                Url   = "https://example.org/image.png",
                About = "https://example.org/image.png",
            };

            var article = new Rss10Item
            {
                Title       = "My awesome article",
                Description = "My awesome article description...",
                Link        = "https://example.org/article",
                About       = "https://example.org/article",
            };

            var sampleFeed = new Rss10Feed
            {
                Channel = new Rss10Channel
                {
                    Title       = "This is my feed title",
                    Description = "Description of my feed",
                    TextInput   = textInput,
                    About       = "https://example.org/channel",
                    Link        = "https://example.org/channel",
                    Image       = channelImage,
                    Items       = new List<Rss10Item> { article },
                },
            };

            // act: the formatter must report success and hand back a document
            var formatted = Rss10FeedFormatter.TryFormatRss10Feed(sampleFeed, out var document);

            Assert.True(formatted);

            // assert: serializing the document through an indenting writer yields some text
            var utf8     = Encoding.UTF8;
            var settings = new XmlWriterSettings
            {
                Encoding = utf8,
                Indent   = true,
            };

            using (var buffer = new MemoryStream())
            using (var textWriter = new StreamWriter(buffer, utf8))
            using (var writer = XmlWriter.Create(textWriter, settings))
            {
                document.WriteTo(writer);
                writer.Flush();

                var writtenBytes = buffer.ToArray();
                var xmlString    = utf8.GetString(writtenBytes);
                Assert.NotEmpty(xmlString);
            }
        }
        public void ParseAndFormat(SampleFeed embeddedDocument)
        {
            // arrange
            var sourceDocument = embeddedDocument.XDocument;

            // act: parse the sample, then format the parsed feed back into a document
            var parsed = Rss10FeedParser.TryParseRss10Feed(sourceDocument, out var feed);

            Assert.True(parsed);

            var formatted = Rss10FeedFormatter.TryFormatRss10Feed(feed, out var roundTrippedDocument);

            Assert.True(formatted);

            // Both documents go through identical writer settings so that only
            // content differences can make the comparison fail.
            var settings    = new XmlWriterSettings { Indent = true };
            var expectedXml = new StringBuilder();
            var actualXml   = new StringBuilder();

            using (var expectedWriter = XmlWriter.Create(expectedXml, settings))
            using (var actualWriter = XmlWriter.Create(actualXml, settings))
            {
                sourceDocument.WriteTo(expectedWriter);
                expectedWriter.Flush();

                roundTrippedDocument.WriteTo(actualWriter);
                actualWriter.Flush();

                // assert
                Assert.Equal(expectedXml.ToString(), actualXml.ToString());
            }
        }
        public void FormatSampleFeedEmpty()
        {
            // arrange: a channel whose only item has no properties set
            var blankItem = new Rss10Item();
            var emptyFeed = new Rss10Feed
            {
                Channel = new Rss10Channel
                {
                    Items = new List<Rss10Item> { blankItem },
                },
            };

            // act: formatting must still succeed
            var formatted = Rss10FeedFormatter.TryFormatRss10Feed(emptyFeed, out var document);

            Assert.True(formatted);

            // assert: the resulting document serializes to a non-empty string
            var utf8     = Encoding.UTF8;
            var settings = new XmlWriterSettings
            {
                Encoding = utf8,
                Indent   = true,
            };

            using (var buffer = new MemoryStream())
            using (var textWriter = new StreamWriter(buffer, utf8))
            using (var writer = XmlWriter.Create(textWriter, settings))
            {
                document.WriteTo(writer);
                writer.Flush();

                var writtenBytes = buffer.ToArray();
                var xmlString    = utf8.GetString(writtenBytes);
                Assert.NotEmpty(xmlString);
            }
        }
        /// <summary>
        /// Exposes the client's <c>OpenReadCompleted</c> event as an observable that yields the
        /// feed parsed from the first completed read and then completes.
        /// </summary>
        /// <remarks>
        /// This method only subscribes to the event; it does not start a download itself.
        /// The <c>formatter</c> used for parsing is declared outside this method and is created
        /// here on first use.
        /// </remarks>
        /// <param name="client">The web client whose completed read is parsed.</param>
        /// <returns>An observable producing at most one feed.</returns>
        public static IObservable<SyndicationFeed> GetSydicationObservable(this WebClient client)
        {
            var completions = Observable.FromEvent<OpenReadCompletedEventHandler, OpenReadCompletedEventArgs>(
                h => (sender, e) => h(e),
                h => client.OpenReadCompleted += h,
                h => client.OpenReadCompleted -= h);

            var feeds = completions.Select(e =>
            {
                // Surface a failed download to the subscriber instead of touching e.Result.
                if (e.Error != null)
                {
                    throw e.Error;
                }

                if (formatter == null)
                {
                    formatter = new Rss10FeedFormatter();
                }

                // Dispose both the response stream and the reader once parsing is done.
                // XmlReader.Create(Stream) does not close its input by default, so the
                // stream needs its own using.
                using (var responseStream = e.Result)
                using (var xmlReader = XmlReader.Create(responseStream))
                {
                    formatter.ReadFrom(xmlReader);
                }

                return formatter.Feed;
            });

            return feeds.Take(1);
        }
Пример #5
0
 /// <summary>
 /// Reports whether the reader's content is a feed: first by attempting a full
 /// <c>SyndicationFeed.Load</c>, and if that throws for any reason, by asking an
 /// <c>Rss10FeedFormatter</c> whether it can read from the same reader.
 /// </summary>
 /// <param name="xmlReader">Reader positioned on the candidate document; the load attempt reads from it.</param>
 /// <returns><c>true</c> when the load succeeds or the RSS 1.0 formatter accepts the reader.</returns>
 public static bool IsXmlFeed(this XmlReader xmlReader)
 {
     bool isFeed;

     try
     {
         SyndicationFeed.Load(xmlReader);
         isFeed = true;
     }
     catch
     {
         // Any failure of the built-in loader falls through to the RSS 1.0 check.
         var rss10Formatter = new Rss10FeedFormatter();
         isFeed = rss10Formatter.CanRead(xmlReader);
     }

     return isFeed;
 }
Пример #6
0
        /// <summary>
        /// Downloads the feed at <c>lFeed.Url</c>, parses it and maps the result onto a new
        /// <see cref="Feed"/>: author, name, description, site URL and one article per feed item.
        /// </summary>
        /// <param name="lFeed">Stored feed; its <c>Url</c>, <c>SiteUrl</c> and <c>Public</c> values are read.</param>
        /// <param name="timeout">Currently not used by this method.</param>
        /// <returns>
        /// The populated feed, or <c>null</c> when the download or both parsing attempts fail,
        /// or when the parsed feed has no item collection.
        /// </returns>
        public Feed GetRemoteFeed(Feed lFeed, int timeout = 0)
        {
            string url     = lFeed.Url;
            string siteUrl = lFeed.SiteUrl;

            var remoteFeed = new Feed
            {
                Url = url
            };

            SyndicationFeed rss      = null;
            XmlReader       reader   = null;
            var             encoding = GetEncoding(url, siteUrl);

            try
            {
                var str = string.Empty;
                using (var wc = new WebClient())
                {
                    remoteFeed.Encoding      = encoding;
                    wc.Headers["User-Agent"] = "www.rssheap.com";
                    wc.Encoding = encoding;
                    str         = wc.DownloadString(url);
                }

                str = str.Replace("media:thumbnail", "media");  //mashable fix

                // NOTE(review): DTD processing is enabled on downloaded content; confirm that
                // external entity resolution is not reachable through MyXmlReader.
                reader = new MyXmlReader(new StringReader(str), new XmlReaderSettings
                {
                    //MaxCharactersInDocument can be used to control the maximum amount of data
                    //read from the reader and helps prevent OutOfMemoryException
                    MaxCharactersInDocument = 1024 * 64 * 64 * 64,
                    DtdProcessing           = DtdProcessing.Parse
                });

                rss = SyndicationFeed.Load(reader);
            }
            catch
            {
                // Second attempt: hand the same reader to the RSS 1.0 formatter. If the download
                // itself failed the reader is still null here; presumably ReadFrom then throws
                // and the inner catch returns null.
                try
                {
                    var rss10ff = new Rss10FeedFormatter();
                    rss10ff.ReadFrom(reader);
                    rss = rss10ff.Feed;
                }
                catch
                {
                    return null;
                }
            }

            if (rss == null || rss.Items == null)
            {
                return null;
            }

            string authorName = (rss.Authors != null && rss.Authors.Count > 0) ? rss.Authors.First().Name ?? rss.Authors.First().Email : string.Empty;

            remoteFeed.Author = authorName;

            string title = rss.Title != null ? rss.Title.Text : string.Empty;

            remoteFeed.Name = title;

            string description = rss.Description != null ? rss.Description.Text : string.Empty;

            remoteFeed.Description = description;

            // Site URL resolution, in order of preference: the "alternate" link, the first link,
            // the feed id, the stored site URL (optionally refined by its canonical link) and
            // finally the authority part of the feed URL. Each step is best-effort.
            if (rss.Links != null)
            {
                var siteLink = rss.Links.FirstOrDefault(l => l.RelationshipType == "alternate");
                if (siteLink != null)
                {
                    try
                    {
                        remoteFeed.SiteUrl = siteLink.Uri.AbsoluteUri;
                    }
                    catch { }
                }
                if (remoteFeed.SiteUrl.IsNullOrEmpty() && rss.Links.Count > 0)
                {
                    try
                    {
                        remoteFeed.SiteUrl = rss.Links.First().Uri.AbsoluteUri;
                    }
                    catch { }
                }
            }

            if (remoteFeed.SiteUrl.IsNullOrEmpty())
            {
                try
                {
                    remoteFeed.SiteUrl = new Uri(rss.Id).AbsoluteUri;
                }
                catch { }
            }

            if (remoteFeed.SiteUrl.IsNullOrEmpty() && !siteUrl.IsNullOrEmpty())
            {
                remoteFeed.SiteUrl = siteUrl;

                //find canonical
                var html = string.Empty;
                using (var wc = new WebClient())
                {
                    wc.Encoding = encoding;
                    html        = wc.DownloadString(siteUrl);

                    var htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(html);

                    // SelectNodes yields null (not an empty collection) when nothing matches,
                    // so a page without <link> elements must not be dereferenced.
                    // NOTE(review): GetAttributeValue(name, false) is the bool-default overload;
                    // for rel="canonical" it likely evaluates to false, in which case this filter
                    // never matches - confirm against HtmlAgilityPack before relying on it.
                    var linkNodes = htmlDocument.DocumentNode.SelectNodes("//link");
                    var canonical = linkNodes == null
                                    ? null
                                    : linkNodes
                                      .Where(s => s.GetAttributeValue("rel", false) &&
                                             s.GetAttributeValue("href", false) &&
                                             s.GetAttributeValue("rel", string.Empty) == "canonical")
                                      .Select(s => s.GetAttributeValue("href", string.Empty))
                                      .FirstOrDefault(c => !c.IsNullOrEmpty());

                    if (!canonical.IsNullOrEmpty())
                    {
                        if (canonical != "/")
                        {
                            remoteFeed.SiteUrl = canonical;
                        }
                        else
                        {
                            remoteFeed.SiteUrl = siteUrl;
                        }
                    }
                }
            }

            if (remoteFeed.SiteUrl.IsNullOrEmpty())
            {
                try
                {
                    remoteFeed.SiteUrl = new Uri(url).GetLeftPart(UriPartial.Authority);
                }
                catch
                {
                }
            }

            foreach (var item in rss.Items)
            {
                // A failure while mapping one item is logged and the remaining items are still processed.
                try
                {
                    var    uri     = item.Links[0].Uri;
                    string itemUrl = string.Empty;
                    if (uri.IsAbsoluteUri)
                    {
                        // Prefer the first link that carries no media type.
                        if (item.Links.Any(t => t.MediaType.IsNullOrEmpty()))
                        {
                            itemUrl = item.Links.First(t => t.MediaType.IsNullOrEmpty()).Uri.AbsoluteUri;
                        }
                        else
                        {
                            itemUrl = item.Links[0].Uri.AbsoluteUri;
                        }
                    }
                    else
                    {
                        // Relative item links are resolved against the site URL.
                        itemUrl = new Uri(new Uri(remoteFeed.SiteUrl), uri.ToString()).AbsoluteUri;
                    }

                    if (itemUrl.IsNullOrEmpty())
                    {
                        continue;
                    }

                    Article feedItem = new Article();

                    // Body preference: content:encoded extension, then summary, then text content.
                    string itemBody = item.Summary != null ? item.Summary.Text : string.Empty;

                    if (item.ElementExtensions != null)
                    {
                        string bodyTemp = item.ElementExtensions
                                          .ReadElementExtensions <string>("encoded", "http://purl.org/rss/1.0/modules/content/")
                                          .FirstOrDefault();
                        if (bodyTemp != null)
                        {
                            itemBody = bodyTemp;
                        }
                    }

                    if (itemBody.IsNullOrEmpty())
                    {
                        if (item.Content != null)
                        {
                            if (item.Content is TextSyndicationContent textContent)
                            {
                                itemBody = textContent.Text;
                            }
                        }
                    }

                    feedItem.Name = item.Title != null ? item.Title.Text : string.Empty;
                    if (feedItem.Name.IsNullOrEmpty())
                    {
                        // Untitled item: fall back to the URL, or to its last path segment with
                        // hyphens turned into spaces when that segment looks like a slug.
                        feedItem.Name = itemUrl;
                        if (itemUrl.IndexOf("/") > 0)
                        {
                            var lastIndex = itemUrl.LastIndexOf("/") + 1;
                            var slug      = itemUrl.Substring(lastIndex, itemUrl.Length - lastIndex);

                            // The hyphen test has to run on the raw segment; testing after the
                            // replacement can never succeed.
                            if (slug.IndexOf("-") > 0)
                            {
                                feedItem.Name = slug.Replace("-", " ");
                            }
                        }
                    }

                    if (!feedItem.Name.IsNullOrEmpty())
                    {
                        feedItem.Name = feedItem.Name.Trim();
                    }

                    // Note: this overwrites the feed-level author with the item's first author.
                    if (item.Authors.Count > 0)
                    {
                        remoteFeed.Author = item.Authors[0].Name ?? item.Authors[0].Email;
                    }
                    feedItem.Url = itemUrl;

                    //if it is feedburner get the url from <feedburner:origLink>
                    var elemExt = item.ElementExtensions.FirstOrDefault(e => e.OuterName == "origLink");
                    if (elemExt != null)
                    {
                        feedItem.Url = elemExt.GetObject <XElement>().Value;
                    }

                    feedItem.Body      = itemBody;
                    feedItem.Published = item.PublishDate.DateTime != DateTime.MinValue ? item.PublishDate.DateTime : item.LastUpdatedTime.DateTime;
                    if (feedItem.Published == DateTime.MinValue)
                    {
                        // Best effort: first item extension whose name contains "date".
                        // A missing extension or an unparsable value is ignored.
                        try
                        {
                            var date = item.ElementExtensions.FirstOrDefault(e => e.OuterName.ToLower().Contains("date")).GetObject <XElement>().Value;
                            feedItem.Published = DateTime.Parse(date);
                        }
                        catch { }
                    }

                    if (feedItem.Published == DateTime.MinValue)
                    {
                        // Same lookup on the feed itself; on failure use the feed's last update date.
                        try
                        {
                            var blogPubDate = rss.ElementExtensions.FirstOrDefault(e => e.OuterName.ToLower().Contains("date")).GetObject <XElement>().Value;
                            feedItem.Published = DateTime.Parse(blogPubDate);
                        }
                        catch
                        {
                            if (rss.LastUpdatedTime.Date > DateTime.MinValue)
                            {
                                feedItem.Published = rss.LastUpdatedTime.Date;
                            }
                        }
                    }

                    if (remoteFeed.SiteUrl.Contains("echojs.com") && feedItem.Published == DateTime.MinValue)
                    {
                        feedItem.Published = DateTime.Now;
                    }

                    if (feedItem.Published == DateTime.MinValue && lFeed.Public)
                    {
                        feedItem.Published = DateTime.Now;
                    }

                    remoteFeed.Articles.Add(feedItem);
                }
                catch (Exception ex)
                {
                    new LogService().InsertError(ex.ToString(), "GetRemoteRss FeedUrl: " + url);
                }
            }
            return remoteFeed;
        }
Пример #7
0
        /// <summary>
        /// Downloads the document at <paramref name="url"/> and parses it as a syndication feed.
        /// Atom 0.3 is tried first, then RSS 1.0; anything else is handed to
        /// <c>CustomSyndicationFeed.Load</c> (RSS 2.0 or Atom 1.0).
        /// </summary>
        /// <param name="url">Address of the feed to download.</param>
        /// <param name="timeout">Value assigned to the request's <c>Timeout</c> (milliseconds).</param>
        /// <param name="credentials">Optional credentials; when supplied, the request also pre-authenticates.</param>
        /// <returns>
        /// The parsed feed, or <c>null</c> if a <see cref="WebException"/>, <see cref="SocketException"/>
        /// or <see cref="IOException"/> was caught and logged before a feed was produced.
        /// </returns>
        /// <exception cref="XmlException">Logged and rethrown when the RSS 2.0 / Atom 1.0 load fails with it.</exception>
        internal SyndicationFeed GetFeed(Uri url, int timeout, NetworkCredential credentials)
        {
            SyndicationFeed result = null;

            var webRequest = (HttpWebRequest)WebRequest.Create(url);
            webRequest.UserAgent = Configuration.UserAgent;
            webRequest.Timeout   = timeout;

            if (credentials != null)
            {
                webRequest.Credentials     = credentials;
                webRequest.PreAuthenticate = true;
            }

            try
            {
                using (var webResponse = webRequest.GetResponse())
                {
                    // The raw response is read through the sanitizing stream before parsing.
                    var payload = new XmlSanitizingStream(webResponse.GetResponseStream()).ReadToEnd();

                    using (var feedReader = new RssXmlReader(Flush(payload)))
                    {
                        if (Atom03FeedFormatter.CanReadFrom(feedReader))
                        {
                            // Atom 0.3
                            var atomFormatter = new Atom03FeedFormatter();
                            atomFormatter.ReadFrom(feedReader);
                            result = atomFormatter.Feed;
                        }
                        else if (Rss10FeedFormatter.CanReadFrom(feedReader))
                        {
                            // RSS 1.0
                            var rssFormatter = new Rss10FeedFormatter();
                            rssFormatter.ReadFrom(feedReader);
                            result = rssFormatter.Feed;
                        }
                        else
                        {
                            // RSS 2.0 or Atom 1.0: malformed XML is logged here and still
                            // propagates to the caller.
                            try
                            {
                                result = CustomSyndicationFeed.Load(feedReader);
                            }
                            catch (XmlException xmlError)
                            {
                                logger.Error(xmlError);
                                throw;
                            }
                        }
                    }
                }
            }
            catch (WebException webError)
            {
                logger.Error(webError);
            }
            catch (SocketException socketError)
            {
                logger.Error(socketError);
            }
            catch (IOException ioError)
            {
                logger.Error(ioError);
            }

            return result;
        }