Пример #1
0
        /// <summary>
        /// Downloads the Yandex "internet" RSS feed and writes the title, summary and link
        /// of every parsed item to the console.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers can neither await it nor observe its
        /// exceptions. A failed download is reported on standard error and nothing else is
        /// printed.
        /// </remarks>
        public static async void ParseRSS()
        {
            string feed = null;

            using (var client = new HttpClient())
            {
                try
                {
                    feed = await client.GetStringAsync("https://news.yandex.ru/internet.rss");
                }
                catch (Exception ex)
                {
                    // Still best effort (the failure must not escape an async void method),
                    // but the reason is no longer discarded silently.
                    Console.Error.WriteLine($"Failed to download RSS feed: {ex.Message}");
                }
            }

            if (feed == null)
            {
                return;
            }

            var parser = new RssParser();

            foreach (var element in parser.Parse(feed))
            {
                Console.WriteLine($"Заголовок: {element.Title}");
                Console.WriteLine($"Текст: {element.Summary}");
                Console.WriteLine($"Ссылка: {element.FeedUrl}");
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the RSS feed at <paramref name="endpoint"/> and converts every parsed
        /// item into an <c>Article</c>.
        /// </summary>
        /// <param name="endpoint">Address the feed is requested from.</param>
        /// <returns>One <c>Article</c> per parsed item, in the order the parser yields them.</returns>
        /// <exception cref="Exception">
        /// Thrown with the message "Feed unavailable" when the response status is not a success code.
        /// </exception>
        public async Task <List <Article> > GetArticles(string endpoint)
        {
            string rawFeed;

            using (var http = new HttpClient())
            {
                var response = await http.GetAsync(endpoint);

                if (response.IsSuccessStatusCode == false)
                {
                    throw new Exception("Feed unavailable");
                }

                rawFeed = await response.Content.ReadAsStringAsync();
            }

            var items  = new RssParser().Parse(rawFeed);
            var result = new List <Article>();

            foreach (var item in items)
            {
                // Article is built from title, summary, image URL and publish date, in that order.
                result.Add(new Article(item.Title, item.Summary, item.ImageUrl, item.PublishDate));
            }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Builds articles from the ten most recently published items of the feed at
        /// <c>FeedUrl</c>.
        /// </summary>
        /// <returns>The scraped articles, ordered by publish date, newest first.</returns>
        /// <exception cref="Exception">
        /// Thrown when the downloaded feed text is null or empty.
        /// </exception>
        private async Task <List <Article> > ScrapeArticles()
        {
            var scraped = new List <Article>();
            var rawFeed = await httpHelper.Get(FeedUrl);

            if (string.IsNullOrEmpty(rawFeed))
            {
                throw new Exception($"Could not get feed from url {FeedUrl}");
            }

            var latestItems = new RssParser()
                              .Parse(rawFeed)
                              .OrderByDescending(item => item.PublishDate)
                              .Take(10);

            foreach (var item in latestItems)
            {
                // The item's InternalID is used as the article URL: it is fetched here and
                // stored as SourceUrl below.
                var page = await httpHelper.GetDocumentFromUrl(item.InternalID);

                var article = new Article
                {
                    Source    = NewsSource.PriznajemHr,
                    Guid      = GetGuidFromUrl(item.InternalID),
                    Title     = item.Title,
                    Image     = GetArticleImage(page),
                    Text      = await httpHelper.GetArticleText(item.Content),
                    Summary   = await httpHelper.GetFirstParagraph(item.Content),
                    Keywords  = keywordHelper.GetKeywordsFromTitle(item.Title),
                    SourceUrl = item.InternalID
                };

                scraped.Add(article);
            }

            return scraped;
        }
Пример #4
0
        /// <summary>
        /// Verifies that the channel title, link, description and last build date written by
        /// <c>RssXmlBuilder</c> come back unchanged on the parsed feed.
        /// </summary>
        public void Parse_WithChannelData_CorrectObjectReturns()
        {
            // Arrange
            var fixture = new Fixture();

            var expectedTitle         = fixture.Create <string>();
            var expectedLink          = fixture.Create <Uri>();
            var expectedDescription   = fixture.Create <string>();
            var expectedLastBuildDate = new DateTimeOffset(2018, 3, 16, 20, 20, 20, TimeSpan.Zero);

            var feedXml = new RssXmlBuilder()
                          .WithTitle(expectedTitle)
                          .WithLink(expectedLink.ToString())
                          .WithDescription(expectedDescription)
                          .WithLastBuildDate("Fri, 16 Mar 2018 20:20:20 +0000")
                          .Build();

            // Act
            var parsed = new RssParser().Parse(feedXml);

            // Assert
            Assert.Equal(expectedTitle, parsed.Title);
            Assert.Equal(expectedLink, parsed.Link);
            Assert.Equal(expectedDescription, parsed.Description);
            Assert.Equal(expectedLastBuildDate, parsed.LastBuildDate);
        }
Пример #5
0
        /// <summary>
        /// Downloads the feed at <paramref name="url"/> and parses it as RSS.
        /// </summary>
        /// <param name="url">Address of the feed.</param>
        /// <returns>
        /// The parsed items, or an empty list when the download throws.
        /// </returns>
        public async Task <IEnumerable <RssSchema> > Parse(string url)
        {
            string content;

            using (var http = new HttpClient())
            {
                try
                {
                    content = await http.GetStringAsync(url);
                }
                catch
                {
                    // Download failures are deliberately ignored; the caller receives an empty list.
                    content = null;
                }
            }

            if (content != null)
            {
                return new RssParser().Parse(content);
            }

            return new List <RssSchema>();
        }
Пример #6
0
        /// <summary>
        /// Verifies that a single item built with a title, description, link and publication
        /// date is exposed with the same values on the parsed feed's <c>Items</c>.
        /// </summary>
        public void Parse_WithItem_RssFeedItemWithCorrectDataReturns()
        {
            // Arrange
            var fixture = new Fixture();

            var expectedTitle       = fixture.Create <string>();
            var expectedLink        = fixture.Create <Uri>();
            var expectedDescription = fixture.Create <string>();
            var expectedPubDate     = new DateTimeOffset(2018, 3, 18, 16, 48, 17, TimeSpan.Zero);

            var itemXml = new RssItemXmlBuilder()
                          .WithTitle(expectedTitle)
                          .WithDescription(expectedDescription)
                          .WithLink(expectedLink.ToString())
                          .WithPubDate("Sun, 18 Mar 2018 16:48:17 +0000")
                          .Build();

            var feedXml = new RssXmlBuilder()
                          .WithItems(new [] { itemXml })
                          .Build();

            // Act
            var parsedItems = new RssParser().Parse(feedXml).Items;

            // Assert
            Assert.Equal(1, parsedItems?.Length);
            Assert.Equal(expectedTitle, parsedItems?[0].Title);
            Assert.Equal(expectedDescription, parsedItems?[0].Description);
            Assert.Equal(expectedPubDate, parsedItems?[0].PubDate);
            Assert.Equal(expectedLink, parsedItems?[0].Link);
        }
        /// <summary>
        /// Downloads the YLE major-headlines RSS feed and prints item titles to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task Main(string[] args)
        {
            const string YleRSSUrl = "https://feeds.yle.fi/uutiset/v1/majorHeadlines/YLE_UUTISET.rss";

            Console.WriteLine("Haetaan uutisotsikkoja YLE:ltä...");

            var http = new HttpClient();
            var xml  = await http.GetStringAsync(YleRSSUrl);

            IEnumerable <RssSchema> items = new RssParser().Parse(xml);

            // The counter is incremented first and then compared with "> 10", so up to
            // eleven titles are printed before the loop stops.
            var printed = 0;

            foreach (RssSchema item in items)
            {
                Console.WriteLine($"Title: {item.Title}");

                if (++printed > 10)
                {
                    break;
                }
            }
        }
        /// <summary>
        /// Clears <c>RSSFeed</c>, downloads the feed at <c>Url</c> and adds every parsed item
        /// to <c>RSSFeed</c>.
        /// </summary>
        /// <remarks>
        /// Exceptions thrown by the download are swallowed, in which case nothing is added.
        /// The method is <c>async void</c>, so it cannot be awaited.
        /// </remarks>
        public async void ParseRSS()
        {
            RSSFeed.Clear();

            string content = null;

            using (var http = new HttpClient())
            {
                try
                {
                    content = await http.GetStringAsync(Url);
                }
                catch
                {
                    // Best effort: a failed download leaves the collection empty.
                }
            }

            if (content == null)
            {
                return;
            }

            foreach (var item in new RssParser().Parse(content))
            {
                RSSFeed.Add(item);
            }
        }
Пример #9
0
        /// <summary>
        /// Downloads the global-warming and climate feeds concurrently and returns the links
        /// of all items published after <paramref name="lastCheck"/>.
        /// </summary>
        /// <param name="lastCheck">Only items with a later publish date are returned.</param>
        /// <returns>
        /// The combined results of both feeds after <c>Distinct()</c>, which relies on the
        /// default equality of <c>RSSResult</c>.
        /// </returns>
        /// <exception cref="HttpRequestException">
        /// Thrown with the message "Empty feed" when a downloaded feed is null, empty or whitespace.
        /// </exception>
        public async Task <List <RSSResult> > GetNewsLinks(DateTime lastCheck)
        {
            logger.LogInformation("Started feed acq");

            // Downloads one feed and keeps only the items newer than lastCheck.
            async Task <List <RSSResult> > LoadFeed(string url)
            {
                var xml = await httpClient.GetStringAsync(url);

                if (string.IsNullOrWhiteSpace(xml))
                {
                    throw new HttpRequestException("Empty feed");
                }

                var fresh = from item in rssParser.Parse(xml)
                            where item.PublishDate > lastCheck
                            select new RSSResult(new Uri(item.FeedUrl), new DateTimeOffset(item.PublishDate));

                return fresh.ToList();
            }

            // Both downloads are started before either one is awaited.
            var warmingTask = LoadFeed(GLOBAL_WARMING_RSS);
            var climateTask = LoadFeed(CLIMATE_RSS);
            var perFeed     = await Task.WhenAll(warmingTask, climateTask);

            var merged = perFeed.SelectMany(results => results)
                                .Distinct()
                                .ToList();

            logger.LogInformation("Feed count {Count}", merged.Count);
            return merged;
        }
Пример #10
0
 /// <summary>
 /// Checks that <c>RssParser.Parse</c> returns null for the example file "Sample.xml".
 /// </summary>
 public void ParseFeed_NotRss()
 {
     var path = GetExamplesWith("Sample.xml");

     using (var input = File.OpenRead(path))
     {
         var parsed = RssParser.Parse(input);

         Assert.That(parsed, Is.Null);
     }
 }
Пример #11
0
 /// <summary>
 /// Parses the named example file and returns the length of the first item's content.
 /// </summary>
 /// <param name="filename">Example file name, resolved through <c>GetExamplesWith</c>.</param>
 /// <returns><c>Content.Length</c> of the first parsed item.</returns>
 public int ParseFeed_Content(string filename)
 {
     var path = GetExamplesWith(filename);

     using (var input = File.OpenRead(path))
     {
         var firstItem = RssParser.Parse(input).Items.First();

         return firstItem.Content.Length;
     }
 }
Пример #12
0
        /// <summary>
        /// Downloads the feed at <paramref name="url"/> and returns its parsed items.
        /// </summary>
        /// <param name="url">Address of the feed.</param>
        /// <param name="isLimit">Forwarded unchanged to <c>GetTextFromUrl</c>.</param>
        /// <returns>
        /// The parsed items. The list is empty when the download throws or yields null or an
        /// empty string; when parsing throws, it holds the items added before the error.
        /// </returns>
        public static async Task <List <RssSchema> > GetSchemaFromUrl(string url, bool isLimit = false)
        {
            string feed = null;

            try
            {
                feed = await GetTextFromUrl(url, isLimit);
            }
            catch (Exception ex)
            {
                // Still best effort, but trace the failure the same way parse errors are
                // traced below instead of discarding it.
                Debug.WriteLine(ex.Message);
            }

            var list = new List <RssSchema>();

            if (string.IsNullOrEmpty(feed))
            {
                return list;
            }

            try
            {
                var parser = new RssParser();

                foreach (var item in parser.Parse(feed))
                {
                    list.Add(item);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex.Message);
            }

            return list;
        }
Пример #13
0
        /// <summary>
        /// Parses RSS text with a newly created <c>RssParser</c>.
        /// </summary>
        /// <param name="feedContent">The feed text handed to the parser.</param>
        /// <returns>Whatever <c>RssParser.Parse</c> returns for <paramref name="feedContent"/>.</returns>
        public static IEnumerable <RssSchema> ParseRSS(string feedContent)
        {
            return new RssParser().Parse(feedContent);
        }
Пример #14
0
        /// <summary>
        /// Takes the ten most recently published items of the feed at <c>FeedUrl</c> and
        /// builds an article for each one whose categories contain "Vijesti".
        /// </summary>
        /// <returns>The scraped articles, ordered by publish date, newest first.</returns>
        /// <exception cref="NullReferenceException">
        /// Thrown explicitly when the downloaded feed text is null or empty.
        /// </exception>
        private async Task <List <Article> > ScrapeArticles()
        {
            var scraped = new List <Article>();
            var rawFeed = await httpHelper.Get(FeedUrl);

            if (string.IsNullOrEmpty(rawFeed))
            {
                throw new NullReferenceException($"Could not get feed from url {FeedUrl}");
            }

            // The category filter is applied after Take(10), so fewer than ten articles
            // may be returned.
            var latestItems = new RssParser()
                              .Parse(rawFeed)
                              .OrderByDescending(item => item.PublishDate)
                              .Take(10);

            foreach (var item in latestItems)
            {
                if (!item.Categories.Contains("Vijesti"))
                {
                    continue;
                }

                var article = new Article
                {
                    Source    = NewsSource.IndexHr,
                    Guid      = GetGuidFromUrl(item.InternalID),
                    Title     = item.Title,
                    Image     = item.ImageUrl,
                    Text      = await GetArticleText(item.InternalID),
                    Summary   = item.Summary,
                    Keywords  = keywordHelper.GetKeywordsFromTitle(item.Title),
                    SourceUrl = item.InternalID
                };

                scraped.Add(article);
            }

            return scraped;
        }
Пример #15
0
        /// <summary>
        /// Downloads the DR "allenyheder" feed and returns up to twelve items whose title
        /// contains "Corona" or "covid-19" (case-insensitive), ordered by creation date ascending.
        /// </summary>
        /// <returns>
        /// The matching items; every item carries the same hard-coded image URL. Items
        /// without a title are skipped.
        /// </returns>
        public async static Task <List <NewsInformation> > GetFilteredNewsAsync()
        {
            string feed;

            // Dispose the client once the download has finished instead of leaking one
            // instance per call.
            using (var client = new HttpClient())
            {
                feed = await client.GetStringAsync("https://www.dr.dk/nyheder/service/feeds/allenyheder/");
            }

            var parser     = new RssParser();
            var newsResult = parser.Parse(feed);

            var results = newsResult.Select(item => new NewsInformation()
            {
                Title       = item.Title,
                Description = item.Summary,
                CreatedDate = item.PublishDate,
                ImageUrl    = @"https://is1-ssl.mzstatic.com/image/thumb/Purple124/v4/29/55/2e/29552ea2-5952-af7a-f398-89d177968258/AppIcon-0-0-1x_U007emarketing-0-0-0-7-0-0-85-220.png/600x600wa.png"
            });

            return results
                   // A missing title cannot match; checking it first avoids a NullReferenceException.
                   .Where(w => w.Title != null &&
                               (w.Title.IndexOf("Corona", StringComparison.OrdinalIgnoreCase) != -1 ||
                                w.Title.IndexOf("covid-19", StringComparison.OrdinalIgnoreCase) != -1))
                   .OrderBy(w => w.CreatedDate)
                   .Take(12)
                   .ToList();
        }
Пример #16
0
        /*
         * Gets a single feed from the given url and adds each of its items to rss.
         *
         * Items without an ExtraImageUrl get a default icon, and items without an ImageUrl
         * reuse their ExtraImageUrl. An HttpRequestException during the download is written
         * to the console and the method returns without adding anything.
         */
        private async void getFeed(String feedUrl)
        {
            string rssString = null;

            try
            {
                rssString = await ServerConfig.Instance.client.GetStringAsync(feedUrl);
            }
            catch (HttpRequestException e)
            {
                // InnerException is not always set; fall back to the exception itself so that
                // reporting the error cannot throw a NullReferenceException from this handler.
                Console.WriteLine("RSS FEED ERROR: " + (e.InnerException ?? e).Message);
                return;
            }

            foreach (RssSchema element in rssParser.Parse(rssString))
            {
                // If the item does not include an image, use a default one
                if (element.ExtraImageUrl == null)
                {
                    element.ExtraImageUrl = "http://csse-s302g3.canterbury.ac.nz/donationIcon.png";
                }
                if (element.ImageUrl == null)
                {
                    element.ImageUrl = element.ExtraImageUrl;
                }
                rss.Add(element);
            }

            // NOTE(review): OrderByDescending returns a new sequence and its result is
            // discarded here, so this statement does not reorder rss. Left as is because the
            // type of rss is not visible from this method — confirm the intended in-place sort.
            rss.OrderByDescending(r => r.PublishDate);
        }
Пример #17
0
        /// <summary>
        /// Downloads the feed at <paramref name="feedUrl"/> and parses it as RSS.
        /// </summary>
        /// <param name="feedUrl">Address of the feed.</param>
        /// <returns>
        /// The parsed items, or an empty sequence when the download throws.
        /// </returns>
        public static async Task <IEnumerable <RssSchema> > FetchAsync(string feedUrl)
        {
            string content = null;

            using (var http = new HttpClient())
            {
                try
                {
                    content = await http.GetStringAsync(feedUrl);
                }
                catch
                {
                    // Exceptions are deliberately not handled here for now.
                }
            }

            if (content == null)
            {
                return Enumerable.Empty <RssSchema>();
            }

            return new RssParser().Parse(content);
        }
Пример #18
0
 /// <summary>
 /// Tries each entry of <c>Source</c> in order on a thread-pool task and returns the parsed
 /// feed of the first entry that yields non-empty data and parses without throwing.
 /// </summary>
 /// <returns>
 /// The parsed items, or null when no entry of <c>Source</c> produced a feed.
 /// </returns>
 public virtual async Task <IEnumerable <RssSchema> > Read()
 {
     Func <IEnumerable <RssSchema> > readFirstAvailable = () =>
     {
         for (var index = 0; index < Source.Length; index++)
         {
             try
             {
                 var body = Utils.HttpGET(Source[index]);
                 if (string.IsNullOrEmpty(body))
                 {
                     // Nothing downloaded from this entry; move on to the next one.
                     continue;
                 }

                 return new RssParser().Parse(body);
             }
             catch (Exception ex)
             {
                 // Log the failure twice (summary, then target URL with details) and try the next entry.
                 ConsoleLog.Error("Feed", ex.GetFormatString());

                 var details = "Target Url: ".CreateStringBuilder()
                               .AppendLine(Source[index])
                               .Append(ConsoleLog.ErrorLogBuilder(ex, true))
                               .ToString();
                 ConsoleLog.Error("Feed", details);
             }
         }

         return null;
     };

     return await Task.Run(readFirstAvailable);
 }
        /// <summary>
        /// Downloads the feed at <paramref name="url"/> and maps every parsed RSS item to a
        /// <c>Feed</c>.
        /// </summary>
        /// <param name="url">Address of the feed.</param>
        /// <returns>
        /// The mapped items; an empty list when the response status is not a success code or
        /// <c>IsValidXml</c> rejects the downloaded text; null when any exception is thrown.
        /// </returns>
        private static async Task <IEnumerable <Feed> > ParseRss(string url)
        {
            var mapped = new List <Feed>();

            try
            {
                var handler = new HttpClientHandler
                {
                    AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
                };

                using (var http = new HttpClient(handler))
                {
                    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;

                    // Get the XML
                    var response = await http.GetAsync(url);

                    if (!response.IsSuccessStatusCode)
                    {
                        return mapped;
                    }

                    var body = await response.Content.ReadAsStringAsync();
                    var xml  = body.Replace("\r\n", string.Empty).Trim();

                    // Check XML is valid
                    if (!IsValidXml(xml))
                    {
                        return mapped;
                    }

                    foreach (var item in new RssParser().Parse(xml))
                    {
                        var feedItem = new Feed()
                        {
                            Author        = item.Author,
                            Categories    = item.Categories,
                            Content       = item.Content,
                            ExtraImageUrl = item.ExtraImageUrl,
                            FeedUrl       = item.FeedUrl,
                            ImageUrl      = item.ImageUrl,
                            InternalID    = item.InternalID,
                            MediaUrl      = item.MediaUrl,
                            PublishDate   = item.PublishDate,
                            Summary       = item.Summary,
                            Title         = item.Title
                        };

                        mapped.Add(feedItem);
                    }
                }

                return mapped;
            }
            catch
            {
                // Any failure (network, parsing, mapping) is reported to the caller as null.
                return null;
            }
        }
        /// <summary>
        /// Downloads the feed at <paramref name="url"/> and returns its items sorted by
        /// publish date, newest first.
        /// </summary>
        /// <param name="url">Address of the feed.</param>
        /// <returns>The parsed items as an array.</returns>
        public async Task <RssSchema[]> GetRss(string url)
        {
            var xml = await _client.GetStringAsync(url);

            var newestFirst = from item in _parser.Parse(xml)
                              orderby item.PublishDate descending
                              select item;

            return newestFirst.ToArray();
        }
Пример #21
0
 /// <summary>
 /// Checks the summary of the first item parsed from the example file "SampleRss20-03.xml".
 /// </summary>
 public void Strip_Summary()
 {
     var path = GetExamplesWith("SampleRss20-03.xml");

     using (var input = File.OpenRead(path))
     {
         var firstItem = RssParser.Parse(input).Items[0];

         Assert.That(firstItem.Summary, Is.EqualTo("この画像はテスト画像です。"));
     }
 }
Пример #22
0
        /// <summary>
        /// Verifies that parsing a null input throws <see cref="ArgumentNullException"/>.
        /// </summary>
        public void Parse_WithNullInput_ThrowsArgumentNullException()
        {
            // Arrange
            var sut = new RssParser();

            // Act
            Action parseNull = delegate { sut.Parse(null); };

            // Assert
            Assert.Throws <ArgumentNullException>(parseNull);
        }
Пример #23
0
        /// <summary>
        /// Verifies that parsing an empty string throws <see cref="ArgumentException"/>.
        /// </summary>
        public void Parse_WithEmptyInput_ThrowsArgumentException()
        {
            // Arrange
            var sut = new RssParser();

            // Act
            Action parseEmpty = delegate { sut.Parse(string.Empty); };

            // Assert
            Assert.Throws <ArgumentException>(parseEmpty);
        }
Пример #24
0
        /// <summary>
        /// Completion handler for the RSS string download. On the main thread it dismisses the
        /// current alert, then either shows the download error or repopulates
        /// <c>mDatasource.Apps</c> from the parsed result and starts fetching each app's icon
        /// in the background.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Carries the download error, if any, or the downloaded text.</param>
        private void DownloadCompleted(object sender, DownloadStringCompletedEventArgs e)
        {
            // Everything below is queued to run on the main thread.
            UIApplication.SharedApplication.BeginInvokeOnMainThread(() => {
                UIApplication.SharedApplication.NetworkActivityIndicatorVisible = false;

                // Dismiss the alert currently held in mAlert before reporting or continuing.
                mAlert.DismissWithClickedButtonIndex(0, true);

                if (e.Error != null)
                {
                    DisplayError("Warning", "The rss feed could not be downloaded: " + e.Error.Message);
                }
                else
                {
                    try
                    {
                        //clear the selected items
                        this.GridView.ClearSelectedItems();

                        //Clear the rows
                        mDatasource.ClearRows();
                        mDatasource.Apps.Clear();

                        foreach (var v in RssParser.Parse(e.Result))
                        {
                            mDatasource.Apps.Add(v);
                        }

                        // mAlert is replaced with a new alert that stays up while icons are fetched.
                        mAlert = new UIAlertView("Fetching Icons...", "", null, null, null);
                        mAlert.Show();
                        UIApplication.SharedApplication.NetworkActivityIndicatorVisible = true;

                        // Icons are downloaded one after another on a background task. Exceptions
                        // thrown inside this task are not seen by the surrounding try/catch.
                        Task.Run(() => {
                            foreach (var anApp in mDatasource.Apps)
                            {
                                byte[] data = null;

                                using (var c = new GzipWebClient())
                                {
                                    data = c.DownloadData(anApp.ImageUrl);
                                }


                                anApp.Image = UIImage.LoadFromData(NSData.FromArray(data));
                            }
                        }).ContinueWith(prevTask => {
                            // Runs only when the icon task completed without faulting or being
                            // cancelled (OnlyOnRanToCompletion), on the scheduler captured from the
                            // synchronization context current at this point.
                            // NOTE(review): if any icon download throws, this continuation is
                            // skipped and the "Fetching Icons..." alert is not dismissed here — confirm.
                            mAlert.DismissWithClickedButtonIndex(0, true);
                            UIApplication.SharedApplication.NetworkActivityIndicatorVisible = false;

                            GridView.ReloadData();
                        }, CancellationToken.None, TaskContinuationOptions.OnlyOnRanToCompletion, TaskScheduler.FromCurrentSynchronizationContext());
                    }
                    catch
                    {
                        // Any exception raised synchronously in the try block, not only a
                        // parse failure, is reported with this message.
                        DisplayError("Warning", "Malformed Xml was found in the Rss Feed.");
                    }
                }
            });
        }
Пример #25
0
 /// <summary>
 /// Downloads <c>Source</c> with <c>Utils.HttpGET</c> and parses the result as RSS, both on
 /// a thread-pool task.
 /// </summary>
 /// <returns>The items returned by <c>RssParser.Parse</c>.</returns>
 public async Task <IEnumerable <RssSchema> > Read()
 {
     var parsed = await Task.Run(() =>
     {
         var body = Utils.HttpGET(Source);
         return new RssParser().Parse(body);
     });

     return parsed;
 }
        /// <summary>
        /// Takes the next user from <c>RssFeedsQueue</c>, parses that user's RSS feed and
        /// stores every news item whose link is not already known to <c>_rssDataService</c>.
        /// </summary>
        /// <param name="id">Identifier that appears in log messages as "Thread[id]".</param>
        /// <returns>
        /// false when the queue is empty, the dequeued user is null, or dequeuing throws;
        /// otherwise true, even when parsing or storing failed and was only logged.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the exception is passed as the last argument of <c>LogError</c>. If
        /// <c>_logger</c> is a Microsoft.Extensions.Logging logger, that overload treats it as a
        /// format argument rather than as the exception — confirm against the logger type.
        /// </remarks>
        bool GetRssNews(int id)
        {
            WeiboUser feedOwner;

            try
            {
                feedOwner = RssFeedsQueue.Count == 0 ? null : RssFeedsQueue.Dequeue();
            }
            catch (Exception ex)
            {
                _logger.LogError($"Thread[{id}] error.", ex);
                return false;
            }

            if (feedOwner == null)
            {
                return false;
            }

            try
            {
                IList <News> parsedNews = RssParser.Parse(feedOwner.RssUrl);

                foreach (var item in parsedNews)
                {
                    try
                    {
                        // Only items whose link is not stored yet are enriched and saved.
                        if (_rssDataService.GetByLink(item.Link) == null)
                        {
                            item.City      = feedOwner.City;
                            item.Country   = feedOwner.Country;
                            item.Province  = feedOwner.Province;
                            item.Publisher = feedOwner.Name;
                            item.Source    = "微博";
                            item.Status    = NewsStatus.Unconfirmed;
                            _rssDataService.Create(item);
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one item is logged and the remaining items are still processed.
                        _logger.LogError($"Thread[{id}] mongo error.", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                _logger.LogError($"Thread[{id}] rss error.", ex);
            }

            return true;
        }
Пример #27
0
        /// <summary>
        /// Downloads the text at <paramref name="uri"/> with <c>HttpGetStringAsync</c> and
        /// parses it as RSS.
        /// </summary>
        /// <param name="uri">Address of the feed.</param>
        /// <returns>The items returned by <c>RssParser.Parse</c>.</returns>
        public static async Task <IEnumerable <RssSchema> > FetchAndParseRSSAsync(Uri uri)
        {
            var downloaded = await HttpGetStringAsync(uri);

            return new RssParser().Parse(downloaded);
        }
Пример #28
0
 /// <summary>
 /// Checks that the first item parsed from "SampleRss20-01.xml" has no categories.
 /// </summary>
 public void ParseFeed_Categories()
 {
     var path = GetExamplesWith("SampleRss20-01.xml");

     using (var input = File.OpenRead(path))
     {
         var firstItem = RssParser.Parse(input).Items[0];

         Assert.That(firstItem.Categories.Count, Is.EqualTo(0));
     }
 }
Пример #29
0
        /// <summary>
        /// Crawls news: collects headers from every configured RSS subscription, then resolves
        /// each header into a news body.
        /// </summary>
        /// <returns>
        /// The bodies that were parsed successfully; subscriptions and headers that throw are
        /// logged and skipped.
        /// </returns>
        private NewsBody[] CrawleNews()
        {
            LogManager.WriteLine("Crawle news rss...");

            var collectedHeaders = new List <NewsHeader>();

            for (int feedIndex = 0; feedIndex < ConfigManager.Config.Rss.Length; feedIndex++)
            {
                string feedUrl = ConfigManager.Config.Rss[feedIndex][0];

                // A leading '#' temporarily disables the subscription.
                if (!feedUrl.StartsWith("#"))
                {
                    string feedClass = ConfigManager.Config.Rss[feedIndex][1];

                    try
                    {
                        string feedXml = Client.GET(feedUrl);
                        collectedHeaders.AddRange(RssParser.Parse(feedXml, feedClass));
                    }
                    catch (Exception ex)
                    {
                        LogManager.ShowException(ex, "Cannot get " + feedUrl);
                    }
                }
            }

            LogManager.WriteLine("Crawle news body...");

            var bodies    = new List <NewsBody>();
            int processed = 0;

            foreach (NewsHeader header in collectedHeaders)
            {
                // Report progress once for every 5 headers handled.
                if (++processed % 5 == 0)
                {
                    LogManager.WriteLine(string.Format("<{0}> items done...", processed));
                }

                try
                {
                    bodies.Add(HeaderParser.Parse(header));
                }
                catch (Exception ex)
                {
                    LogManager.ShowException(ex);
                }
            }

            return bodies.ToArray();
        }
Пример #30
0
        /// <summary>
        /// Downloads the feed at <paramref name="url"/>, decodes it with the charset reported
        /// by <c>GetCharSet</c> (falling back to <c>Encoding.Default</c>) and returns the parsed
        /// items.
        /// </summary>
        /// <param name="url">Address of the feed.</param>
        /// <param name="isLimit">When true, the client timeout is set to 20 seconds.</param>
        /// <param name="Success">
        /// Optional callback invoked with the resulting list just before it is returned,
        /// including when the list is empty.
        /// </param>
        /// <returns>
        /// The parsed items. The list is empty when the download or decoding fails; when
        /// parsing throws, it holds the items added before the error.
        /// </returns>
        public static async Task <List <RssSchema> > GetFeedsFromUrl(string url, bool isLimit = false, Action <List <RssSchema> > Success = null)
        {
            var list   = new List <RssSchema>();
            var client = GetClient(url);

            try
            {
                if (isLimit)
                {
                    client.Timeout = TimeSpan.FromSeconds(20);
                }

                string feed = null;

                try
                {
                    client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3837.0 Safari/537.36 Edg/77.0.211.2");

                    using (var message = await client.GetAsync(url))
                    {
                        var content = await message.Content.ReadAsByteArrayAsync();

                        // Decode once with the default encoding only to look for a declared charset.
                        var encode  = Encoding.Default;
                        var charset = GetCharSet(Encoding.Default.GetString(content));

                        if (!string.IsNullOrEmpty(charset))
                        {
                            try
                            {
                                encode = Encoding.GetEncoding(charset);
                            }
                            catch (ArgumentException)
                            {
                                // Unknown or unsupported charset name: keep the default
                                // encoding instead of discarding the whole feed.
                            }
                        }

                        using (var stream = await message.Content.ReadAsStreamAsync())
                        using (var reader = new StreamReader(stream, encode))
                        {
                            feed = await reader.ReadToEndAsync();
                        }
                    }
                }
                catch
                {
                    // Best effort: any download or decoding failure yields an empty result.
                }

                if (feed != null)
                {
                    try
                    {
                        var parser = new RssParser();

                        foreach (var item in parser.Parse(feed))
                        {
                            list.Add(item);
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: keep whatever was parsed before the error.
                    }
                }
            }
            finally
            {
                // Runs even when setting the timeout throws, so the client is never leaked.
                client.Dispose();
            }

            Success?.Invoke(list);
            return list;
        }