/// <summary>
/// Downloads the RSS feed at https://news.yandex.ru/internet.rss and writes the
/// title, summary and link of every parsed entry to the console.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>, so callers cannot await it. A failed download
/// is reported on standard error and nothing is printed; exceptions thrown while
/// parsing are not caught here.
/// </remarks>
public static async void ParseRSS()
{
    string feed = null;
    using (var client = new HttpClient())
    {
        try
        {
            feed = await client.GetStringAsync("https://news.yandex.ru/internet.rss");
        }
        catch (Exception ex)
        {
            // Best effort: a failed download must not take the caller down,
            // but it should not disappear without a trace either.
            Console.Error.WriteLine($"Failed to download RSS feed: {ex.Message}");
        }
    }

    if (feed == null)
    {
        return;
    }

    var parser = new RssParser();
    foreach (var element in parser.Parse(feed))
    {
        Console.WriteLine($"Заголовок: {element.Title}");
        Console.WriteLine($"Текст: {element.Summary}");
        Console.WriteLine($"Ссылка: {element.FeedUrl}");
    }
}
/// <summary>
/// Downloads the RSS feed at <paramref name="endpoint"/> and converts every
/// parsed entry into an <c>Article</c>.
/// </summary>
/// <param name="endpoint">URL of the RSS feed to download.</param>
/// <returns>One article per parsed entry, in the order the parser yields them.</returns>
/// <exception cref="HttpRequestException">
/// The request failed or the server answered with a non-success status code.
/// </exception>
public async Task<List<Article>> GetArticles(string endpoint)
{
    string feed;
    using (var client = new HttpClient())
    using (var feedResponse = await client.GetAsync(endpoint))
    {
        // The response owns the network stream, so it is disposed together with the client.
        if (!feedResponse.IsSuccessStatusCode)
        {
            // More specific than a bare Exception, yet still caught by catch (Exception).
            throw new HttpRequestException("Feed unavailable");
        }

        feed = await feedResponse.Content.ReadAsStringAsync();
    }

    var parser = new RssParser();
    var articles = new List<Article>();
    foreach (var element in parser.Parse(feed))
    {
        articles.Add(new Article(element.Title, element.Summary, element.ImageUrl, element.PublishDate));
    }

    return articles;
}
/// <summary>
/// Builds articles from the ten most recently published entries of the feed at <c>FeedUrl</c>.
/// </summary>
/// <returns>The scraped articles, ordered by publish date, newest first.</returns>
/// <exception cref="Exception">The downloaded feed text was null or empty.</exception>
private async Task<List<Article>> ScrapeArticles()
{
    var feedText = await httpHelper.Get(FeedUrl);
    if (string.IsNullOrEmpty(feedText))
    {
        throw new Exception($"Could not get feed from url {FeedUrl}");
    }

    var latestEntries = new RssParser()
        .Parse(feedText)
        .OrderByDescending(entry => entry.PublishDate)
        .Take(10);

    var result = new List<Article>();
    foreach (var entry in latestEntries)
    {
        // The entry's InternalID is used as the URL of the article page.
        var page = await httpHelper.GetDocumentFromUrl(entry.InternalID);

        var article = new Article
        {
            Source = NewsSource.PriznajemHr,
            Guid = GetGuidFromUrl(entry.InternalID),
            Title = entry.Title,
            Image = GetArticleImage(page),
            Text = await httpHelper.GetArticleText(entry.Content),
            Summary = await httpHelper.GetFirstParagraph(entry.Content),
            Keywords = keywordHelper.GetKeywordsFromTitle(entry.Title),
            SourceUrl = entry.InternalID
        };
        result.Add(article);
    }

    return result;
}
/// <summary>
/// Checks that the channel-level title, link, description and last build date
/// written into the RSS XML come back unchanged on the parsed feed.
/// </summary>
public void Parse_WithChannelData_CorrectObjectReturns()
{
    // Arrange
    var fixture = new Fixture();
    var expectedTitle = fixture.Create<string>();
    var expectedLink = fixture.Create<Uri>();
    var expectedDescription = fixture.Create<string>();
    const string lastBuildDateText = "Fri, 16 Mar 2018 20:20:20 +0000";
    var expectedLastBuildDate = new DateTimeOffset(2018, 3, 16, 20, 20, 20, TimeSpan.Zero);

    var feedXml = new RssXmlBuilder()
        .WithTitle(expectedTitle)
        .WithLink(expectedLink.ToString())
        .WithDescription(expectedDescription)
        .WithLastBuildDate(lastBuildDateText)
        .Build();

    // Act
    var parsedFeed = new RssParser().Parse(feedXml);

    // Assert
    Assert.Equal(expectedTitle, parsedFeed.Title);
    Assert.Equal(expectedLink, parsedFeed.Link);
    Assert.Equal(expectedDescription, parsedFeed.Description);
    Assert.Equal(expectedLastBuildDate, parsedFeed.LastBuildDate);
}
/// <summary>
/// Downloads the document at <paramref name="url"/> and parses it as an RSS feed.
/// </summary>
/// <param name="url">Address of the feed to download.</param>
/// <returns>
/// The parsed entries, or an empty list when the download throws.
/// </returns>
public async Task<IEnumerable<RssSchema>> Parse(string url)
{
    string content;
    using (var http = new HttpClient())
    {
        try
        {
            content = await http.GetStringAsync(url);
        }
        catch
        {
            // Download failures are deliberately ignored; the caller gets an empty result.
            content = null;
        }
    }

    if (content == null)
    {
        return new List<RssSchema>();
    }

    return new RssParser().Parse(content);
}
/// <summary>
/// Checks that a single item written into the RSS XML is parsed into exactly one
/// feed item carrying the same title, description, publication date and link.
/// </summary>
public void Parse_WithItem_RssFeedItemWithCorrectDataReturns()
{
    // Arrange
    var fixture = new Fixture();
    var expectedTitle = fixture.Create<string>();
    var expectedLink = fixture.Create<Uri>();
    var expectedDescription = fixture.Create<string>();
    const string pubDateText = "Sun, 18 Mar 2018 16:48:17 +0000";
    var expectedPubDate = new DateTimeOffset(2018, 3, 18, 16, 48, 17, TimeSpan.Zero);

    var itemXml = new RssItemXmlBuilder()
        .WithTitle(expectedTitle)
        .WithDescription(expectedDescription)
        .WithLink(expectedLink.ToString())
        .WithPubDate(pubDateText)
        .Build();
    var feedXml = new RssXmlBuilder()
        .WithItems(new[] { itemXml })
        .Build();

    // Act
    var parsedFeed = new RssParser().Parse(feedXml);

    // Assert
    Assert.Equal(1, parsedFeed.Items?.Length);
    Assert.Equal(expectedTitle, parsedFeed.Items?[0].Title);
    Assert.Equal(expectedDescription, parsedFeed.Items?[0].Description);
    Assert.Equal(expectedPubDate, parsedFeed.Items?[0].PubDate);
    Assert.Equal(expectedLink, parsedFeed.Items?[0].Link);
}
/// <summary>
/// Downloads the YLE major-headlines RSS feed and prints the titles of its
/// first entries to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static async Task Main(string[] args)
{
    const string YleRSSUrl = "https://feeds.yle.fi/uutiset/v1/majorHeadlines/YLE_UUTISET.rss";

    Console.WriteLine("Haetaan uutisotsikkoja YLE:ltä...");

    HttpClient http = new HttpClient();
    string rssText = await http.GetStringAsync(YleRSSUrl);

    RssParser parser = new RssParser();
    IEnumerable<RssSchema> entries = parser.Parse(rssText);

    int printed = 0;
    foreach (RssSchema entry in entries)
    {
        Console.WriteLine($"Title: {entry.Title}");

        // The counter is checked after printing, so up to 11 titles are written.
        printed++;
        if (printed > 10)
        {
            break;
        }
    }
}
/// <summary>
/// Clears <c>RSSFeed</c>, downloads the feed at <c>Url</c> and adds every parsed
/// entry to <c>RSSFeed</c>.
/// </summary>
/// <remarks>
/// The collection is cleared before the download starts. When the download
/// throws, the exception is ignored and the collection stays empty.
/// </remarks>
public async void ParseRSS()
{
    string content = null;
    RSSFeed.Clear();

    using (var http = new HttpClient())
    {
        try
        {
            content = await http.GetStringAsync(Url);
        }
        catch
        {
            // Download failures are ignored on purpose.
        }
    }

    if (content == null)
    {
        return;
    }

    foreach (var entry in new RssParser().Parse(content))
    {
        RSSFeed.Add(entry);
    }
}
/// <summary>
/// Downloads the two configured RSS feeds concurrently and returns the distinct
/// entries published after <paramref name="lastCheck"/>.
/// </summary>
/// <param name="lastCheck">Only entries with a later publish date are returned.</param>
/// <returns>The distinct results of both feeds.</returns>
/// <exception cref="HttpRequestException">A feed was downloaded but was empty or whitespace.</exception>
public async Task<List<RSSResult>> GetNewsLinks(DateTime lastCheck)
{
    logger.LogInformation("Started feed acq");

    // Downloads one feed and keeps only the entries newer than lastCheck.
    async Task<List<RSSResult>> GetFeed(string url)
    {
        var body = await httpClient.GetStringAsync(url);
        if (string.IsNullOrWhiteSpace(body))
        {
            throw new HttpRequestException("Empty feed");
        }

        var fresh = new List<RSSResult>();
        foreach (var entry in rssParser.Parse(body))
        {
            if (entry.PublishDate > lastCheck)
            {
                fresh.Add(new RSSResult(new Uri(entry.FeedUrl), new DateTimeOffset(entry.PublishDate)));
            }
        }

        return fresh;
    }

    // Both downloads are started before either one is awaited.
    var pending = new[] { GetFeed(GLOBAL_WARMING_RSS), GetFeed(CLIMATE_RSS) };
    var perFeed = await Task.WhenAll(pending);

    var merged = perFeed
        .SelectMany(results => results)
        .Distinct()
        .ToList();

    logger.LogInformation("Feed count {Count}", merged.Count);
    return merged;
}
/// <summary>
/// Checks that parsing the example file "Sample.xml" yields <c>null</c>.
/// </summary>
public void ParseFeed_NotRss()
{
    var path = GetExamplesWith("Sample.xml");
    using (var input = File.OpenRead(path))
    {
        var parsed = RssParser.Parse(input);
        Assert.That(parsed, Is.Null);
    }
}
/// <summary>
/// Parses the given example file and returns the content length of its first item.
/// </summary>
/// <param name="filename">Name of the example file to parse.</param>
/// <returns>Length of the <c>Content</c> of the first parsed item.</returns>
public int ParseFeed_Content(string filename)
{
    var path = GetExamplesWith(filename);
    using (var input = File.OpenRead(path))
    {
        var feed = RssParser.Parse(input);
        var firstItem = feed.Items.First();
        return firstItem.Content.Length;
    }
}
/// <summary>
/// Downloads the feed at <paramref name="url"/> and returns its parsed items.
/// </summary>
/// <param name="url">Address of the feed.</param>
/// <param name="isLimit">Passed through unchanged to <c>GetTextFromUrl</c>.</param>
/// <returns>
/// The parsed items. The list is empty when the download fails, when the
/// downloaded text is null or empty, or when parsing throws before any item is read.
/// </returns>
public static async Task<List<RssSchema>> GetSchemaFromUrl(string url, bool isLimit = false)
{
    var list = new List<RssSchema>();

    string feed;
    try
    {
        feed = await GetTextFromUrl(url, isLimit);
    }
    catch (Exception ex)
    {
        // Still best effort, but logged the same way as a parse failure below
        // instead of being dropped silently.
        Debug.WriteLine(ex.Message);
        return list;
    }

    if (string.IsNullOrEmpty(feed))
    {
        return list;
    }

    try
    {
        var parser = new RssParser();
        list.AddRange(parser.Parse(feed));
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex.Message);
    }

    return list;
}
/// <summary>
/// Parses RSS text with a new <c>RssParser</c>.
/// </summary>
/// <param name="feedContent">The RSS document text.</param>
/// <returns>Whatever the parser returns for <paramref name="feedContent"/>.</returns>
public static IEnumerable<RssSchema> ParseRSS(string feedContent)
    => new RssParser().Parse(feedContent);
/// <summary>
/// Builds articles from those of the ten most recently published feed entries
/// whose categories contain "Vijesti".
/// </summary>
/// <returns>The scraped articles, ordered by publish date, newest first.</returns>
/// <exception cref="NullReferenceException">The downloaded feed text was null or empty.</exception>
private async Task<List<Article>> ScrapeArticles()
{
    var feedText = await httpHelper.Get(FeedUrl);
    if (string.IsNullOrEmpty(feedText))
    {
        // NOTE(review): throwing NullReferenceException directly is unusual; the type
        // is kept because callers may depend on it.
        throw new NullReferenceException($"Could not get feed from url {FeedUrl}");
    }

    var latestEntries = new RssParser()
        .Parse(feedText)
        .OrderByDescending(entry => entry.PublishDate)
        .Take(10);

    var result = new List<Article>();
    foreach (var entry in latestEntries)
    {
        if (!entry.Categories.Contains("Vijesti"))
        {
            continue;
        }

        // The entry's InternalID is used as the URL of the article.
        var article = new Article
        {
            Source = NewsSource.IndexHr,
            Guid = GetGuidFromUrl(entry.InternalID),
            Title = entry.Title,
            Image = entry.ImageUrl,
            Text = await GetArticleText(entry.InternalID),
            Summary = entry.Summary,
            Keywords = keywordHelper.GetKeywordsFromTitle(entry.Title),
            SourceUrl = entry.InternalID
        };
        result.Add(article);
    }

    return result;
}
/// <summary>
/// Downloads the DR "all news" RSS feed and returns the entries whose title
/// mentions "Corona" or "covid-19" (case-insensitive).
/// </summary>
/// <returns>
/// At most 12 matching items, ordered by <c>CreatedDate</c> ascending. Every item
/// carries the same hard-coded placeholder image URL.
/// </returns>
public async static Task<List<NewsInformation>> GetFilteredNewsAsync()
{
    string feed;
    using (var client = new HttpClient())
    {
        // The client was previously never disposed.
        feed = await client.GetStringAsync("https://www.dr.dk/nyheder/service/feeds/allenyheder/");
    }

    var parser = new RssParser();
    var results = parser.Parse(feed)
        .Select(item => new NewsInformation
        {
            Title = item.Title,
            Description = item.Summary,
            CreatedDate = item.PublishDate,
            ImageUrl = @"https://is1-ssl.mzstatic.com/image/thumb/Purple124/v4/29/55/2e/29552ea2-5952-af7a-f398-89d177968258/AppIcon-0-0-1x_U007emarketing-0-0-0-7-0-0-85-220.png/600x600wa.png"
        });

    return results
        // Entries without a title cannot match and must not throw.
        .Where(w => w.Title != null
            && (w.Title.IndexOf("Corona", StringComparison.OrdinalIgnoreCase) != -1
                || w.Title.IndexOf("covid-19", StringComparison.OrdinalIgnoreCase) != -1))
        .OrderBy(w => w.CreatedDate)
        .Take(12)
        .ToList();
}
/*
 * Gets a single feed from the given url, adds its entries to rss and
 * re-orders rss by publish date, newest first.
 *
 * A failed HTTP request is logged to the console and leaves rss untouched.
 */
private async void getFeed(String feedUrl)
{
    string rssString;
    try
    {
        rssString = await ServerConfig.Instance.client.GetStringAsync(feedUrl);
    }
    catch (HttpRequestException e)
    {
        // InnerException may be null; fall back to the exception itself so the
        // handler cannot throw a NullReferenceException of its own.
        Console.WriteLine("RSS FEED ERROR: " + (e.InnerException ?? e).Message);
        return;
    }

    foreach (RssSchema element in rssParser.Parse(rssString))
    {
        // If the item does not include an image, use a default one
        if (element.ExtraImageUrl == null)
        {
            element.ExtraImageUrl = "http://csse-s302g3.canterbury.ac.nz/donationIcon.png";
        }

        if (element.ImageUrl == null)
        {
            element.ImageUrl = element.ExtraImageUrl;
        }

        rss.Add(element);
    }

    // OrderByDescending returns a new sequence and leaves rss untouched, so the
    // previous bare call had no effect. Write the sorted order back.
    // NOTE(review): assumes rss exposes Clear() (e.g. List<T>) - confirm.
    var newestFirst = rss.OrderByDescending(r => r.PublishDate).ToList();
    rss.Clear();
    foreach (var element in newestFirst)
    {
        rss.Add(element);
    }
}
/// <summary>
/// Downloads the document at <paramref name="feedUrl"/> and parses it as an RSS feed.
/// </summary>
/// <param name="feedUrl">Address of the feed to download.</param>
/// <returns>
/// The parsed entries, or an empty sequence when the download throws.
/// </returns>
public static async Task<IEnumerable<RssSchema>> FetchAsync(string feedUrl)
{
    string content;
    using (var http = new HttpClient())
    {
        try
        {
            content = await http.GetStringAsync(feedUrl);
        }
        catch
        {
            // Exceptions are not handled here for now.
            content = null;
        }
    }

    if (content == null)
    {
        return Enumerable.Empty<RssSchema>();
    }

    return new RssParser().Parse(content);
}
/// <summary>
/// Tries each address in <c>Source</c> in order, on a thread-pool thread, and
/// returns the parsed feed of the first one that yields non-empty data.
/// </summary>
/// <returns>
/// The parsed entries, or <c>null</c> when no source produced a parsed feed.
/// </returns>
public virtual async Task<IEnumerable<RssSchema>> Read()
{
    var parsed = await Task.Run<IEnumerable<RssSchema>>(() =>
    {
        for (var index = 0; index < Source.Length; index++)
        {
            try
            {
                var payload = Utils.HttpGET(Source[index]);
                if (string.IsNullOrEmpty(payload))
                {
                    continue;
                }

                // The first source that yields data wins; later ones are not tried.
                return new RssParser().Parse(payload);
            }
            catch (Exception ex)
            {
                // Log the failure and move on to the next source.
                ConsoleLog.Error("Feed", ex.GetFormatString());
                ConsoleLog.Error("Feed", "Target Url: ".CreateStringBuilder()
                    .AppendLine(Source[index]).Append(ConsoleLog.ErrorLogBuilder(ex, true)).ToString());
            }
        }

        return null;
    });

    return parsed;
}
/// <summary>
/// Downloads the feed at <paramref name="url"/> and maps every parsed entry to a <c>Feed</c>.
/// </summary>
/// <param name="url">Address of the feed to download.</param>
/// <returns>
/// The mapped entries; an empty list when the response is not successful or the
/// body fails <c>IsValidXml</c>; <c>null</c> when any exception is thrown.
/// </returns>
private static async Task<IEnumerable<Feed>> ParseRss(string url)
{
    var feedList = new List<Feed>();
    try
    {
        using (var client = new HttpClient(new HttpClientHandler { AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate }))
        {
            // NOTE(review): this is a static setting, so it affects more than this request,
            // and it still enables TLS 1.0/1.1 - confirm that this is intended.
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;

            // Get the XML; the response is disposed once the body has been read.
            using (var res = await client.GetAsync(url))
            {
                if (!res.IsSuccessStatusCode)
                {
                    return feedList;
                }

                var feed = await res.Content.ReadAsStringAsync();
                feed = feed.Replace("\r\n", string.Empty).Trim();

                // Check XML is valid
                if (!IsValidXml(feed))
                {
                    return feedList;
                }

                var parser = new RssParser();
                foreach (var element in parser.Parse(feed))
                {
                    feedList.Add(new Feed()
                    {
                        Author = element.Author,
                        Categories = element.Categories,
                        Content = element.Content,
                        ExtraImageUrl = element.ExtraImageUrl,
                        FeedUrl = element.FeedUrl,
                        ImageUrl = element.ImageUrl,
                        InternalID = element.InternalID,
                        MediaUrl = element.MediaUrl,
                        PublishDate = element.PublishDate,
                        Summary = element.Summary,
                        Title = element.Title
                    });
                }
            }
        }

        return feedList;
    }
    catch
    {
        // Existing contract: any failure is reported to the caller as null.
        return null;
    }
}
/// <summary>
/// Downloads the feed at <paramref name="url"/> and returns its entries ordered
/// by publish date, newest first.
/// </summary>
/// <param name="url">Address of the feed to download.</param>
/// <returns>The parsed entries as an array.</returns>
public async Task<RssSchema[]> GetRss(string url)
{
    var content = await _client.GetStringAsync(url);
    var entries = _parser.Parse(content);

    var newestFirst =
        from entry in entries
        orderby entry.PublishDate descending
        select entry;

    return newestFirst.ToArray();
}
/// <summary>
/// Checks the summary of the first item parsed from "SampleRss20-03.xml".
/// </summary>
public void Strip_Summary()
{
    var path = GetExamplesWith("SampleRss20-03.xml");
    using (var input = File.OpenRead(path))
    {
        var feed = RssParser.Parse(input);
        var summary = feed.Items[0].Summary;

        Assert.That(summary, Is.EqualTo("この画像はテスト画像です。"));
    }
}
/// <summary>
/// Checks that parsing <c>null</c> throws <see cref="ArgumentNullException"/>.
/// </summary>
public void Parse_WithNullInput_ThrowsArgumentNullException()
{
    // Arrange
    var sut = new RssParser();
    Action parseNull = () => { sut.Parse(null); };

    // Act + Assert
    Assert.Throws<ArgumentNullException>(parseNull);
}
/// <summary>
/// Checks that parsing an empty string throws <see cref="ArgumentException"/>.
/// </summary>
public void Parse_WithEmptyInput_ThrowsArgumentException()
{
    // Arrange
    var sut = new RssParser();
    Action parseEmpty = () => { sut.Parse(string.Empty); };

    // Act + Assert
    Assert.Throws<ArgumentException>(parseEmpty);
}
/// <summary>
/// Handles completion of the RSS download: on the main thread it rebuilds the
/// data source from the downloaded feed, then fetches each app icon on a
/// background task and reloads the grid when that task has finished.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Download result, carrying either the feed text or the error.</param>
private void DownloadCompleted(object sender, DownloadStringCompletedEventArgs e)
{
    UIApplication.SharedApplication.BeginInvokeOnMainThread(() =>
    {
        UIApplication.SharedApplication.NetworkActivityIndicatorVisible = false;
        mAlert.DismissWithClickedButtonIndex(0, true);

        if (e.Error != null)
        {
            DisplayError("Warning", "The rss feed could not be downloaded: " + e.Error.Message);
            return;
        }

        try
        {
            // Clear the selected items
            this.GridView.ClearSelectedItems();

            // Clear the rows
            mDatasource.ClearRows();
            mDatasource.Apps.Clear();

            foreach (var v in RssParser.Parse(e.Result))
            {
                mDatasource.Apps.Add(v);
            }

            mAlert = new UIAlertView("Fetching Icons...", "", null, null, null);
            mAlert.Show();
            UIApplication.SharedApplication.NetworkActivityIndicatorVisible = true;

            Task.Run(() =>
            {
                foreach (var anApp in mDatasource.Apps)
                {
                    byte[] data = null;
                    using (var c = new GzipWebClient())
                    {
                        data = c.DownloadData(anApp.ImageUrl);
                    }

                    anApp.Image = UIImage.LoadFromData(NSData.FromArray(data));
                }
            }).ContinueWith(prevTask =>
            {
                // Runs on success AND on failure. With OnlyOnRanToCompletion a failed
                // icon download left the alert and the activity indicator up forever.
                mAlert.DismissWithClickedButtonIndex(0, true);
                UIApplication.SharedApplication.NetworkActivityIndicatorVisible = false;
                GridView.ReloadData();

                if (prevTask.IsFaulted)
                {
                    // Reading Exception also marks the fault as observed.
                    DisplayError("Warning", "Some icons could not be downloaded: " + prevTask.Exception.GetBaseException().Message);
                }
            }, CancellationToken.None, TaskContinuationOptions.None, TaskScheduler.FromCurrentSynchronizationContext());
        }
        catch
        {
            DisplayError("Warning", "Malformed Xml was found in the Rss Feed.");
        }
    });
}
/// <summary>
/// Fetches <c>Source</c> with <c>Utils.HttpGET</c> and parses the result as RSS,
/// doing both on a thread-pool thread.
/// </summary>
/// <returns>Whatever the parser returns for the downloaded data.</returns>
public async Task<IEnumerable<RssSchema>> Read()
{
    var parsed = await Task.Run(() =>
    {
        var payload = Utils.HttpGET(Source);
        return new RssParser().Parse(payload);
    });

    return parsed;
}
/// <summary>
/// Takes the next user from <c>RssFeedsQueue</c>, parses that user's RSS feed and
/// stores every news item whose link is not stored yet.
/// </summary>
/// <param name="id">Identifier used only in log messages.</param>
/// <returns>
/// <c>false</c> when the queue is empty, the dequeued user is null, or dequeuing
/// throws; <c>true</c> otherwise, even when parsing or storing failed.
/// </returns>
bool GetRssNews(int id)
{
    WeiboUser user;
    try
    {
        if (RssFeedsQueue.Count == 0)
        {
            return false;
        }

        user = RssFeedsQueue.Dequeue();
        if (user == null)
        {
            return false;
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): if _logger is a Microsoft.Extensions.Logging ILogger, the
        // exception should be the first argument - confirm against the logger type.
        _logger.LogError($"Thread[{id}] error.", ex);
        return false;
    }

    try
    {
        IList<News> parsedNews = RssParser.Parse(user.RssUrl);
        foreach (var item in parsedNews)
        {
            try
            {
                var alreadyStored = _rssDataService.GetByLink(item.Link) != null;
                if (!alreadyStored)
                {
                    // Stamp the item with the publisher's details before saving it.
                    item.City = user.City;
                    item.Country = user.Country;
                    item.Province = user.Province;
                    item.Publisher = user.Name;
                    item.Source = "微博";
                    item.Status = NewsStatus.Unconfirmed;
                    _rssDataService.Create(item);
                }
            }
            catch (Exception ex)
            {
                // One failing item must not stop the remaining ones.
                _logger.LogError($"Thread[{id}] mongo error.", ex);
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogError($"Thread[{id}] rss error.", ex);
    }

    return true;
}
/// <summary>
/// Downloads the text at <paramref name="uri"/> via <c>HttpGetStringAsync</c> and
/// parses it with a new <c>RssParser</c>.
/// </summary>
/// <param name="uri">Address of the feed to download.</param>
/// <returns>Whatever the parser returns for the downloaded text.</returns>
public static async Task<IEnumerable<RssSchema>> FetchAndParseRSSAsync(Uri uri)
{
    string content = await HttpGetStringAsync(uri);
    return new RssParser().Parse(content);
}
/// <summary>
/// Checks that the first item parsed from "SampleRss20-01.xml" has no categories.
/// </summary>
public void ParseFeed_Categories()
{
    var path = GetExamplesWith("SampleRss20-01.xml");
    using (var input = File.OpenRead(path))
    {
        var firstItem = RssParser.Parse(input).Items[0];

        Assert.That(firstItem.Categories.Count, Is.EqualTo(0));
    }
}
/// <summary>
/// Crawls news: downloads every configured RSS subscription, collects the news
/// headers, then parses each header into a news body.
/// </summary>
/// <returns>
/// The bodies that could be parsed. Subscriptions and headers that throw are
/// logged and skipped.
/// </returns>
private NewsBody[] CrawleNews()
{
    LogManager.WriteLine("Crawle news rss...");
    List<NewsHeader> headers = new List<NewsHeader>();
    for (int i = 0; i < ConfigManager.Config.Rss.Length; i++)
    {
        string xmlUrl = ConfigManager.Config.Rss[i][0];

        // A leading '#' temporarily disables a subscription. The marker is a config
        // token, not linguistic text, so it is compared ordinally.
        if (xmlUrl.StartsWith("#", StringComparison.Ordinal))
        {
            continue;
        }

        string rssClass = ConfigManager.Config.Rss[i][1];
        try
        {
            string xml = Client.GET(xmlUrl);
            headers.AddRange(RssParser.Parse(xml, rssClass));
        }
        catch (Exception ex)
        {
            LogManager.ShowException(ex, "Cannot get " + xmlUrl);
        }
    }

    LogManager.WriteLine("Crawle news body...");
    int count = 0;
    List<NewsBody> bodyList = new List<NewsBody>();
    foreach (NewsHeader header in headers)
    {
        count++;

        // Report progress once for every 5 headers processed.
        if (count % 5 == 0)
        {
            LogManager.WriteLine(string.Format("<{0}> items done...", count));
        }

        try
        {
            NewsBody body = HeaderParser.Parse(header);
            bodyList.Add(body);
        }
        catch (Exception ex)
        {
            LogManager.ShowException(ex);
        }
    }

    return bodyList.ToArray();
}
/// <summary>
/// Downloads the document at <paramref name="url"/>, decodes it with the charset
/// reported by <c>GetCharSet</c> (falling back to <see cref="Encoding.Default"/>)
/// and returns the parsed feed items.
/// </summary>
/// <param name="url">Address of the feed.</param>
/// <param name="isLimit">When true, the request timeout is set to 20 seconds.</param>
/// <param name="Success">Optional callback invoked with the result list before it is returned.</param>
/// <returns>
/// The parsed items. The list is empty when the download fails or when parsing
/// throws before any item is read.
/// </returns>
public static async Task<List<RssSchema>> GetFeedsFromUrl(string url, bool isLimit = false, Action<List<RssSchema>> Success = null)
{
    string feed = null;

    using (var client = GetClient(url))
    {
        if (isLimit)
        {
            client.Timeout = TimeSpan.FromSeconds(20);
        }

        try
        {
            client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3837.0 Safari/537.36 Edg/77.0.211.2");

            using (var message = await client.GetAsync(url))
            {
                var content = await message.Content.ReadAsByteArrayAsync();

                // First pass: decode with the default encoding only to find the declared charset.
                var encode = Encoding.Default;
                var charset = GetCharSet(Encoding.Default.GetString(content));
                if (!string.IsNullOrEmpty(charset))
                {
                    try
                    {
                        encode = Encoding.GetEncoding(charset);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown charset name: keep the default encoding rather
                        // than discarding a feed that was downloaded successfully.
                    }
                }

                // Second pass: decode the body with the resolved encoding.
                using (var stream = await message.Content.ReadAsStreamAsync())
                using (var sr = new StreamReader(stream, encode))
                {
                    feed = await sr.ReadToEndAsync();
                }
            }
        }
        catch (Exception)
        {
            // Best effort: any download or decoding failure yields an empty result.
        }
    }

    var list = new List<RssSchema>();
    if (feed != null)
    {
        try
        {
            var parser = new RssParser();
            foreach (var item in parser.Parse(feed))
            {
                list.Add(item);
            }
        }
        catch (Exception)
        {
            // Best effort: items read before the failure are kept.
        }
    }

    Success?.Invoke(list);
    return list;
}