/// <summary>
/// Applies one Atom <c>link</c> element to <paramref name="item"/> and returns the updated item.
/// </summary>
/// <remarks>
/// A missing <c>rel</c> attribute is treated as "alternate". For choosing the item's link and permalink only,
/// a missing <c>type</c> attribute is treated as "text/html":
/// an HTML "alternate" link becomes the item's link, an HTML "self" link becomes its permalink.
/// A link whose <c>rel</c> is "enclosure" is appended to the item's enclosures with the type exactly as
/// declared on the element (null when absent), matching what <c>HandleEnclosure</c> does for RSS.
/// All <c>rel</c> comparisons ignore case.
/// </remarks>
/// <param name="item">The item built so far.</param>
/// <param name="link">The Atom <c>link</c> element to interpret.</param>
/// <returns>The item with any link, permalink, or enclosure from this element applied.</returns>
static Item HandleAtomLink(Item item, XElement link)
{
    string rel = link.Attribute(XNames.Atom.Rel)?.Value ?? "alternate";
    string declaredType = link.Attribute(XNames.Atom.Type)?.Value;
    string href = link.Attribute(XNames.Atom.Href)?.Value;

    // The "text/html" fallback exists only to decide whether this link can serve as the item's
    // link/permalink; it must not leak into enclosure metadata below.
    bool isHtml = (declaredType ?? "text/html").StartsWith("text/html", StringComparison.OrdinalIgnoreCase);

    if (isHtml && String.Equals(rel, "alternate", StringComparison.OrdinalIgnoreCase))
    {
        item = item.With(link: SyndicationUtil.ParseLink(href, link));
    }

    if (isHtml && String.Equals(rel, "self", StringComparison.OrdinalIgnoreCase))
    {
        item = item.With(permaLink: SyndicationUtil.ParseLink(href, link));
    }

    // Compare the already-read rel the same (case-insensitive) way as the branches above.
    if (String.Equals(rel, "enclosure", StringComparison.OrdinalIgnoreCase))
    {
        item = item.With(enclosures: item.Enclosures.Add(new Enclosure(
            length: link.Attribute(XNames.Atom.Length)?.Value,
            type: declaredType,
            url: SyndicationUtil.ParseLink(href, link)
        )));
    }

    return item;
}
/// <summary>
/// Sets the item's thumbnail from a media content element, when that element describes an image
/// with a parseable URL and integer width and height.
/// </summary>
/// <param name="item">The item built so far.</param>
/// <param name="element">The candidate element.</param>
/// <returns>
/// The item with a new thumbnail, or <paramref name="item"/> unchanged when the element is not
/// image media content or any of URL, width, or height cannot be parsed.
/// </returns>
static Item HandleThumbnail(Item item, XElement element)
{
    bool isImageContent =
        element.Name == XNames.Media.Content &&
        element.Attribute(XNames.Media.Medium)?.Value == "image";
    if (!isImageContent)
    {
        return item;
    }

    Uri imageUrl = SyndicationUtil.TryParseUrl(element.Attribute(XNames.Media.Url)?.Value, null, element);
    if (imageUrl == null)
    {
        return item;
    }

    // Both dimensions are required; a missing attribute yields a null string, which fails to parse.
    int pixelWidth;
    if (!Int32.TryParse(element.Attribute(XNames.Media.Width)?.Value, out pixelWidth))
    {
        return item;
    }

    int pixelHeight;
    if (!Int32.TryParse(element.Attribute(XNames.Media.Height)?.Value, out pixelHeight))
    {
        return item;
    }

    return item.With(thumbnail: new Thumbnail(imageUrl, pixelWidth, pixelHeight));
}
/// <summary>
/// Builds an <c>Item</c> from a feed entry element by running each child element through the
/// handler registered for its name in <c>ItemElements</c>, then filling in derived fields.
/// </summary>
/// <remarks>
/// After the handlers run, in order: the body is chosen, a missing permalink falls back to the link,
/// a missing id is generated with <c>CreateItemId</c>, and a blank title falls back to the
/// publication date, then the permalink, then the id.
/// </remarks>
/// <param name="item">The entry element whose children describe the item.</param>
/// <returns>The populated item.</returns>
static Item LoadItem(XElement item)
{
    var result = new Item();
    foreach (XElement child in item.Elements())
    {
        // Children with no registered handler are ignored.
        Func<Item, XElement, Item> handler;
        if (ItemElements.TryGetValue(child.Name, out handler))
        {
            result = handler(result, child);
        }
    }

    // Choose the body: an explicit summary wins over "description" (which is ambiguous), which in
    // turn wins over "content" (which is explicitly intended to be the full entry content).
    if (result.Summary != null)
    {
        result = result.With(body: SyndicationUtil.ParseBody(result.Summary));
    }
    else if (result.Description != null)
    {
        result = result.With(body: SyndicationUtil.ParseBody(result.Description));
    }
    else if (result.Content != null)
    {
        result = result.With(body: SyndicationUtil.ParseBody(result.Content));
    }

    if (result.PermaLink == null)
    {
        result = result.With(permaLink: result.Link);
    }

    if (result.Id == null)
    {
        result = result.With(id: CreateItemId(result));
    }

    if (!String.IsNullOrWhiteSpace(result.Title))
    {
        return result;
    }

    // No usable title: substitute the first available of date, permalink, id.
    string fallbackTitle =
        result.PubDate != null ? result.PubDate.ToString()
        : result.PermaLink != null ? result.PermaLink.AbsoluteUri
        : result.Id;

    return fallbackTitle != null ? result.With(title: fallbackTitle) : result;
}
/// <summary>
/// Returns a copy of <paramref name="item"/> whose link, permalink, and every enclosure URL have
/// been passed through <c>SyndicationUtil.Rebase</c> with <paramref name="baseUri"/>.
/// </summary>
/// <param name="item">The item whose URLs should be rebased.</param>
/// <param name="baseUri">The base URI handed to <c>SyndicationUtil.Rebase</c>.</param>
/// <returns>The item with rebased URLs.</returns>
static Item Rebase(Item item, Uri baseUri)
{
    var rebasedLink = SyndicationUtil.Rebase(item.Link, baseUri);
    var rebasedPermaLink = SyndicationUtil.Rebase(item.PermaLink, baseUri);
    var rebasedEnclosures = item.Enclosures.Select(
        enclosure => enclosure.With(url: SyndicationUtil.Rebase(enclosure.Url, baseUri)));

    return item.With(
        link: rebasedLink,
        permaLink: rebasedPermaLink,
        enclosures: rebasedEnclosures
    );
}
/// <summary>
/// Appends an enclosure built from the element's RSS <c>length</c>, <c>type</c>, and <c>url</c>
/// attributes to the item's enclosures.
/// </summary>
/// <param name="item">The item built so far.</param>
/// <param name="element">The enclosure element.</param>
/// <returns>The item with the additional enclosure.</returns>
static Item HandleEnclosure(Item item, XElement element)
{
    // Absent attributes are passed through as null.
    string length = element.Attribute(XNames.RSS.Length)?.Value;
    string type = element.Attribute(XNames.RSS.Type)?.Value;
    string rawUrl = element.Attribute(XNames.RSS.Url)?.Value;

    var enclosure = new Enclosure(
        length: length,
        type: type,
        url: SyndicationUtil.ParseLink(rawUrl, element)
    );

    return item.With(enclosures: item.Enclosures.Add(enclosure));
}
/// <summary>
/// Updates the item's publication date from a date element, keeping the latest date seen.
/// </summary>
/// <param name="item">The item built so far.</param>
/// <param name="element">The element handed to <c>SyndicationUtil.ParseDate</c>.</param>
/// <returns>
/// The item with the parsed date when the item has no date yet or the parsed date is later;
/// otherwise <paramref name="item"/> unchanged (including when nothing could be parsed).
/// </returns>
static Item HandlePubDate(Item item, XElement element)
{
    DateTime? parsed = SyndicationUtil.ParseDate(element);
    if (parsed == null)
    {
        return item;
    }

    bool shouldReplace = item.PubDate == null || parsed > item.PubDate;
    return shouldReplace ? item.With(pubDate: parsed) : item;
}
/// <summary>
/// Uses the element's text as the item id and, when that id starts with "http://" or "https://"
/// (ignoring case), also as the item's permalink.
/// </summary>
/// <param name="item">The item built so far.</param>
/// <param name="element">The guid element.</param>
/// <returns>The item with its id set, and its permalink set when the id looks like a web URL.</returns>
static Item HandleGuid(Item item, XElement element)
{
    Item withId = item.With(id: element.Value);
    string id = withId.Id;

    bool looksLikeWebUrl =
        id.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
        id.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!looksLikeWebUrl)
    {
        return withId;
    }

    return withId.With(permaLink: SyndicationUtil.ParseLink(id, element));
}
/// <summary>
/// Fetches the river's origin feed and returns a river with refreshed metadata and, when the
/// fetch produced items not already present, a new feed entry inserted at the front.
/// </summary>
/// <remarks>
/// Fetched items are first handed to the item store, then anything whose id already appears in
/// the river's existing feeds is dropped. The remaining items are rebased against the feed's
/// website URL (or the feed URL when that does not parse as absolute), run through the thumbnail
/// extractor, and written back to the store before being added to the river.
/// </remarks>
/// <param name="river">The river to update.</param>
/// <returns>The updated river; metadata is always replaced from the fetch result.</returns>
public async Task<River> UpdateAsync(River river)
{
    FetchResult fetched = await FetchAsync(
        river.Metadata.OriginUrl,
        river.Metadata.Etag,
        river.Metadata.LastModified
    );

    var resultFeeds = river.UpdatedFeeds;
    var fetchedFeed = fetched.Feed;
    if (fetchedFeed != null)
    {
        Item[] freshItems = await this.feedItemStore.StoreItems(
            river.Metadata.OriginUrl, fetchedFeed.Items.ToArray());

        // TODO: Filter this out once we've loaded a bit.
        var knownIds = new HashSet<string>(
            river.UpdatedFeeds.Feeds
                .SelectMany(knownFeed => knownFeed.Items)
                .Where(known => known.Id != null)
                .Select(known => known.Id)
        );
        freshItems = freshItems.Where(candidate => !knownIds.Contains(candidate.Id)).ToArray();

        if (freshItems.Length > 0)
        {
            Uri baseUri = SyndicationUtil.TryParseAbsoluteUrl(fetchedFeed.WebsiteUrl) ?? fetchedFeed.FeedUrl;

            freshItems = freshItems.Select(fresh => Rebase(fresh, baseUri)).ToArray();
            freshItems = await this.thumbnailExtractor.LoadItemThumbnailsAsync(baseUri, freshItems);
            await this.feedItemStore.UpdateItemThumbs(river.Metadata.OriginUrl, freshItems);

            // Only the new items are recorded on the feed entry that gets prepended.
            fetchedFeed = fetchedFeed.With(items: freshItems);
            resultFeeds = river.UpdatedFeeds.With(feeds: river.UpdatedFeeds.Feeds.Insert(0, fetchedFeed));
        }
    }

    var refreshedMetadata = river.Metadata.With(
        etag: fetched.Etag,
        lastModified: fetched.LastModified,
        originUrl: fetched.FeedUrl,
        lastStatus: fetched.Status
    );

    return river.With(updatedFeeds: resultFeeds, metadata: refreshedMetadata);
}
/// <summary>
/// Discovers feed URLs starting from <paramref name="originUrl"/>.
/// </summary>
/// <remarks>
/// Stages, in order: (1) the URL itself, returned alone if its content looks like a feed;
/// (2) <c>link</c> elements whose <c>type</c> is in <c>FeedMimeTypes</c>; (3) anchors on the same
/// host that pass <c>IsFeedUrl</c>; (4) other anchors that pass <c>IsFeedishUrl</c>; (5) guesses built
/// from <c>FeedNames</c> relative to the base URL. Each candidate list is passed to
/// <c>FilterUrlsByFeed</c> (presumably pruning it in place to real feeds; confirm against that
/// method) and sorted with <c>UrlFeedComparison</c>.
/// </remarks>
/// <param name="originUrl">The URL to start from; normalized with <c>FixupUrl</c>.</param>
/// <param name="findAll">
/// When false (the default), the first stage that yields any URLs is returned immediately.
/// When true, results of stages 2 through 5 are accumulated and returned together.
/// </param>
/// <returns>The discovered feed URLs; empty when nothing was found.</returns>
public static async Task<IList<Uri>> GetFeedUrls(string originUrl, bool findAll = false)
{
    var found = new List<Uri>();
    Uri baseUri = FixupUrl(originUrl);

    // Maybe... maybe this one is a feed?
    Log.FindFeedCheckingBase(baseUri);
    string data = await GetFeedData(baseUri);
    if (LooksLikeFeed(data))
    {
        Log.FindFeedBaseWasFeed(baseUri);
        return new[] { baseUri };
    }

    // Nope, let's dive into the soup!
    var htmlParser = new HtmlParser();
    IHtmlDocument document = htmlParser.ParseDocument(data);

    // Link elements.
    Log.FindFeedCheckingLinkElements(baseUri);
    var fromLinks = new List<Uri>();
    foreach (IElement linkElement in document.GetElementsByTagName("link"))
    {
        string mimeType = linkElement.GetAttribute("type");
        if (mimeType == null || !FeedMimeTypes.Contains(mimeType))
        {
            continue;
        }

        Uri target = SyndicationUtil.TryParseAbsoluteUrl(linkElement.GetAttribute("href"), baseUri);
        if (target != null)
        {
            fromLinks.Add(target);
        }
    }

    await FilterUrlsByFeed(fromLinks);
    if (fromLinks.Count > 0)
    {
        Log.FindFeedFoundLinkElements(baseUri, fromLinks);
        fromLinks.Sort(UrlFeedComparison);
        found.AddRange(fromLinks);
        if (!findAll)
        {
            return found;
        }
    }

    // <a> tags
    Log.FindFeedCheckingAnchorElements(baseUri);
    var sameHostGuesses = new List<Uri>();
    var otherHostGuesses = new List<Uri>();
    foreach (IElement anchor in document.GetElementsByTagName("a"))
    {
        Uri target = SyndicationUtil.TryParseAbsoluteUrl(anchor.GetAttribute("href"), baseUri);
        if (target == null)
        {
            continue;
        }

        bool sameHost = target.Host == baseUri.Host;
        if (sameHost && IsFeedUrl(target))
        {
            sameHostGuesses.Add(target);
        }
        else if (IsFeedishUrl(target))
        {
            otherHostGuesses.Add(target);
        }
    }
    Log.FindFeedFoundSomeAnchors(baseUri, sameHostGuesses, otherHostGuesses);

    // (Consider ones on the same domain first.)
    await FilterUrlsByFeed(sameHostGuesses);
    if (sameHostGuesses.Count > 0)
    {
        Log.FindFeedsFoundLocalGuesses(baseUri, sameHostGuesses);
        sameHostGuesses.Sort(UrlFeedComparison);
        found.AddRange(sameHostGuesses);
        if (!findAll)
        {
            return sameHostGuesses;
        }
    }

    await FilterUrlsByFeed(otherHostGuesses);
    if (otherHostGuesses.Count > 0)
    {
        Log.FindFeedsFoundRemoteGuesses(baseUri, otherHostGuesses);
        otherHostGuesses.Sort(UrlFeedComparison);
        found.AddRange(otherHostGuesses);
        if (!findAll)
        {
            return otherHostGuesses;
        }
    }

    // Last resort: try the names in FeedNames relative to the base URL.
    List<Uri> wellKnownGuesses = FeedNames.Select(name => new Uri(baseUri, name)).ToList();
    await FilterUrlsByFeed(wellKnownGuesses);
    if (wellKnownGuesses.Count > 0)
    {
        Log.FindFeedsFoundRandomGuesses(baseUri, wellKnownGuesses);
        wellKnownGuesses.Sort(UrlFeedComparison);
        found.AddRange(wellKnownGuesses);
        if (!findAll)
        {
            return wellKnownGuesses;
        }
    }

    // All done, nothing. (Or... everything!)
    Log.FindFeedFoundTotal(baseUri, found);
    return found;
}