/// <summary>
/// Re-fetches the river's origin feed and folds any newly-seen items into it.
/// </summary>
/// <param name="river">The river to refresh.</param>
/// <returns>
/// A new <see cref="River"/> carrying the latest fetch metadata; when the
/// fetch produced items not already present, the refreshed feed is prepended
/// to the river's update history.
/// </returns>
public async Task<River> UpdateAsync(River river)
{
    FetchResult fetchResult = await FetchAsync(
        river.Metadata.OriginUrl,
        river.Metadata.Etag,
        river.Metadata.LastModified);

    var resultFeeds = river.UpdatedFeeds;
    if (fetchResult.Feed != null)
    {
        var fetchedFeed = fetchResult.Feed;
        Item[] storedItems = await this.feedItemStore.StoreItems(
            river.Metadata.OriginUrl,
            fetchedFeed.Items.ToArray());

        // Drop items we've already recorded in this river's history.
        // TODO: Filter this out once we've loaded a bit.
        var knownIds = new HashSet<string>(
            from oldFeed in river.UpdatedFeeds.Feeds
            from oldItem in oldFeed.Items
            where oldItem.Id != null
            select oldItem.Id);
        storedItems = storedItems
            .Where(item => !knownIds.Contains(item.Id))
            .ToArray();

        if (storedItems.Length > 0)
        {
            // Resolve relative links against the site URL when it parses as
            // absolute; otherwise fall back to the feed's own URL.
            Uri baseUri =
                SyndicationUtil.TryParseAbsoluteUrl(fetchedFeed.WebsiteUrl)
                ?? fetchedFeed.FeedUrl;
            for (int index = 0; index < storedItems.Length; index++)
            {
                storedItems[index] = Rebase(storedItems[index], baseUri);
            }

            storedItems = await this.thumbnailExtractor.LoadItemThumbnailsAsync(
                baseUri,
                storedItems);
            await this.feedItemStore.UpdateItemThumbs(
                river.Metadata.OriginUrl,
                storedItems);

            fetchedFeed = fetchedFeed.With(items: storedItems);
            resultFeeds = river.UpdatedFeeds.With(
                feeds: river.UpdatedFeeds.Feeds.Insert(0, fetchedFeed));
        }
    }

    var newMetadata = river.Metadata.With(
        etag: fetchResult.Etag,
        lastModified: fetchResult.LastModified,
        originUrl: fetchResult.FeedUrl,
        lastStatus: fetchResult.Status);

    return river.With(updatedFeeds: resultFeeds, metadata: newMetadata);
}
/// <summary>
/// Attempts to discover feed URLs for the given origin URL. Probes, in
/// order: the URL itself, <c>&lt;link&gt;</c> elements with a feed MIME
/// type, same-host <c>&lt;a&gt;</c> targets, remote <c>&lt;a&gt;</c>
/// targets, and finally a set of well-known feed paths.
/// </summary>
/// <param name="originUrl">The URL to probe for feeds.</param>
/// <param name="findAll">
/// When true, collects candidates from every phase; when false, returns as
/// soon as any phase yields at least one live feed.
/// </param>
/// <returns>
/// The discovered feed URLs, best candidates first within each phase;
/// empty if none were found.
/// </returns>
public static async Task<IList<Uri>> GetFeedUrls(
    string originUrl, bool findAll = false)
{
    var allUrls = new List<Uri>();
    Uri baseUri = FixupUrl(originUrl);

    // Maybe... maybe this one is a feed?
    Log.FindFeedCheckingBase(baseUri);
    string data = await GetFeedData(baseUri);
    if (LooksLikeFeed(data))
    {
        Log.FindFeedBaseWasFeed(baseUri);
        return new[] { baseUri };
    }

    // Shared phase logic: filter the candidates down to live feeds, log the
    // survivors, sort best-first, and accumulate into allUrls. Returns true
    // when the phase produced at least one feed. (Previously this was
    // copy-pasted four times, and later copies returned the phase-local list
    // instead of allUrls — content-identical given the control flow, but
    // fragile; now every early return goes through allUrls.)
    async Task<bool> AccumulateAsync(
        List<Uri> candidates, Action<Uri, List<Uri>> logFound)
    {
        await FilterUrlsByFeed(candidates);
        if (candidates.Count == 0)
        {
            return false;
        }

        logFound(baseUri, candidates);
        candidates.Sort(UrlFeedComparison);
        allUrls.AddRange(candidates);
        return true;
    }

    // Nope, let's dive into the soup!
    var parser = new HtmlParser();
    IHtmlDocument document = parser.ParseDocument(data);

    // Link elements.
    Log.FindFeedCheckingLinkElements(baseUri);
    var linkUrls = new List<Uri>();
    foreach (IElement element in document.GetElementsByTagName("link"))
    {
        string linkType = element.GetAttribute("type");
        if (linkType != null && FeedMimeTypes.Contains(linkType))
        {
            Uri hrefUrl = SyndicationUtil.TryParseAbsoluteUrl(
                element.GetAttribute("href"), baseUri);
            if (hrefUrl != null)
            {
                linkUrls.Add(hrefUrl);
            }
        }
    }

    if (await AccumulateAsync(
            linkUrls, (b, u) => Log.FindFeedFoundLinkElements(b, u)) &&
        !findAll)
    {
        return allUrls;
    }

    // <a> tags
    Log.FindFeedCheckingAnchorElements(baseUri);
    var localGuesses = new List<Uri>();
    var remoteGuesses = new List<Uri>();
    foreach (IElement element in document.GetElementsByTagName("a"))
    {
        Uri hrefUrl = SyndicationUtil.TryParseAbsoluteUrl(
            element.GetAttribute("href"), baseUri);
        if (hrefUrl == null)
        {
            continue;
        }

        if ((hrefUrl.Host == baseUri.Host) && IsFeedUrl(hrefUrl))
        {
            localGuesses.Add(hrefUrl);
        }
        else if (IsFeedishUrl(hrefUrl))
        {
            remoteGuesses.Add(hrefUrl);
        }
    }

    Log.FindFeedFoundSomeAnchors(baseUri, localGuesses, remoteGuesses);

    // (Consider ones on the same domain first.)
    if (await AccumulateAsync(
            localGuesses, (b, u) => Log.FindFeedsFoundLocalGuesses(b, u)) &&
        !findAll)
    {
        return allUrls;
    }

    if (await AccumulateAsync(
            remoteGuesses, (b, u) => Log.FindFeedsFoundRemoteGuesses(b, u)) &&
        !findAll)
    {
        return allUrls;
    }

    // Last resort: probe well-known feed paths off the site root.
    List<Uri> randomGuesses = FeedNames.Select(s => new Uri(baseUri, s)).ToList();
    if (await AccumulateAsync(
            randomGuesses, (b, u) => Log.FindFeedsFoundRandomGuesses(b, u)) &&
        !findAll)
    {
        return allUrls;
    }

    // All done, nothing. (Or... everything!)
    Log.FindFeedFoundTotal(baseUri, allUrls);
    return allUrls;
}