Beispiel #1
0
        /// <summary>
        /// Fetches the river's origin feed and produces a copy of the river carrying the
        /// fetch outcome.
        /// </summary>
        /// <remarks>
        /// The fetch is issued with the river's current origin URL, ETag and last-modified
        /// values. When the fetch yields a feed, its items are handed to the item store,
        /// items whose id already appears in the river are dropped, and any remaining items
        /// are rebased, run through the thumbnail extractor, written back to the store and
        /// inserted as a new feed at index 0 of the river's feed list. The metadata is
        /// rebuilt from the fetch result whether or not a feed came back.
        /// </remarks>
        /// <param name="river">The river to refresh; it is read, never modified.</param>
        /// <returns>
        /// A river built via <c>With</c> from <paramref name="river"/>, holding the
        /// (possibly unchanged) feed list and the refreshed metadata.
        /// </returns>
        public async Task <River> UpdateAsync(River river)
        {
            FetchResult result = await FetchAsync(
                river.Metadata.OriginUrl, river.Metadata.Etag, river.Metadata.LastModified);

            // Default to the current feed list; it is only replaced when unseen items arrive.
            var feedsAfterUpdate = river.UpdatedFeeds;

            var fetchedFeed = result.Feed;
            if (fetchedFeed != null)
            {
                Item[] freshItems = await this.feedItemStore.StoreItems(
                    river.Metadata.OriginUrl, fetchedFeed.Items.ToArray());

                // TODO: Filter this out once we've loaded a bit.
                var knownIds = new HashSet<string>(
                    river.UpdatedFeeds.Feeds
                        .SelectMany(knownFeed => knownFeed.Items)
                        .Where(known => known.Id != null)
                        .Select(known => known.Id));

                // Items the river already shows are not repeated.
                freshItems = freshItems.Where(candidate => !knownIds.Contains(candidate.Id)).ToArray();

                if (freshItems.Length > 0)
                {
                    // Prefer the feed's website as the base; fall back to the feed URL itself.
                    Uri baseUri = SyndicationUtil.TryParseAbsoluteUrl(fetchedFeed.WebsiteUrl) ?? fetchedFeed.FeedUrl;

                    for (int index = 0; index < freshItems.Length; index++)
                    {
                        freshItems[index] = Rebase(freshItems[index], baseUri);
                    }

                    freshItems = await this.thumbnailExtractor.LoadItemThumbnailsAsync(baseUri, freshItems);
                    await this.feedItemStore.UpdateItemThumbs(river.Metadata.OriginUrl, freshItems);

                    var trimmedFeed = fetchedFeed.With(items: freshItems);
                    feedsAfterUpdate = river.UpdatedFeeds.With(
                        feeds: river.UpdatedFeeds.Feeds.Insert(0, trimmedFeed));
                }
            }

            var refreshedMetadata = river.Metadata.With(
                etag: result.Etag,
                lastModified: result.LastModified,
                originUrl: result.FeedUrl,
                lastStatus: result.Status);

            return river.With(updatedFeeds: feedsAfterUpdate, metadata: refreshedMetadata);
        }
        /// <summary>
        /// Looks for feed URLs reachable from <paramref name="originUrl"/>.
        /// </summary>
        /// <remarks>
        /// Stages run in this order:
        /// <list type="number">
        /// <item>The URL itself: if its data passes <c>LooksLikeFeed</c>, it is returned alone
        /// (regardless of <paramref name="findAll"/>).</item>
        /// <item><c>link</c> elements whose <c>type</c> attribute is in <c>FeedMimeTypes</c>.</item>
        /// <item>Anchors on the same host as the base URL that pass <c>IsFeedUrl</c>.</item>
        /// <item>Any other anchors that pass <c>IsFeedishUrl</c>.</item>
        /// <item>Each entry of <c>FeedNames</c> resolved against the base URL.</item>
        /// </list>
        /// Every candidate list is passed to <c>FilterUrlsByFeed</c> before it is inspected;
        /// presumably that helper removes non-feed entries in place (its result is not used
        /// here) -- confirm against its definition. Surviving lists are sorted with
        /// <c>UrlFeedComparison</c>.
        /// </remarks>
        /// <param name="originUrl">Starting address; it is passed through <c>FixupUrl</c> first.</param>
        /// <param name="findAll">
        /// When <c>false</c>, the first stage with surviving candidates ends the search and
        /// its URLs are returned. When <c>true</c>, every stage runs and the results are
        /// accumulated in stage order.
        /// </param>
        /// <returns>The discovered feed URLs; an empty list when no stage produced any.</returns>
        public static async Task <IList <Uri> > GetFeedUrls(
            string originUrl,
            bool findAll = false)
        {
            Uri baseUri = FixupUrl(originUrl);
            var allUrls = new List<Uri>();

            // Maybe... maybe this one is a feed?
            Log.FindFeedCheckingBase(baseUri);
            string payload = await GetFeedData(baseUri);
            if (LooksLikeFeed(payload))
            {
                Log.FindFeedBaseWasFeed(baseUri);
                return new[] { baseUri };
            }

            // Nope, let's dive into the soup!
            IHtmlDocument page = new HtmlParser().ParseDocument(payload);

            // Link elements.
            Log.FindFeedCheckingLinkElements(baseUri);
            var linkUrls = new List<Uri>();
            foreach (IElement linkElement in page.GetElementsByTagName("link"))
            {
                string mimeType = linkElement.GetAttribute("type");
                if (mimeType == null || !FeedMimeTypes.Contains(mimeType))
                {
                    continue;
                }

                Uri linkTarget = SyndicationUtil.TryParseAbsoluteUrl(
                    linkElement.GetAttribute("href"), baseUri);
                if (linkTarget != null)
                {
                    linkUrls.Add(linkTarget);
                }
            }

            await FilterUrlsByFeed(linkUrls);
            if (linkUrls.Count != 0)
            {
                Log.FindFeedFoundLinkElements(baseUri, linkUrls);
                linkUrls.Sort(UrlFeedComparison);
                allUrls.AddRange(linkUrls);
                if (!findAll)
                {
                    return allUrls;
                }
            }

            // <a> tags
            Log.FindFeedCheckingAnchorElements(baseUri);
            var localGuesses = new List<Uri>();
            var remoteGuesses = new List<Uri>();
            foreach (IElement anchor in page.GetElementsByTagName("a"))
            {
                Uri anchorTarget = SyndicationUtil.TryParseAbsoluteUrl(
                    anchor.GetAttribute("href"), baseUri);
                if (anchorTarget == null)
                {
                    continue;
                }

                bool sameHost = anchorTarget.Host == baseUri.Host;
                if (sameHost && IsFeedUrl(anchorTarget))
                {
                    localGuesses.Add(anchorTarget);
                }
                else if (IsFeedishUrl(anchorTarget))
                {
                    remoteGuesses.Add(anchorTarget);
                }
            }

            Log.FindFeedFoundSomeAnchors(baseUri, localGuesses, remoteGuesses);

            // (Consider ones on the same domain first.)
            await FilterUrlsByFeed(localGuesses);
            if (localGuesses.Count != 0)
            {
                Log.FindFeedsFoundLocalGuesses(baseUri, localGuesses);
                localGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(localGuesses);
                if (!findAll)
                {
                    return localGuesses;
                }
            }

            await FilterUrlsByFeed(remoteGuesses);
            if (remoteGuesses.Count != 0)
            {
                Log.FindFeedsFoundRemoteGuesses(baseUri, remoteGuesses);
                remoteGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(remoteGuesses);
                if (!findAll)
                {
                    return remoteGuesses;
                }
            }

            // Last resort: try each well-known feed name relative to the base URL.
            var randomGuesses = new List<Uri>();
            foreach (var feedName in FeedNames)
            {
                randomGuesses.Add(new Uri(baseUri, feedName));
            }

            await FilterUrlsByFeed(randomGuesses);
            if (randomGuesses.Count != 0)
            {
                Log.FindFeedsFoundRandomGuesses(baseUri, randomGuesses);
                randomGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(randomGuesses);
                if (!findAll)
                {
                    return randomGuesses;
                }
            }

            // All done, nothing. (Or... everything!)
            Log.FindFeedFoundTotal(baseUri, allUrls);
            return allUrls;
        }