Esempio n. 1
0
        /// <summary>
        /// Applies a single Atom <c>link</c> element to <paramref name="item"/>.
        /// </summary>
        /// <remarks>
        /// A missing <c>rel</c> attribute is treated as "alternate" and a missing <c>type</c> attribute as
        /// "text/html" when deciding whether the link is the item's link or permalink. Relation names are
        /// compared case-insensitively for every relation, including "enclosure".
        /// </remarks>
        /// <param name="item">The item built so far.</param>
        /// <param name="link">The Atom link element to interpret.</param>
        /// <returns>
        /// The item updated through <c>With</c> when the link is a recognized relation; otherwise the item
        /// unchanged.
        /// </returns>
        static Item HandleAtomLink(Item item, XElement link)
        {
            // Keep the type exactly as declared: the "text/html" default is only a heuristic for
            // alternate/self links and must not be recorded as the media type of an enclosure.
            string declaredType = link.Attribute(XNames.Atom.Type)?.Value;

            string rel  = link.Attribute(XNames.Atom.Rel)?.Value ?? "alternate";
            string type = declaredType ?? "text/html";
            string href = link.Attribute(XNames.Atom.Href)?.Value;

            bool isHtml = type.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);

            if (String.Equals(rel, "alternate", StringComparison.OrdinalIgnoreCase))
            {
                if (isHtml)
                {
                    item = item.With(link: SyndicationUtil.ParseLink(href, link));
                }
            }
            else if (String.Equals(rel, "self", StringComparison.OrdinalIgnoreCase))
            {
                if (isHtml)
                {
                    item = item.With(permaLink: SyndicationUtil.ParseLink(href, link));
                }
            }
            else if (String.Equals(rel, "enclosure", StringComparison.OrdinalIgnoreCase))
            {
                // Same comparison rule as the other relations, so rel="Enclosure" is no longer dropped.
                item = item.With(enclosures: item.Enclosures.Add(new Enclosure(
                                                                     length: link.Attribute(XNames.Atom.Length)?.Value,
                                                                     type: declaredType,
                                                                     url: SyndicationUtil.ParseLink(href, link)
                                                                     )));
            }
            return(item);
        }
Esempio n. 2
0
        /// <summary>
        /// Sets the item's thumbnail from an <c>XNames.Media.Content</c> element whose medium attribute is
        /// "image", provided it carries a parseable URL and integer width and height attributes.
        /// </summary>
        /// <param name="item">The item built so far.</param>
        /// <param name="element">The candidate element.</param>
        /// <returns>The item with a thumbnail applied, or the item unchanged when any requirement fails.</returns>
        static Item HandleThumbnail(Item item, XElement element)
        {
            bool isImageContent =
                element.Name == XNames.Media.Content &&
                element.Attribute(XNames.Media.Medium)?.Value == "image";
            if (!isImageContent)
            {
                return(item);
            }

            string rawUrl = element.Attribute(XNames.Media.Url)?.Value;
            Uri url = SyndicationUtil.TryParseUrl(rawUrl, null, element);
            if (url == null)
            {
                return(item);
            }

            // Both dimensions must parse; width is checked first, then height.
            int width;
            if (!Int32.TryParse(element.Attribute(XNames.Media.Width)?.Value, out width))
            {
                return(item);
            }

            int height;
            if (!Int32.TryParse(element.Attribute(XNames.Media.Height)?.Value, out height))
            {
                return(item);
            }

            return(item.With(thumbnail: new Thumbnail(url, width, height)));
        }
Esempio n. 3
0
        /// <summary>
        /// Builds an <c>Item</c> from a feed entry element by dispatching each child element to its handler in
        /// <c>ItemElements</c>, then filling in the body, permalink, id and title when they are missing.
        /// </summary>
        /// <param name="item">The feed entry element whose children are processed.</param>
        /// <returns>The populated item.</returns>
        static Item LoadItem(XElement item)
        {
            var result = new Item();

            foreach (XElement child in item.Elements())
            {
                Func <Item, XElement, Item> handler;
                if (!ItemElements.TryGetValue(child.Name, out handler))
                {
                    // No handler registered for this element name; ignore it.
                    continue;
                }
                result = handler(result, child);
            }

            // Body source, in order of preference: the explicit summary, then the ambiguous "description",
            // and finally "content", which is intended to be the full entry content.
            if (result.Summary != null)
            {
                result = result.With(body: SyndicationUtil.ParseBody(result.Summary));
            }
            else if (result.Description != null)
            {
                result = result.With(body: SyndicationUtil.ParseBody(result.Description));
            }
            else if (result.Content != null)
            {
                result = result.With(body: SyndicationUtil.ParseBody(result.Content));
            }

            // Fall back to the plain link when no permalink was found.
            if (result.PermaLink == null)
            {
                result = result.With(permaLink: result.Link);
            }

            if (result.Id == null)
            {
                result = result.With(id: CreateItemId(result));
            }

            if (!String.IsNullOrWhiteSpace(result.Title))
            {
                return(result);
            }

            // Untitled item: use the publication date, else the permalink, else the id as the title.
            string fallbackTitle =
                result.PubDate != null ? result.PubDate.ToString()
                : result.PermaLink != null ? result.PermaLink.AbsoluteUri
                : result.Id;

            if (fallbackTitle != null)
            {
                result = result.With(title: fallbackTitle);
            }
            return(result);
        }
Esempio n. 4
0
 /// <summary>
 /// Returns a copy of <paramref name="item"/> whose link, permalink and enclosure URLs have each been passed
 /// through <c>SyndicationUtil.Rebase</c> with <paramref name="baseUri"/>.
 /// </summary>
 /// <param name="item">The item whose URLs are rebased.</param>
 /// <param name="baseUri">The base URI handed to <c>SyndicationUtil.Rebase</c>.</param>
 /// <returns>The item produced by <c>With</c>.</returns>
 static Item Rebase(Item item, Uri baseUri)
 {
     var rebasedLink      = SyndicationUtil.Rebase(item.Link, baseUri);
     var rebasedPermaLink = SyndicationUtil.Rebase(item.PermaLink, baseUri);

     // Left as a deferred projection, exactly as handed to With.
     var rebasedEnclosures = item.Enclosures.Select(
         enclosure => enclosure.With(url: SyndicationUtil.Rebase(enclosure.Url, baseUri)));

     return(item.With(
                link: rebasedLink,
                permaLink: rebasedPermaLink,
                enclosures: rebasedEnclosures
                ));
 }
Esempio n. 5
0
 /// <summary>
 /// Appends an enclosure built from the element's <c>XNames.RSS</c> length, type and url attributes to the
 /// item's enclosures.
 /// </summary>
 /// <param name="item">The item built so far.</param>
 /// <param name="element">The enclosure element.</param>
 /// <returns>The item with the additional enclosure.</returns>
 static Item HandleEnclosure(Item item, XElement element)
 {
     var currentEnclosures = item.Enclosures;

     // Missing attributes are passed through as null.
     var enclosure = new Enclosure(
         length: element.Attribute(XNames.RSS.Length)?.Value,
         type: element.Attribute(XNames.RSS.Type)?.Value,
         url: SyndicationUtil.ParseLink(element.Attribute(XNames.RSS.Url)?.Value, element)
         );

     return(item.With(enclosures: currentEnclosures.Add(enclosure)));
 }
Esempio n. 6
0
        /// <summary>
        /// Records the date parsed from <paramref name="element"/> as the item's publication date when the
        /// item has none yet or the parsed date is later than the current one.
        /// </summary>
        /// <param name="item">The item built so far.</param>
        /// <param name="element">The element handed to <c>SyndicationUtil.ParseDate</c>.</param>
        /// <returns>The item with the newer date, or the item unchanged.</returns>
        static Item HandlePubDate(Item item, XElement element)
        {
            DateTime?parsed = SyndicationUtil.ParseDate(element);
            if (parsed == null)
            {
                return(item);
            }

            bool shouldReplace = item.PubDate == null || parsed > item.PubDate;
            return(shouldReplace ? item.With(pubDate: parsed) : item);
        }
Esempio n. 7
0
        /// <summary>
        /// Uses the element's text as the item id and, when that id starts with "http://" or "https://"
        /// (ignoring case), also uses it as the item's permalink.
        /// </summary>
        /// <param name="item">The item built so far.</param>
        /// <param name="element">The guid element.</param>
        /// <returns>The item with its id set and, for URL-like ids, its permalink set.</returns>
        static Item HandleGuid(Item item, XElement element)
        {
            Item updated = item.With(id: element.Value);
            string id = updated.Id;

            bool looksLikeUrl =
                id.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                id.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (!looksLikeUrl)
            {
                return(updated);
            }

            return(updated.With(permaLink: SyndicationUtil.ParseLink(id, element)));
        }
Esempio n. 8
0
        /// <summary>
        /// Fetches the river's origin feed and returns a copy of the river with refreshed metadata and, when
        /// the fetch produced items not already present in the river, a new feed entry inserted at the front
        /// of its updated feeds.
        /// </summary>
        /// <param name="river">The river to refresh; it is not modified, a new instance is returned.</param>
        /// <returns>The river produced by <c>With</c> from the fetch result.</returns>
        public async Task <River> UpdateAsync(River river)
        {
            // The stored ETag and Last-Modified values are handed to the fetch.
            FetchResult fetchResult = await FetchAsync(
                river.Metadata.OriginUrl,
                river.Metadata.Etag,
                river.Metadata.LastModified
                );

            // Stays as-is unless new items are found below.
            var updatedFeeds = river.UpdatedFeeds;

            if (fetchResult.Feed != null)
            {
                var    feed     = fetchResult.Feed;
                // NOTE(review): StoreItems presumably returns only the items it had not stored before; confirm.
                Item[] newItems = await this.feedItemStore.StoreItems(
                    river.Metadata.OriginUrl, feed.Items.ToArray());

                // TODO: Filter this out once we've loaded a bit.
                // Drop any item whose id already appears in one of the river's existing feeds.
                var existingItems = new HashSet <string>(
                    from existingFeed in river.UpdatedFeeds.Feeds
                    from item in existingFeed.Items
                    where item.Id != null
                    select item.Id
                    );
                newItems = newItems.Where(item => !existingItems.Contains(item.Id)).ToArray();
                if (newItems.Length > 0)
                {
                    // Prefer the feed's website URL as the base; fall back to the feed URL when it does not parse.
                    Uri baseUri = SyndicationUtil.TryParseAbsoluteUrl(feed.WebsiteUrl) ?? feed.FeedUrl;
                    for (int i = 0; i < newItems.Length; i++)
                    {
                        newItems[i] = Rebase(newItems[i], baseUri);
                    }

                    newItems = await this.thumbnailExtractor.LoadItemThumbnailsAsync(baseUri, newItems);

                    // Write the items back to the store after thumbnail loading.
                    await this.feedItemStore.UpdateItemThumbs(river.Metadata.OriginUrl, newItems);

                    // The inserted feed entry carries only the new items.
                    feed         = feed.With(items: newItems);
                    updatedFeeds = river.UpdatedFeeds.With(feeds: river.UpdatedFeeds.Feeds.Insert(0, feed));
                }
            }

            // Metadata is refreshed from the fetch result whether or not a feed came back.
            var metadata = river.Metadata.With(
                etag: fetchResult.Etag,
                lastModified: fetchResult.LastModified,
                originUrl: fetchResult.FeedUrl,
                lastStatus: fetchResult.Status);

            return(river.With(updatedFeeds: updatedFeeds, metadata: metadata));
        }
Esempio n. 9
0
        /// <summary>
        /// Discovers feed URLs starting from <paramref name="originUrl"/>. Candidates are tried in stages:
        /// the URL itself, <c>link</c> elements with a feed MIME type, anchors on the same host, other
        /// feed-like anchors, and finally guesses built from <c>FeedNames</c>.
        /// </summary>
        /// <param name="originUrl">The URL to start from; it is normalized with <c>FixupUrl</c>.</param>
        /// <param name="findAll">
        /// When false (the default), the results of the first stage that yields any URL are returned. When
        /// true, every stage runs and the results are accumulated in stage order. If the origin URL itself
        /// looks like a feed, only that URL is returned regardless of this flag.
        /// </param>
        /// <returns>The discovered feed URLs; empty when no stage found anything.</returns>
        public static async Task <IList <Uri> > GetFeedUrls(
            string originUrl,
            bool findAll = false)
        {
            var allUrls = new List <Uri>();
            Uri baseUri = FixupUrl(originUrl);

            // Maybe... maybe this one is a feed?
            Log.FindFeedCheckingBase(baseUri);
            string data = await GetFeedData(baseUri);

            if (LooksLikeFeed(data))
            {
                Log.FindFeedBaseWasFeed(baseUri);
                // Note: this path returns an array, unlike the List returned by the later stages.
                return(new[] { baseUri });
            }

            // Nope, let's dive into the soup!
            var           parser   = new HtmlParser();
            IHtmlDocument document = parser.ParseDocument(data);

            // Link elements.
            Log.FindFeedCheckingLinkElements(baseUri);
            List <Uri> linkUrls = new List <Uri>();

            foreach (IElement element in document.GetElementsByTagName("link"))
            {
                // Only links whose type attribute is one of the known feed MIME types are considered.
                string linkType = element.GetAttribute("type");
                if (linkType != null && FeedMimeTypes.Contains(linkType))
                {
                    Uri hrefUrl =
                        SyndicationUtil.TryParseAbsoluteUrl(
                            element.GetAttribute("href"),
                            baseUri
                            );
                    if (hrefUrl != null)
                    {
                        linkUrls.Add(hrefUrl);
                    }
                }
            }

            // NOTE(review): FilterUrlsByFeed presumably removes, in place, entries that are not feeds; the
            // Count check that follows relies on that. Confirm against its definition.
            await FilterUrlsByFeed(linkUrls);

            if (linkUrls.Count > 0)
            {
                Log.FindFeedFoundLinkElements(baseUri, linkUrls);
                linkUrls.Sort(UrlFeedComparison);
                allUrls.AddRange(linkUrls);
                if (!findAll)
                {
                    return(allUrls);
                }
            }

            // <a> tags
            Log.FindFeedCheckingAnchorElements(baseUri);
            List <Uri> localGuesses  = new List <Uri>();
            List <Uri> remoteGuesses = new List <Uri>();

            foreach (IElement element in document.GetElementsByTagName("a"))
            {
                Uri hrefUrl =
                    SyndicationUtil.TryParseAbsoluteUrl(
                        element.GetAttribute("href"),
                        baseUri
                        );
                if (hrefUrl != null)
                {
                    // Same-host anchors must pass IsFeedUrl; every other anchor (including same-host ones
                    // that fail IsFeedUrl) is kept as a remote guess if it passes IsFeedishUrl.
                    if ((hrefUrl.Host == baseUri.Host) && IsFeedUrl(hrefUrl))
                    {
                        localGuesses.Add(hrefUrl);
                    }
                    else if (IsFeedishUrl(hrefUrl))
                    {
                        remoteGuesses.Add(hrefUrl);
                    }
                }
            }

            Log.FindFeedFoundSomeAnchors(baseUri, localGuesses, remoteGuesses);

            // (Consider ones on the same domain first.)
            await FilterUrlsByFeed(localGuesses);

            if (localGuesses.Count > 0)
            {
                Log.FindFeedsFoundLocalGuesses(baseUri, localGuesses);
                localGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(localGuesses);
                if (!findAll)
                {
                    return(localGuesses);
                }
            }

            await FilterUrlsByFeed(remoteGuesses);

            if (remoteGuesses.Count > 0)
            {
                Log.FindFeedsFoundRemoteGuesses(baseUri, remoteGuesses);
                remoteGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(remoteGuesses);
                if (!findAll)
                {
                    return(remoteGuesses);
                }
            }

            // Last resort: resolve each entry of FeedNames against the base URL and test those.
            List <Uri> randomGuesses =
                FeedNames.Select(s => new Uri(baseUri, s)).ToList();

            await FilterUrlsByFeed(randomGuesses);

            if (randomGuesses.Count > 0)
            {
                Log.FindFeedsFoundRandomGuesses(baseUri, randomGuesses);
                randomGuesses.Sort(UrlFeedComparison);
                allUrls.AddRange(randomGuesses);
                if (!findAll)
                {
                    return(randomGuesses);
                }
            }

            // All done, nothing. (Or... everything!)
            Log.FindFeedFoundTotal(baseUri, allUrls);
            return(allUrls);
        }