示例#1
0
 /// <summary>
 /// Fills in the feed's icon location when it does not have one yet.
 /// </summary>
 /// <param name="feed">Feed whose <c>IconUri</c> is set from the icon finder when it is null.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Update(Feed feed)
 {
     var needsIcon = feed.IconUri == null;
     if (needsIcon)
     {
         // Look the icon up from the feed's own address.
         feed.IconUri = _findIcon.Find(feed.Uri);
     }
     return true;
 }
示例#2
0
文件: User.cs 项目: dhasenan/pierce
 /// <summary>
 /// Returns this user's subscription to the given feed, creating and
 /// registering one with the default check interval if none exists.
 /// </summary>
 /// <param name="f">Feed to subscribe to; matched against existing subscriptions by <c>Id</c>.</param>
 /// <returns>The existing or newly added subscription.</returns>
 public Subscription SubscribeTo(Feed f)
 {
     var existing = Subscriptions.FirstOrDefault(x => x.FeedId == f.Id);
     if (existing != null)
     {
         return existing;
     }

     var created = new Subscription
     {
         FeedUri = f.Uri,
         FeedId = f.Id,
         CheckInterval = DefaultCheckInterval
     };
     Subscriptions.Add(created);
     return created;
 }
示例#3
0
 /// <summary>
 /// If the feed still carries a chunk in its <c>Head</c> property, clears that
 /// property and hands the chunk to <c>SetHeadChunk</c> instead.
 /// </summary>
 /// <param name="feed">Feed to adjust in place.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Update(Feed feed)
 {
     var inlineHead = feed.Head;
     if (inlineHead == null)
     {
         return true;
     }

     // Clear the property first, then re-register the chunk through SetHeadChunk.
     // The original author expected this to "munge things about appropriately".
     feed.Head = null;
     feed.SetHeadChunk(inlineHead);
     return true;
 }
示例#4
0
 /// <summary>
 /// Populates <paramref name="feed"/> from a parsed document, treating it as RSS
 /// when it contains an &lt;rss&gt; element and as Atom when it contains an Atom
 /// &lt;feed&gt; element. RSS wins if both are present.
 /// </summary>
 /// <param name="feed">Feed to populate.</param>
 /// <param name="x">Parsed feed document.</param>
 /// <exception cref="ArgumentException">The document contains neither element.</exception>
 public void Read(Feed feed, XDocument x)
 {
     var rssElement = x.Descendants("rss").FirstOrDefault();
     var atomElement = x.Descendants(atom + "feed").FirstOrDefault();

     if (rssElement != null)
     {
         ReadRss(feed, x);
         return;
     }

     if (atomElement == null)
     {
         throw new ArgumentException("Feed did not contain an <rss> or <feed> element.");
     }

     ReadAtom(feed, x);
 }
示例#5
0
 /// <summary>
 /// Deletes the feed (and its chunks) when no user subscribes to it; otherwise sets
 /// the feed's read interval to the shortest interval any subscriber asked for,
 /// but never below the configured minimum.
 /// </summary>
 /// <param name="feed">Feed to inspect; its <c>ReadInterval</c> is updated in place.</param>
 /// <returns>
 /// <c>false</c> when the feed had no subscribers and was removed from the database;
 /// <c>true</c> otherwise.
 /// </returns>
 public bool Update(Feed feed)
 {
     // Build the id once; it is used for the subscriber query and both removals.
     var feedId = new ObjectId(feed.Id);

     var users = _db.Users.Find(Query.ElemMatch("Subscriptions", Query.EQ("FeedId", feedId))).ToList();
     if (!users.Any())
     {
         _db.Feeds.Remove(Query.EQ("_id", feedId));
         _db.Chunks.Remove(Query.EQ("FeedId", feedId));
         return false;
     }

     var intervals = users
         .Select(x => x.GetSubscription(feed.Id))
         .Where(x => x != null)
         .Select(x => x.CheckInterval)
         .ToList();

     // GetSubscription may return null for every matched user (the filter above
     // already allows for that). Min() on an empty sequence would throw, so fall
     // back to the configured floor instead of failing the whole task.
     if (!intervals.Any())
     {
         feed.ReadInterval = _config.MinUpdateInterval;
         return true;
     }

     var interval = intervals.Min();
     if (interval < _config.MinUpdateInterval)
     {
         interval = _config.MinUpdateInterval;
     }
     feed.ReadInterval = interval;
     return true;
 }
示例#6
0
 /// <summary>
 /// Runs every configured task against the feed in order, then stamps the read
 /// times and saves the feed.
 /// </summary>
 /// <remarks>
 /// A task that returns <c>false</c> stops processing immediately, and the feed is
 /// not stamped or saved. A task that throws is logged, counted in
 /// <c>feed.Errors</c>, and the remaining tasks still run.
 /// </remarks>
 /// <param name="feed">Feed to process.</param>
 public void ExecuteSingle(Feed feed)
 {
     foreach (var step in _tasks)
     {
         try
         {
             _logger.InfoFormat("running task {0} on feed {1}", step, feed);
             var keepGoing = step.Update(feed);
             if (!keepGoing)
             {
                 return;
             }
         }
         catch (Exception failure)
         {
             _logger.ErrorFormat("failed to run task {0} on feed {1}: {2}", step, feed, failure);
             feed.Errors++;
         }
     }

     feed.LastRead = DateTime.UtcNow;
     feed.NextRead = DateTime.UtcNow + feed.ReadInterval;
     feed.Save(_db);
 }
示例#7
0
 /// <summary>
 /// Deletes stored chunks for this feed that the feed does not reference, and
 /// chunks that are referenced but hold no articles (these are also dropped from
 /// <c>feed.ChunkIds</c>).
 /// </summary>
 /// <param name="feed">Feed whose chunks are checked.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Update(Feed feed)
 {
     // Known race condition:
     //  * The user hit "update feed now".
     //  * A new chunk was added and saved.
     //  * The feed referencing it has not been saved yet, so the chunk looks orphaned.
     // Giving chunks an assigned id might be an appropriate fix.
     foreach (var stored in _db.Chunks.Find(Query.EQ("FeedId", feed.Id)))
     {
         var referenced = feed.ChunkIds.Contains(stored.Id) || feed.HeadChunkId == stored.Id;
         if (referenced && stored.Articles.Count != 0)
         {
             // Still in use and not empty: keep it.
             continue;
         }

         if (!referenced)
         {
             _logger.InfoFormat("removing orphan chunk {0}", stored.Id);
         }
         else
         {
             _logger.InfoFormat("removing empty chunk {0}", stored.Id);
             feed.ChunkIds.Remove(stored.Id);
         }
         _db.Chunks.Remove(Query.EQ("_id", new ObjectId(stored.Id)));
     }
     return true;
 }
示例#8
0
 /// <summary>
 /// Splits the head chunk while it holds more than <c>MaxArticlesPerChunk</c> articles,
 /// saves the feed, then rebuilds <c>feed.Articles</c> from the chunks listed in
 /// <c>feed.ChunkIds</c> and saves again.
 /// </summary>
 /// <param name="feed">Feed to reshuffle; modified in place and saved twice.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Update(Feed feed)
 {
     // Now, we've already added some articles, maybe.
     // These have been added to the head chunk, which might be oversized.
     // Let's say they haven't been added to feed.Articles.
     var headChunk = feed.GetHeadChunk(_db);
     _logger.DebugFormat("incoming feed has {0} saved chunks and {1} cached chunks already", feed.ChunkIds.Count, feed.CachedChunkCount);
     while (headChunk.Articles.Count > MaxArticlesPerChunk)
     {
         _logger.InfoFormat("reshuffling chunks for feed {0}", feed);
         var oldHead = headChunk;
         headChunk = new Chunk();
         // The new, still-empty chunk is registered as head before it receives articles.
         // NOTE(review): presumably SetHeadChunk also records the previous head in ChunkIds - confirm.
         feed.SetHeadChunk(headChunk);
         // Sorted by ascending publish date: the old head keeps the first MaxArticlesPerChunk
         // (oldest) articles and the new head takes everything after them.
         headChunk.Articles = oldHead.Articles.OrderBy(x => x.PublishDate).Skip(MaxArticlesPerChunk).ToList();
         oldHead.Articles = oldHead.Articles.OrderBy(x => x.PublishDate).Take(MaxArticlesPerChunk).ToList();
         _logger.DebugFormat("old head has {0} articles; new has {1}", oldHead.Articles.Count, headChunk.Articles.Count);
     }
     feed.Save(_db);
     // Okay, let's rebuild feed.Articles.
     // This is loading way too much data...
     feed.Articles.Clear();
     foreach (var id in feed.ChunkIds)
     {
         var chunk = feed.GetChunk(id, _db);
         if (chunk == null) {
             // A listed chunk could not be loaded; warn and carry on with the rest.
             _logger.WarnFormat("feed {0} missing chunk {1}", feed.Id, id);
             continue;
         }
         feed.Articles.AddRange(chunk.Articles);
     }
     // Keep only the MaxArticlesPerChunk most recent articles, stored oldest-first.
     feed.Articles = feed.Articles.OrderByDescending(x => x.PublishDate).Take(MaxArticlesPerChunk).Reverse().ToList();
     //feed.Articles = feed.ChunkIds.Select(x => feed.GetChunk(x, _db)).Where(x => x != null).SelectMany(x => x.Articles).OrderByDescending(x => x.PublishDate).Take(MaxArticlesPerChunk).Reverse().ToList();
     feed.Save(_db);
     _logger.DebugFormat("outgoing feed has {0} saved chunks", feed.ChunkIds.Count);
     return true;
 }
示例#9
0
 /// <summary>
 /// Runs the feed reader on <paramref name="feed"/>.
 /// </summary>
 /// <param name="feed">Feed passed to the reader.</param>
 /// <returns>Always <c>true</c>; an exception thrown by the reader is not caught here.</returns>
 public bool Update(Feed feed)
 {
     _reader.Read(feed);
     return true;
 }
示例#10
0
 /// <summary>
 /// Downloads the document at the feed's address and hands it to the parser,
 /// which fills in <paramref name="feed"/>.
 /// </summary>
 /// <param name="feed">Feed to refresh; its <c>Uri</c> is the address fetched.</param>
 public void Read(Feed feed)
 {
     _logger.InfoFormat("reading feed {0} from {1}", feed.Id, feed.Uri);
     _parser.Read(feed, _wget.Xml(feed.Uri));
 }
示例#11
0
        /// <summary>
        /// Finds the feeds available at a URL. The URL may point at an HTML page that
        /// links to feeds, or directly at an RSS/Atom document.
        /// </summary>
        /// <remarks>
        /// A <c>feed://</c> prefix is rewritten to <c>http://</c>, and input without an
        /// <c>http://</c> or <c>https://</c> prefix gets <c>http://</c> prepended.
        /// If the resulting address is already a stored feed, only that feed is returned.
        /// When exactly one feed is found and it has no articles, it is read and saved
        /// before returning.
        /// </remarks>
        /// <param name="pageUrl">Address typed or pasted by the user.</param>
        /// <returns>
        /// The feeds found; empty when the page could not be fetched or contained none.
        /// </returns>
        /// <exception cref="UriFormatException">The normalized address is not a valid URI.</exception>
        public List<Feed> FromHtmlPage(string pageUrl)
        {
            // Scheme names are case-insensitive and not linguistic text, so compare ordinally.
            if (pageUrl.StartsWith("feed://", StringComparison.OrdinalIgnoreCase))
            {
                // Swap the four-character "feed" scheme for "http", keeping the rest.
                pageUrl = "http" + pageUrl.Substring(4);
            }
            else if (!pageUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                && !pageUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                // Require the full "http://" / "https://" prefix: a bare "http" test would
                // mistake hosts such as "httpbin.org" for an address that already has a scheme.
                // We don't support gopher links.
                pageUrl = "http://" + pageUrl;
            }
            var uri = new Uri(pageUrl);
            _logger.InfoFormat("looking for feeds at {0}", uri);
            var feeds = new List<Feed>();
            var existing = Feed.ByUri(uri.ToString(), _db);
            if (existing != null)
            {
                _logger.Info("we already had that feed!");
                feeds.Add(existing);
                return feeds;
            }
            string text = _wget.Text(uri);
            if (text == null)
            {
                _logger.InfoFormat("we failed to find any page at that URL");
                return feeds;
            }

            // Is this an rss feed or an html page? Try both interpretations; each attempt
            // is best-effort and a failure is only logged.
            try
            {
                // rss feed definitely shouldn't parse as html
                _logger.InfoFormat("trying to load the URL as an HTML document...");
                var doc = new HtmlDocument();
                doc.LoadHtml(text);
                FindFeeds(doc, uri, feeds, "application/rss+xml");
                FindFeeds(doc, uri, feeds, "application/atom+xml");
                _logger.InfoFormat("...done, found {0} feeds", feeds.Count);
            }
            catch (Exception ex)
            {
                _logger.InfoFormat(ex, "failed to find feed links");
            }

            try
            {
                _logger.InfoFormat("trying to load the URL as a feed document...");
                var feed = new Feed();
                feed.Uri = uri;
                var xdoc = XDocument.Parse(text);
                _parser.Read(feed, xdoc);
                // This supersedes the html stuff, on the off chance someone put <link> elements in their feed.
                feeds.Clear();
                feeds.Add(feed);
                _logger.InfoFormat("...success!");
            }
            catch (Exception ex)
            {
                _logger.InfoFormat(ex, "failed to parse the document as an RSS or Atom feed");
            }

            _logger.InfoFormat("done searching; found {0} feeds", feeds.Count);

            // A single feed with no articles yet is read and stored right away.
            // Exceptions from the reader or from saving are not caught here.
            if (feeds.Count == 1 && feeds[0].Articles.Count == 0)
            {
                var f = feeds[0];
                _reader.Read(f);
                f.Save(_db);
            }
            return feeds;
        }
示例#12
0
 /// <summary>
 /// Builds a feed from an HTML link element found on a page, or returns the
 /// stored feed that already has the link's address.
 /// </summary>
 /// <param name="pageUrl">Address of the page; used to resolve a relative href and as the fallback title source.</param>
 /// <param name="link">Link element whose <c>href</c> and <c>title</c> attributes are read.</param>
 /// <returns>
 /// The existing or newly created (unsaved) feed, or <c>null</c> when the link has
 /// no <c>href</c> or anything goes wrong while processing it.
 /// </returns>
 private Feed ReadRss(Uri pageUrl, HtmlNode link)
 {
     try
     {
         var targetAttribute = link.Attributes["href"];
         if (targetAttribute == null)
         {
             return null;
         }

         var feed = new Feed();
         _logger.InfoFormat("looking for RSS / Atom document at {0}", targetAttribute.Value);
         feed.Uri = new Uri(pageUrl, targetAttribute.Value);
         // Some people in the wild use a "feed" scheme. IANA doesn't recognize this, though.
         if (feed.Uri.Scheme == "feed")
         {
             feed.Uri = new Uri("http" + feed.Uri.ToString().Substring(4));
         }
         var existing = Feed.ByUri(feed.Uri.ToString(), _db);
         if (existing != null)
         {
             return existing;
         }

         var titleAttribute = link.Attributes["title"];
         if (titleAttribute != null)
         {
             feed.Title = titleAttribute.Value;
         }
         else
         {
             // No title on the link: fall back to the page's host name.
             _logger.InfoFormat("no page title and no feed found from page at {0}", pageUrl);
             feed.Title = pageUrl.Host;
         }
         return feed;
     }
     catch (Exception ex)
     {
         // Still best-effort (a malformed link must not abort discovery), but record
         // the cause instead of discarding it silently.
         _logger.InfoFormat("failed to read feed link on page {0}: {1}", pageUrl, ex);
         return null;
     }
 }
示例#13
0
 /// <summary>
 /// Populates <paramref name="feed"/> from an RSS document: channel title,
 /// description, link, optional image details, and the channel's items.
 /// </summary>
 /// <param name="feed">Feed to populate.</param>
 /// <param name="x">Parsed RSS document.</param>
 /// <exception cref="ArgumentException">
 /// The document has no &lt;rss&gt; element with a &lt;channel&gt; child.
 /// </exception>
 private void ReadRss(Feed feed, XDocument x)
 {
     // Search the whole document for <rss>, matching how Read(Feed, XDocument) detects
     // RSS. Looking only at the root would dereference null for a nested <rss> element.
     var rss = x.Descendants("rss").FirstOrDefault();
     var channel = rss == null ? null : rss.Element("channel");
     if (channel == null)
     {
         throw new ArgumentException("Feed did not contain a <channel> element.");
     }
     Elem(channel, "title", v => feed.Title = v);
     Elem(channel, "description", v => feed.Description = v);
     // NOTE(review): new Uri(v) throws on a relative or malformed link, while the image
     // links below go through ElemLink - confirm whether this should use ElemLink too.
     Elem(channel, "link", v => feed.Link = new Uri(v));
     var img = channel.Elements("image").FirstOrDefault();
     if (img != null)
     {
         Elem(img, "title", v => feed.ImageTitle = v);
         // RSS has a single image, so it serves as both logo and icon.
         ElemLink(img, "url", v =>
             {
                 feed.LogoUri = v;
                 feed.IconUri = v;
             }
         );
         ElemLink(img, "link", v => feed.ImageLinkTarget = v);
     }
     ReadArticles(feed, channel.Elements("item"));
 }
示例#14
0
 /// <summary>
 /// Populates <paramref name="feed"/> from an Atom document: title, alternate link,
 /// icon, logo, authors and contributors, and the feed's entries.
 /// </summary>
 /// <param name="feed">Feed to populate.</param>
 /// <param name="x">Parsed Atom document; must contain an Atom &lt;feed&gt; element.</param>
 private void ReadAtom(Feed feed, XDocument x)
 {
     XName feedName = atom + "feed";
     var root = x.Descendants(feedName).First();

     // Feed-level metadata.
     XName titleName = atom + "title";
     Elem(root, titleName, v => feed.Title = v);
     ElemAttrLink(root, "alternate", v => feed.Link = v);
     XName iconName = atom + "icon";
     ElemLink(root, iconName, v => feed.IconUri = v);
     XName logoName = atom + "logo";
     ElemLink(root, logoName, v => feed.LogoUri = v);

     // Authors and contributors are collected into the same list.
     ReadAuthors(root, "author", feed.Authors);
     ReadAuthors(root, "contributor", feed.Authors);

     XName entryName = atom + "entry";
     ReadArticles(feed, root.Elements(entryName));
 }
示例#15
0
        /// <summary>
        /// Parses the given item/entry elements into articles and adds the ones not
        /// already present in the feed's head chunk.
        /// </summary>
        /// <param name="feed">Feed whose head chunk receives the new articles.</param>
        /// <param name="elements">Item or entry elements, in document order.</param>
        private void ReadArticles(Feed feed, IEnumerable<XElement> elements)
        {
            // A single timestamp is passed to every ReadArticle call. Per the inline comments
            // below, an article whose PublishDate equals it had no explicit date of its own.
            var now = DateTime.UtcNow;
            var headChunk = feed.GetHeadChunk(_db);
            // Materialized once; elements for which ReadArticle returns null are dropped.
            var allArticles = elements
                .Select(x => ReadArticle(x, now))
                .Where(x => x != null)
                .ToList();

            // Reverse() buffers its whole input before yielding, so the TakeWhile check here
            // runs against the head chunk as it was before any article of this batch is added.
            headChunk.AddAll(allArticles
                // Articles with an explicitly set date
                .Where(x => x.PublishDate != now)
                // Order by the date, most recent to least recent
                .OrderByDescending(x => x.PublishDate)
                // Grab newest to oldest, stop if we've seen this before
                .TakeWhile(x => headChunk.GetArticle(x.UniqueId) == null)
                // But insert oldest to newest, because it can cause issues otherwise.
                .Reverse());
            // No Reverse() here, so the TakeWhile check is evaluated lazily, in document order,
            // as AddAll consumes the sequence.
            // NOTE(review): whether articles added earlier in this same call are visible to
            // GetArticle depends on how AddAll enumerates - confirm.
            headChunk.AddAll(allArticles
                // Articles with no explicitly set date
                .Where(x => x.PublishDate == now)
                // Only ones that appear in the list before the last one we've seen before.
                .TakeWhile(x => headChunk.GetArticle(x.UniqueId) == null));
        }