/// <summary>
/// Ensures the feed has an icon: when none is set yet, attempts discovery
/// based on the feed's URI. Always reports success so the pipeline continues.
/// </summary>
public bool Update(Feed feed)
{
    if (feed.IconUri != null)
    {
        return true;
    }

    feed.IconUri = _findIcon.Find(feed.Uri);
    return true;
}
/// <summary>
/// Returns the user's subscription to the given feed, creating one with the
/// default check interval if no subscription exists yet.
/// </summary>
/// <param name="f">The feed to subscribe to; must already carry its Id and Uri.</param>
/// <returns>The existing or newly created subscription.</returns>
public Subscription SubscribeTo(Feed f)
{
    // FirstOrDefault with a predicate instead of Where(...).FirstOrDefault():
    // same result, one less enumerator.
    var sub = Subscriptions.FirstOrDefault(x => x.FeedId == f.Id);
    if (sub == null)
    {
        sub = new Subscription
        {
            FeedUri = f.Uri,
            FeedId = f.Id,
            CheckInterval = DefaultCheckInterval
        };
        Subscriptions.Add(sub);
    }
    return sub;
}
// Migrates a feed that still carries an inline Head chunk: detaches the chunk
// from feed.Head and re-registers it through SetHeadChunk so it goes through
// the normal head-chunk bookkeeping path.
// NOTE(review): feed.Head is cleared *before* SetHeadChunk runs — presumably
// SetHeadChunk behaves differently when Head is still set; confirm before
// reordering these statements.
public bool Update(Feed feed)
{
    if (feed.Head != null)
    {
        var head = feed.Head;
        feed.Head = null;
        // This should munge things about appropriately...I think.
        feed.SetHeadChunk(head);
    }
    // Always let subsequent pipeline tasks run.
    return true;
}
/// <summary>
/// Dispatches parsing of the document to the RSS or Atom reader depending on
/// which root element is present. RSS wins if both somehow exist.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when the document contains neither an &lt;rss&gt; nor an Atom
/// &lt;feed&gt; element.
/// </exception>
public void Read(Feed feed, XDocument x)
{
    if (x.Descendants("rss").FirstOrDefault() != null)
    {
        ReadRss(feed, x);
        return;
    }

    XName atomFeedName = atom + "feed";
    if (x.Descendants(atomFeedName).FirstOrDefault() != null)
    {
        ReadAtom(feed, x);
        return;
    }

    throw new ArgumentException("Feed did not contain an <rss> or <feed> element.");
}
/// <summary>
/// Recomputes how often the feed should be polled: the shortest check
/// interval requested by any subscriber, clamped to the configured minimum.
/// If nobody subscribes to the feed any more, the feed and its chunks are
/// deleted.
/// </summary>
/// <returns>false when the feed was deleted (aborts the task pipeline); true otherwise.</returns>
public bool Update(Feed feed)
{
    var feedId = new ObjectId(feed.Id);
    var users = _db.Users.Find(Query.ElemMatch("Subscriptions", Query.EQ("FeedId", feedId))).ToList();
    if (!users.Any())
    {
        // No subscribers left: garbage-collect the feed and its article chunks.
        _db.Feeds.Remove(Query.EQ("_id", feedId));
        _db.Chunks.Remove(Query.EQ("FeedId", feedId));
        return false;
    }

    var intervals = users
        .Select(x => x.GetSubscription(feed.Id))
        .Where(x => x != null)
        .Select(x => x.CheckInterval)
        .ToList();

    // Defensive: the ElemMatch query above should guarantee each user has a
    // matching subscription, but if a race removed them all, Min() on an
    // empty sequence would throw InvalidOperationException. Fall back to the
    // configured floor instead.
    var interval = intervals.Any() ? intervals.Min() : _config.MinUpdateInterval;

    // Never poll faster than the configured minimum.
    if (interval < _config.MinUpdateInterval)
    {
        interval = _config.MinUpdateInterval;
    }

    feed.ReadInterval = interval;
    return true;
}
/// <summary>
/// Runs every registered task against the feed in order. A task returning
/// false aborts the pipeline early (e.g. the feed was deleted) and the feed
/// is NOT saved. Exceptions from individual tasks are logged and counted but
/// do not stop the remaining tasks.
/// </summary>
public void ExecuteSingle(Feed feed)
{
    foreach (var task in _tasks)
    {
        try
        {
            _logger.InfoFormat("running task {0} on feed {1}", task, feed);
            if (!task.Update(feed))
            {
                // Task signalled a hard stop (the feed may no longer exist);
                // intentionally skip the timestamp update and save below.
                return;
            }
        }
        catch (Exception ex)
        {
            _logger.ErrorFormat("failed to run task {0} on feed {1}: {2}", task, feed, ex);
            feed.Errors++;
        }
    }

    // Capture the clock once so LastRead and NextRead are based on the same
    // instant (the original read UtcNow twice, yielding slightly different
    // values).
    var now = DateTime.UtcNow;
    feed.LastRead = now;
    feed.NextRead = now + feed.ReadInterval;
    feed.Save(_db);
}
/// <summary>
/// Cleans up chunk documents belonging to the feed: deletes chunks the feed
/// no longer references (orphans) and chunks that contain no articles.
/// </summary>
public bool Update(Feed feed)
{
    // Race condition:
    // * User hit "update feed now".
    // * We added a new chunk.
    // * We haven't yet saved the new feed, but we saved the chunks.
    // Might be appropriate to have an assigned id for chunks.

    // Query FeedId as an ObjectId to match how the same field is queried in
    // the subscriber-cleanup task (Chunks.Remove with new ObjectId(feed.Id));
    // the original passed the raw string, which queries a different BSON type
    // and would silently match nothing.
    foreach (var chunk in _db.Chunks.Find(Query.EQ("FeedId", new ObjectId(feed.Id))))
    {
        if (!feed.ChunkIds.Contains(chunk.Id) && feed.HeadChunkId != chunk.Id)
        {
            _logger.InfoFormat("removing orphan chunk {0}", chunk.Id);
            _db.Chunks.Remove(Query.EQ("_id", new ObjectId(chunk.Id)));
        }
        else if (chunk.Articles.Count == 0)
        {
            _logger.InfoFormat("removing empty chunk {0}", chunk.Id);
            feed.ChunkIds.Remove(chunk.Id);
            _db.Chunks.Remove(Query.EQ("_id", new ObjectId(chunk.Id)));
        }
    }
    return true;
}
// Rebalances the feed's chunks after new articles have been added to the
// head chunk, then rebuilds the feed's cached Articles list from the saved
// chunks.
public bool Update(Feed feed)
{
    // Now, we've already added some articles, maybe.
    // These have been added to the head chunk, which might be oversized.
    // Let's say they haven't been added to feed.Articles.
    var headChunk = feed.GetHeadChunk(_db);
    _logger.DebugFormat("incoming feed has {0} saved chunks and {1} cached chunks already", feed.ChunkIds.Count, feed.CachedChunkCount);
    // While the head chunk is oversized, split it: the oldest
    // MaxArticlesPerChunk articles (by PublishDate) stay in the old head,
    // the newer overflow moves into a freshly created head chunk. Repeats
    // until the head is within the limit.
    while (headChunk.Articles.Count > MaxArticlesPerChunk)
    {
        _logger.InfoFormat("reshuffling chunks for feed {0}", feed);
        var oldHead = headChunk;
        headChunk = new Chunk();
        // NOTE(review): SetHeadChunk is assumed to fold the old head into
        // feed.ChunkIds — confirm; the rebuild below relies on it.
        feed.SetHeadChunk(headChunk);
        headChunk.Articles = oldHead.Articles.OrderBy(x => x.PublishDate).Skip(MaxArticlesPerChunk).ToList();
        oldHead.Articles = oldHead.Articles.OrderBy(x => x.PublishDate).Take(MaxArticlesPerChunk).ToList();
        _logger.DebugFormat("old head has {0} articles; new has {1}", oldHead.Articles.Count, headChunk.Articles.Count);
    }
    // Persist the reshuffled chunk layout before rebuilding the cache.
    feed.Save(_db);
    // Okay, let's rebuild feed.Articles.
    // This is loading way too much data...
    feed.Articles.Clear();
    foreach (var id in feed.ChunkIds)
    {
        var chunk = feed.GetChunk(id, _db);
        if (chunk == null)
        {
            // Tolerate dangling chunk ids (e.g. a chunk deleted by cleanup).
            _logger.WarnFormat("feed {0} missing chunk {1}", feed.Id, id);
            continue;
        }
        feed.Articles.AddRange(chunk.Articles);
    }
    // Keep only the newest MaxArticlesPerChunk articles, stored oldest-first
    // (hence the Reverse after taking newest-first).
    feed.Articles = feed.Articles.OrderByDescending(x => x.PublishDate).Take(MaxArticlesPerChunk).Reverse().ToList();
    //feed.Articles = feed.ChunkIds.Select(x => feed.GetChunk(x, _db)).Where(x => x != null).SelectMany(x => x.Articles).OrderByDescending(x => x.PublishDate).Take(MaxArticlesPerChunk).Reverse().ToList();
    feed.Save(_db);
    _logger.DebugFormat("outgoing feed has {0} saved chunks", feed.ChunkIds.Count);
    return true;
}
// Fetches and parses the feed's remote document via the injected reader.
// Always returns true so later pipeline tasks run; fetch/parse failures
// surface as exceptions, which the pipeline runner catches and logs.
public bool Update(Feed feed)
{
    _reader.Read(feed);
    return true;
}
/// <summary>
/// Downloads the feed's document from its URI and hands the parsed XML to
/// the parser, which populates the feed in place.
/// </summary>
public void Read(Feed feed)
{
    _logger.InfoFormat("reading feed {0} from {1}", feed.Id, feed.Uri);
    var document = _wget.Xml(feed.Uri);
    _parser.Read(feed, document);
}
/// <summary>
/// Discovers feeds reachable from a URL: the URL may itself be an RSS/Atom
/// document, or an HTML page carrying &lt;link&gt; elements pointing at
/// feeds. Feeds already stored in the database are returned as-is.
/// </summary>
/// <param name="pageUrl">A page or feed URL; "feed://" and scheme-less URLs are normalized to http.</param>
/// <returns>The feeds found; empty when the URL could not be fetched.</returns>
public List<Feed> FromHtmlPage(string pageUrl)
{
    var uri = new Uri(NormalizePageUrl(pageUrl));
    _logger.InfoFormat("looking for feeds at {0}", uri);
    var feeds = new List<Feed>();

    // Short-circuit: reuse a feed we've already stored for this exact URI.
    var existing = Feed.ByUri(uri.ToString(), _db);
    if (existing != null)
    {
        _logger.Info("we already had that feed!");
        feeds.Add(existing);
        return feeds;
    }

    string text = _wget.Text(uri);
    if (text == null)
    {
        _logger.InfoFormat("we failed to find any page at that URL");
        return feeds;
    }

    // Is this an rss feed or an html page? Try both; a successful feed parse
    // supersedes any links scraped from HTML.
    FindFeedLinksInHtml(text, uri, feeds);
    TryParseAsFeedDocument(text, uri, feeds);

    _logger.InfoFormat("done searching; found {0} feeds", feeds.Count);

    // A single feed with no articles yet hasn't been read; do a full read so
    // the caller gets content immediately.
    if (feeds.Count == 1 && feeds[0].Articles.Count == 0)
    {
        var f = feeds[0];
        _reader.Read(f);
        f.Save(_db);
    }
    return feeds;
}

// Normalizes user-supplied URLs: "feed://" becomes "http://", and URLs with
// no http(s) scheme get an "http://" prefix. Ordinal comparison because URL
// schemes are not culture-sensitive (the original used the culture-sensitive
// StartsWith overload).
private static string NormalizePageUrl(string pageUrl)
{
    if (pageUrl.StartsWith("feed://", StringComparison.Ordinal))
    {
        return "http" + pageUrl.Substring(4);
    }
    if (!pageUrl.StartsWith("http", StringComparison.Ordinal))
    {
        // We don't support gopher links.
        return "http://" + pageUrl;
    }
    return pageUrl;
}

// Treats the text as an HTML page and collects RSS/Atom <link> targets into
// `feeds`. Best-effort: any failure is logged and ignored.
private void FindFeedLinksInHtml(string text, Uri uri, List<Feed> feeds)
{
    try
    {
        // rss feed definitely shouldn't parse as html
        _logger.InfoFormat("trying to load the URL as an HTML document...");
        var doc = new HtmlDocument();
        doc.LoadHtml(text);
        FindFeeds(doc, uri, feeds, "application/rss+xml");
        FindFeeds(doc, uri, feeds, "application/atom+xml");
        _logger.InfoFormat("...done, found {0} feeds", feeds.Count);
    }
    catch (Exception ex)
    {
        _logger.InfoFormat(ex, "failed to find feed links");
    }
}

// Treats the text as an RSS/Atom document itself; on success this replaces
// anything found via HTML links. Best-effort: parse failures are logged and
// leave `feeds` untouched.
private void TryParseAsFeedDocument(string text, Uri uri, List<Feed> feeds)
{
    try
    {
        _logger.InfoFormat("trying to load the URL as a feed document...");
        var feed = new Feed();
        feed.Uri = uri;
        var xdoc = XDocument.Parse(text);
        _parser.Read(feed, xdoc);
        // This supercedes the html stuff, on the off chance someone put <link> elements in their feed.
        feeds.Clear();
        feeds.Add(feed);
        _logger.InfoFormat("...success!");
    }
    catch (Exception ex)
    {
        _logger.InfoFormat(ex, "failed to parse the document as an RSS or Atom feed");
    }
}
// Builds a Feed from an RSS/Atom <link> element on an HTML page. Returns the
// stored feed when the target URI is already in the database, or null when
// the element has no href or the URI is malformed.
private Feed ReadRss(Uri pageUrl, HtmlNode link)
{
    try
    {
        var feed = new Feed();
        var href = link.Attributes["href"];
        if (href == null)
        {
            return null;
        }

        _logger.InfoFormat("looking for RSS / Atom document at {0}", href.Value);
        feed.Uri = new Uri(pageUrl, href.Value);
        // Some people in the wild use a "feed" scheme. IANA doesn't recognize this, though.
        if (feed.Uri.Scheme == "feed")
        {
            feed.Uri = new Uri("http" + feed.Uri.ToString().Substring(4));
        }

        var existing = Feed.ByUri(feed.Uri.ToString(), _db);
        if (existing != null)
        {
            return existing;
        }

        var title = link.Attributes["title"];
        if (title != null)
        {
            feed.Title = title.Value;
        }
        else
        {
            _logger.InfoFormat("no page title and no feed found from page at {0}", pageUrl);
            feed.Title = pageUrl.Host;
        }
        return feed;
    }
    catch
    {
        // malformed
        return null;
    }
}
/// <summary>
/// Populates feed metadata (title, description, link, image) and articles
/// from an RSS 2.0 document.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when the document lacks an &lt;rss&gt; root or a &lt;channel&gt; element.
/// </exception>
private void ReadRss(Feed feed, XDocument x)
{
    // The original dereferenced x.Element("rss") unconditionally, so a
    // document without an <rss> root threw NullReferenceException instead of
    // the intended ArgumentException.
    var rss = x.Element("rss");
    var channel = rss == null ? null : rss.Element("channel");
    if (channel == null)
    {
        throw new ArgumentException("Feed did not contain a <channel> element.");
    }
    Elem(channel, "title", v => feed.Title = v);
    Elem(channel, "description", v => feed.Description = v);
    Elem(channel, "link", v => feed.Link = new Uri(v));
    var img = channel.Elements("image").FirstOrDefault();
    if (img != null)
    {
        Elem(img, "title", v => feed.ImageTitle = v);
        // RSS <image><url> serves as both logo and icon for us.
        ElemLink(img, "url", v => { feed.LogoUri = v; feed.IconUri = v; });
        ElemLink(img, "link", v => feed.ImageLinkTarget = v);
    }
    ReadArticles(feed, channel.Elements("item").AsEnumerable());
}
// Reads Atom feed-level metadata (title, alternate link, icon, logo) and the
// author/contributor lists, then parses every <entry> element as an article.
private void ReadAtom(Feed feed, XDocument x)
{
    var root = x.Descendants(atom + "feed").First();

    Elem(root, atom + "title", v => feed.Title = v);
    ElemAttrLink(root, "alternate", v => feed.Link = v);
    ElemLink(root, atom + "icon", v => feed.IconUri = v);
    ElemLink(root, atom + "logo", v => feed.LogoUri = v);

    // Contributors are folded into the same author list as authors.
    ReadAuthors(root, "author", feed.Authors);
    ReadAuthors(root, "contributor", feed.Authors);

    ReadArticles(feed, root.Elements(atom + "entry"));
}
// Merges freshly parsed article elements into the feed's head chunk,
// skipping articles already stored.
//
// ReadArticle receives `now` and (per the comments below) articles with no
// explicit publish date come back stamped with it, so PublishDate == now is
// used as a sentinel for "source gave no date". The two groups need
// different dedup strategies:
//  * dated articles: sort newest-first, take until the first one we already
//    have, then insert oldest-first (the Reverse) because inserting
//    newest-first "can cause issues otherwise";
//  * undated articles: keep document order, take up to the first
//    already-seen one.
// NOTE(review): an article whose real publish date equals `now` exactly
// would be misclassified as undated — unlikely, but worth knowing.
private void ReadArticles(Feed feed, IEnumerable<XElement> elements)
{
    var now = DateTime.UtcNow;
    var headChunk = feed.GetHeadChunk(_db);
    // Parse once, drop unparseable entries, materialize so we can partition
    // the list twice below.
    var allArticles = elements
        .Select(x => ReadArticle(x, now))
        .Where(x => x != null)
        .ToList();
    headChunk.AddAll(allArticles
        // Articles with an explicitly set date
        .Where(x => x.PublishDate != now)
        // Order by the date, most recent to least recent
        .OrderByDescending(x => x.PublishDate)
        // Grab newest to oldest, stop if we've seen this before
        .TakeWhile(x => headChunk.GetArticle(x.UniqueId) == null)
        // But insert oldest to newest, because it can cause issues otherwise.
        .Reverse());
    headChunk.AddAll(allArticles
        // Articles with no explicitly set date
        .Where(x => x.PublishDate == now)
        // Only ones that appear in the list before the last one we've seen before.
        .TakeWhile(x => headChunk.GetArticle(x.UniqueId) == null));
}