/// <summary>
/// Downloads the feed at <c>source.StreamUrl</c> and hands every item to <c>ReadFeedItem</c>.
/// The stream is first loaded as RSS; if that raises a <see cref="FormatException"/>, the URL is
/// opened a second time and loaded as Atom. Afterwards the stored source's <c>LastUpdated</c>
/// is set to the current local time and <c>source.Name</c> is written to the console.
/// </summary>
/// <param name="source">Feed source to read; <c>StreamUrl</c>, <c>Id</c> and <c>Name</c> are used here.</param>
/// <param name="IsForTest">Passed through unchanged to <c>ReadFeedItem</c>.</param>
/// <returns>The number of items for which <c>ReadFeedItem</c> returned <c>true</c>.</returns>
private int ReadSource(Source source, bool? IsForTest)
{
    // Pause after each item; presumably throttles requests to the remote site (TODO confirm).
    const int PauseBetweenItemsMs = 500;

    var count = 0;

    try
    {
        // WebClient is IDisposable; dispose it together with the stream it opened.
        using (var client = new WebClient())
        using (var stream = client.OpenRead(source.StreamUrl))
        {
            try
            {
                // First attempt: treat the stream as RSS.
                RssFeed feed = new RssFeed();
                feed.Load(stream);

                foreach (var i in feed.Channel.Items)
                {
                    try
                    {
                        if (ReadFeedItem(i, source, IsForTest))
                        {
                            count++;
                        }

                        Thread.Sleep(PauseBetweenItemsMs);
                    }
                    catch (Exception e)
                    {
                        // A missing link must not throw from inside the handler,
                        // otherwise the remaining items of the feed would be skipped.
                        RecordFailedUrl(i.Link != null ? i.Link.ToString() : string.Empty, e);
                    }
                }
            }
            catch (FormatException)
            {
                // Fallback: the first stream has already been consumed, so the URL is
                // opened again and loaded as Atom.
                AtomFeed afeed = new AtomFeed();
                using (var astream = client.OpenRead(source.StreamUrl))
                {
                    afeed.Load(astream);

                    foreach (var i in afeed.Entries)
                    {
                        try
                        {
                            if (ReadFeedItem(i, source, IsForTest))
                            {
                                count++;
                            }

                            Thread.Sleep(PauseBetweenItemsMs);
                        }
                        catch (Exception e)
                        {
                            var firstLink = i.Links.FirstOrDefault();
                            RecordFailedUrl(firstLink != null ? firstLink.ToString() : string.Empty, e);
                        }
                    }
                }
            }

            // Stamp the stored source; skip quietly if it can no longer be found.
            Source stored = this.Data.Sources.FirstOrDefault(d => d.Id == source.Id);
            if (stored != null)
            {
                stored.LastUpdated = DateTime.Now;
                this.Data.Sources.Update(stored);
                this.Data.SaveChanges();
            }
        }

        // NOTE: clearing the news cache (CacheManager.Clear(CacheRegions.News)) when count > 0
        // used to live here but was already commented out; it is intentionally not restored.
    }
    catch (Exception e)
    {
        // Reading a source stays best-effort (no rethrow), but the failure is no longer invisible.
        // TODO: replace with proper error handling/logging.
        Console.Error.WriteLine(e);
    }

    Console.WriteLine(source.Name);
    return count;
}

/// <summary>
/// Stores a <c>FailedUrl</c> record for an item that could not be processed and saves it immediately.
/// </summary>
/// <param name="url">Text identifying the failed item (empty when the item had no link).</param>
/// <param name="error">The exception raised while processing the item; only its message is stored.</param>
private void RecordFailedUrl(string url, Exception error)
{
    var failed = new FailedUrl();
    failed.Url = url;
    failed.Exception = error.Message;
    this.Data.FailedUrls.Add(failed);
    this.Data.SaveChanges();
}
/// <summary>
/// Brings an already stored news item up to date for the given source: adds the source's
/// category when the item does not have it yet, and re-parses the article when the stored
/// item has no content.
/// </summary>
/// <param name="url">Address of the article, passed to <c>ParseArticle</c>.</param>
/// <param name="source">Source the item was found in; supplies <c>CategoryId</c> and <c>SourceWebsite.Name</c>.</param>
/// <param name="oldItem">The stored item that matched the feed entry.</param>
private void UpdateOldItem(string url, Source source, NewsItem oldItem)
{
    if (!oldItem.Categories.Any(c => c.Id == source.CategoryId))
    {
        oldItem.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
        this.Data.SaveChanges();
    }

    // The original test was `== null && IsNullOrEmpty(x.Content)`: it dereferenced null when the
    // first half was true and was always false otherwise, so the re-parse below could never run.
    var forUpdateArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Id == oldItem.Id);
    if (forUpdateArticle == null || !string.IsNullOrEmpty(forUpdateArticle.Content))
    {
        return;
    }

    var parsed = ParseArticle(url, source.SourceWebsite.Name);
    if (parsed == null || string.IsNullOrEmpty(parsed.Content))
    {
        // Nothing usable came back; leave the stored item untouched.
        return;
    }

    // Fill in the missing content on the stored entity rather than handing the freshly parsed
    // (never saved) object to Update.
    // NOTE(review): only Content is copied; confirm whether other parsed fields should be merged too.
    forUpdateArticle.Content = parsed.Content;
    this.Data.NewsItems.Update(forUpdateArticle);
    this.Data.SaveChanges();
}
/// <summary>
/// Processes one Atom entry: updates a stored item with the same title and publication date,
/// or parses the linked article and stores it when no item with the same <c>Href</c> exists.
/// </summary>
/// <param name="i">The Atom entry; its <c>text/html</c> link, title and <c>UpdatedOn</c> are used.</param>
/// <param name="source">Source being read; supplies <c>CategoryId</c> and <c>SourceWebsite.Name</c>.</param>
/// <param name="IsForTest">Stored on a new article; <c>null</c> is treated as <c>false</c>.</param>
/// <returns><c>true</c> only when a new article was added; <c>false</c> when an existing one was found.</returns>
/// <exception cref="Exception">The entry has no usable <c>text/html</c> link, or the article could not be parsed.</exception>
private bool ReadFeedItem(AtomEntry i, Source source, bool? IsForTest)
{
    // Every path below needs the article address, so fail early with a clear message
    // instead of a NullReferenceException when the entry has no text/html link.
    var link = i.Links.FirstOrDefault(l => l.ContentType == "text/html");
    if (link == null || link.Uri == null)
    {
        throw new Exception("Atom entry has no text/html link");
    }

    var href = link.Uri.ToString();
    var entryTitle = i.Title.Content;

    // Check if there is another article with the same title published on the same day.
    NewsItem oldItem = this.Data.NewsItems.FirstOrDefault(s => s.Title.Trim().ToLower() == entryTitle.Trim().ToLower());
    if (oldItem != null && new DateTime(oldItem.DatePublished).Date == i.UpdatedOn.Date)
    {
        UpdateOldItem(href, source, oldItem);
        return false;
    }

    // Parse the article.
    var article = ParseArticle(href, source.SourceWebsite.Name);
    if (article == null)
    {
        // Report the address itself; the original formatted the link object here.
        throw new Exception(string.Format("Connot Parse Article url : {0}", href));
    }

    if (string.IsNullOrEmpty(article.Href))
    {
        article.Href = href;
    }

    // Check if the article is already stored under the same Href.
    var oldArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Href == article.Href);
    if (oldArticle != null)
    {
        if (!oldArticle.Categories.Any(c => c.Id == source.CategoryId))
        {
            oldArticle.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));

            // Update the stored entity whose categories just changed; the original passed the
            // freshly parsed, never-saved article instead.
            this.Data.NewsItems.Update(oldArticle);
            this.Data.SaveChanges();
        }

        return false;
    }

    // New article: fill in the remaining fields and store it.
    article.Media = source.SourceWebsite.Name;
    if (string.IsNullOrEmpty(article.Header))
    {
        // Fall back to the entry title with <img> tags removed.
        article.Header = Regex.Replace(entryTitle, @"<img\s[^>]*>(?:\s*?</img>)?", "", RegexOptions.IgnoreCase);
    }

    article.Title = article.Header;
    article.DatePublished = (i.UpdatedOn == DateTime.MinValue ? DateTime.Now : i.UpdatedOn).Ticks;
    article.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
    article.UsedForClassication = false;
    article.IsForTest = IsForTest ?? false;

    this.Data.NewsItems.Add(article);
    this.Data.SaveChanges();
    return true;
}
/// <summary>
/// Processes one RSS item: updates a stored item with the same title and publication date,
/// or parses the linked article and stores it when no item with the same <c>Href</c> exists.
/// </summary>
/// <param name="i">The RSS item; its <c>Link</c>, <c>Title</c> and <c>PublicationDate</c> are used.</param>
/// <param name="source">Source being read; supplies <c>CategoryId</c> and <c>SourceWebsite.Name</c>.</param>
/// <param name="IsForTest">Stored on a new article; <c>null</c> is treated as <c>false</c>.</param>
/// <returns><c>true</c> only when a new article was added; <c>false</c> when an existing one was found.</returns>
/// <exception cref="Exception">The item has no link, or the article could not be parsed or has no content.</exception>
private bool ReadFeedItem(RssItem i, Source source, bool? IsForTest)
{
    // Every path below needs the article address, so fail early with a clear message
    // instead of a NullReferenceException when the item carries no link.
    if (i.Link == null)
    {
        throw new Exception("RSS item has no link");
    }

    var href = i.Link.ToString();
    var itemTitle = i.Title;

    // Check if there is another article with the same title published on the same day.
    NewsItem oldItem = Data.NewsItems.All().FirstOrDefault(n => n.Title.Trim().ToLower() == itemTitle.Trim().ToLower());
    if (oldItem != null && new DateTime(oldItem.DatePublished).Date == i.PublicationDate.Date)
    {
        UpdateOldItem(href, source, oldItem);
        return false;
    }

    // Parse the article; unlike the Atom overload, an article without content is rejected.
    var article = ParseArticle(href, source.SourceWebsite.Name);
    if (article == null || string.IsNullOrEmpty(article.Content))
    {
        throw new Exception(string.Format("Connot Parse Article url : {0}", href));
    }

    if (string.IsNullOrEmpty(article.Href))
    {
        article.Href = href;
    }

    // Check if the article is already stored under the same Href.
    var oldArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Href == article.Href);
    if (oldArticle != null)
    {
        if (!oldArticle.Categories.Any(c => c.Id == source.CategoryId))
        {
            oldArticle.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));

            // Update the stored entity whose categories just changed; the original passed the
            // freshly parsed, never-saved article instead.
            this.Data.NewsItems.Update(oldArticle);
            this.Data.SaveChanges();
        }

        return false;
    }

    // New article: fill in the remaining fields and store it.
    article.Media = source.SourceWebsite.Name;
    if (string.IsNullOrEmpty(article.Header))
    {
        // Fall back to the item title with <img> tags removed.
        article.Header = Regex.Replace(itemTitle, @"<img\s[^>]*>(?:\s*?</img>)?", "", RegexOptions.IgnoreCase);
    }

    article.Title = article.Header;
    article.CleanContent = BaseHelper.ScrubHtml(article.Content);
    article.DatePublished = (i.PublicationDate == DateTime.MinValue ? DateTime.Now : i.PublicationDate).Ticks;
    article.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
    article.UsedForClassication = false;
    article.IsForTest = IsForTest ?? false;

    this.Data.NewsItems.Add(article);
    this.Data.SaveChanges();
    return true;
}